From a735713db305b1383393b223c4e6e530e038c41e Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 02:00:36 +0000
Subject: [PATCH 1/7] Add: Dynamic prompt cache breakpoints for Anthropic
 prefix caching

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 e2e/perstack-cli/continue.test.ts             |  15 ++
 e2e/perstack-cli/providers.test.ts            |   7 +
 packages/runtime/src/messages/message.test.ts | 227 ++++++++++++++++++
 packages/runtime/src/messages/message.ts      |  37 +++
 .../states/generating-tool-call.ts            |   4 +-
 5 files changed, 289 insertions(+), 1 deletion(-)

diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts
index e67e01e8..271ff045 100644
--- a/e2e/perstack-cli/continue.test.ts
+++ b/e2e/perstack-cli/continue.test.ts
@@ -72,6 +72,16 @@ describe.concurrent("Continue Job", () => {
     ).toBe(true)
     const completeEvents = filterEventsByType(continueResult.events, "completeRun")
     expect(completeEvents.length).toBe(1)
+
+    // Verify usage tracking flows through multi-turn conversations.
+    // On turn 2, the conversation prefix from turn 1 is resent — with prompt caching
+    // enabled (applyCacheBreakpoints), cachedInputTokens should be populated.
+    // Actual cache hits depend on the model's minimum token threshold
+    // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5).
+    const completeEvent = completeEvents[0]
+    const usage = (completeEvent as { usage?: Record<string, number> }).usage
+    expect(usage).toBeDefined()
+    expect(typeof usage?.cachedInputTokens).toBe("number")
   })
 
   // ─────────────────────────────────────────────────────────────────────────
@@ -128,6 +138,11 @@ describe.concurrent("Continue Job", () => {
     expect(continueCompleteEvents.length).toBe(1)
     const lastCompleteEvent = continueCompleteEvents[continueCompleteEvents.length - 1]
     expect((lastCompleteEvent as { text?: string }).text).toBeDefined()
+
+    // Verify usage includes cache metrics on continued run
+    const usage = (lastCompleteEvent as { usage?: Record<string, number> }).usage
+    expect(usage).toBeDefined()
+    expect(typeof usage?.cachedInputTokens).toBe("number")
   })
 
   // ─────────────────────────────────────────────────────────────────────────
diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts
index 2256005d..c71a1201 100644
--- a/e2e/perstack-cli/providers.test.ts
+++ b/e2e/perstack-cli/providers.test.ts
@@ -47,6 +47,13 @@ describe.concurrent("LLM Providers", () => {
       // Note: text may be empty when using attemptCompletion tool (explicit completion)
       // The actual response is in the checkpoint messages, not in completeRun.text
       expect((completeEvent as { text?: string }).text).toBeDefined()
+
+      // Verify usage tracking includes cache token metrics
+      const usage = (completeEvent as { usage?: Record<string, unknown> }).usage
+      expect(usage).toBeDefined()
+      expect(typeof usage?.inputTokens).toBe("number")
+      expect(typeof usage?.outputTokens).toBe("number")
+      expect(typeof usage?.cachedInputTokens).toBe("number")
     },
     LLM_TIMEOUT,
   )
diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts
index 14d6f6af..d8ab9100 100644
--- a/packages/runtime/src/messages/message.test.ts
+++ b/packages/runtime/src/messages/message.test.ts
@@ -1,6 +1,8 @@
 import { describe, expect, it } from "bun:test"
+import type { Message } from "@perstack/core"
 import { createInstructionMessage } from "./instruction-message.js"
 import {
+  applyCacheBreakpoints,
   createExpertMessage,
   createToolMessage,
   createUserMessage,
@@ -593,3 +595,228 @@ describe("@perstack/messages: instruction-message", () => {
     })
   })
 })
+
+describe("applyCacheBreakpoints", () => {
+  it("returns empty array for empty input", () => {
+    const result = applyCacheBreakpoints([])
+    expect(result).toEqual([])
+  })
+
+  it("returns copy of array when only one message exists", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+    ]
+    const result = applyCacheBreakpoints(messages)
+    expect(result).toEqual(messages)
+    expect(result).not.toBe(messages)
+  })
+
+  it("sets cache on last message when multiple messages exist", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+      {
+        type: "userMessage",
+        id: "msg-2",
+        contents: [{ id: "c-2", type: "textPart", text: "Hello" }],
+      },
+    ]
+    const result = applyCacheBreakpoints(messages)
+    expect(result[0].cache).toBe(true)
+    expect(result[1].cache).toBe(true)
+  })
+
+  it("preserves instruction message cache flag", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+      {
+        type: "userMessage",
+        id: "msg-2",
+        contents: [{ id: "c-2", type: "textPart", text: "User input" }],
+      },
+      {
+        type: "expertMessage",
+        id: "msg-3",
+        contents: [{ id: "c-3", type: "textPart", text: "Response" }],
+      },
+    ]
+    const result = applyCacheBreakpoints(messages)
+    expect(result[0].cache).toBe(true)
+    expect(result[0]).toBe(messages[0])
+  })
+
+  it("clears cache from middle messages", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+      {
+        type: "userMessage",
+        id: "msg-2",
+        cache: true,
+        contents: [{ id: "c-2", type: "textPart", text: "User input" }],
+      },
+      {
+        type: "expertMessage",
+        id: "msg-3",
+        contents: [{ id: "c-3", type: "textPart", text: "Response" }],
+      },
+      {
+        type: "toolMessage",
+        id: "msg-4",
+        contents: [
+          {
+            id: "c-4",
+            type: "toolResultPart",
+            toolCallId: "tc-1",
+            toolName: "readFile",
+            contents: [{ id: "c-5", type: "textPart", text: "file contents" }],
+            isError: false,
+          },
+        ],
+      },
+    ]
+    const result = applyCacheBreakpoints(messages)
+    expect(result[0].cache).toBe(true)
+    expect(result[1].cache).toBeUndefined()
+    expect(result[2].cache).toBeUndefined()
+    expect(result[3].cache).toBe(true)
+  })
+
+  it("does not mutate the original messages array", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+      {
+        type: "userMessage",
+        id: "msg-2",
+        contents: [{ id: "c-2", type: "textPart", text: "Hello" }],
+      },
+    ]
+    const original = JSON.parse(JSON.stringify(messages))
+    applyCacheBreakpoints(messages)
+    expect(messages).toEqual(original)
+  })
+
+  it("avoids creating new object when last message already has cache true", () => {
+    const lastMsg: Message = {
+      type: "expertMessage",
+      id: "msg-2",
+      cache: true,
+      contents: [{ id: "c-2", type: "textPart", text: "Response" }],
+    }
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
+      },
+      lastMsg,
+    ]
+    const result = applyCacheBreakpoints(messages)
+    expect(result[1]).toBe(lastMsg)
+  })
+
+  it("handles multi-turn conversation correctly", () => {
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "msg-1",
+        cache: true,
+        contents: [{ id: "c-1", type: "textPart", text: "System" }],
+      },
+      {
+        type: "userMessage",
+        id: "msg-2",
+        contents: [{ id: "c-2", type: "textPart", text: "Input" }],
+      },
+      {
+        type: "expertMessage",
+        id: "msg-3",
+        cache: true,
+        contents: [
+          {
+            id: "c-3",
+            type: "toolCallPart",
+            toolCallId: "tc-1",
+            toolName: "search",
+            args: {},
+          },
+        ],
+      },
+      {
+        type: "toolMessage",
+        id: "msg-4",
+        cache: true,
+        contents: [
+          {
+            id: "c-4",
+            type: "toolResultPart",
+            toolCallId: "tc-1",
+            toolName: "search",
+            contents: [{ id: "c-5", type: "textPart", text: "results" }],
+            isError: false,
+          },
+        ],
+      },
+      {
+        type: "expertMessage",
+        id: "msg-5",
+        contents: [
+          {
+            id: "c-6",
+            type: "toolCallPart",
+            toolCallId: "tc-2",
+            toolName: "write",
+            args: {},
+          },
+        ],
+      },
+      {
+        type: "toolMessage",
+        id: "msg-6",
+        contents: [
+          {
+            id: "c-7",
+            type: "toolResultPart",
+            toolCallId: "tc-2",
+            toolName: "write",
+            contents: [{ id: "c-8", type: "textPart", text: "done" }],
+            isError: false,
+          },
+        ],
+      },
+    ]
+
+    const result = applyCacheBreakpoints(messages)
+
+    expect(result[0].cache).toBe(true)
+    expect(result[1].cache).toBeUndefined()
+    expect(result[2].cache).toBeUndefined()
+    expect(result[3].cache).toBeUndefined()
+    expect(result[4].cache).toBeUndefined()
+    expect(result[5].cache).toBe(true)
+  })
+})
diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts
index 6984d84b..9c78df86 100644
--- a/packages/runtime/src/messages/message.ts
+++ b/packages/runtime/src/messages/message.ts
@@ -131,6 +131,43 @@ export function messageToCoreMessage(message: Message): ModelMessage {
       }
   }
 }
+
+/**
+ * Apply cache breakpoints to messages for optimal prompt caching.
+ *
+ * Anthropic supports up to 4 cache breakpoints. This function uses 2:
+ * - Breakpoint 1: Instruction message (preserves existing cache flag)
+ * - Breakpoint 2: Last message in conversation (dynamic, moves each turn)
+ *
+ * Cache flags on non-strategic positions are cleared to stay within limits.
+ * This is a pure function — it does not mutate the input array.
+ *
+ * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage
+ * only translates them to Anthropic providerOptions).
+ */
+export function applyCacheBreakpoints(messages: ReadonlyArray<Message>): Message[] {
+  if (messages.length <= 1) {
+    return [...messages]
+  }
+
+  const lastIndex = messages.length - 1
+
+  return messages.map((msg, index) => {
+    // Preserve instruction message cache (breakpoint 1)
+    if (msg.type === "instructionMessage") {
+      return msg
+    }
+
+    // Set cache on the last message (breakpoint 2 — dynamic frontier)
+    if (index === lastIndex) {
+      return msg.cache === true ? msg : { ...msg, cache: true }
+    }
+
+    // Clear cache from non-strategic positions
+    return msg.cache ? { ...msg, cache: undefined } : msg
+  })
+}
+
 function instructionContentsToCoreContent(
   contents: InstructionMessage["contents"],
 ): SystemModelMessage["content"] {
diff --git a/packages/runtime/src/state-machine/states/generating-tool-call.ts b/packages/runtime/src/state-machine/states/generating-tool-call.ts
index 0bcdf011..30d44ac0 100644
--- a/packages/runtime/src/state-machine/states/generating-tool-call.ts
+++ b/packages/runtime/src/state-machine/states/generating-tool-call.ts
@@ -22,6 +22,7 @@ import { getToolSet } from "../../helpers/tool-set.js"
 import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js"
 import type { StreamCallbacks } from "../../llm/types.js"
 import {
+  applyCacheBreakpoints,
   createExpertMessage,
   createToolMessage,
   createUserMessage,
@@ -100,6 +101,7 @@ export async function generatingToolCallLogic({
   llmExecutor,
 }: RunSnapshot["context"]): Promise<RunEvent> {
   const { messages } = checkpoint
+  const cachedMessages = applyCacheBreakpoints(messages)
 
   // Track if reasoning was completed via callback (to avoid duplicate emissions)
   let reasoningCompletedViaCallback = false
@@ -127,7 +129,7 @@ export async function generatingToolCallLogic({
 
   const executionResult = await llmExecutor.streamText(
     {
-      messages: messages.map(messageToCoreMessage),
+      messages: cachedMessages.map(messageToCoreMessage),
       maxRetries: setting.maxRetries,
       tools: getToolSet(skillManager),
       toolChoice: "auto",

From a626717f5cdb5cb96a78529cc724de46684b7833 Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 02:02:57 +0000
Subject: [PATCH 2/7] Chore: Add changeset for prompt cache breakpoints

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .changeset/prompt-cache-breakpoints.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/prompt-cache-breakpoints.md

diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md
new file mode 100644
index 00000000..22821c47
--- /dev/null
+++ b/.changeset/prompt-cache-breakpoints.md
@@ -0,0 +1,5 @@
+---
+"@perstack/runtime": patch
+---
+
+Add dynamic prompt cache breakpoints for Anthropic prefix caching

From 324dbc982fdbab0dfa13d230fa2b171da9f8bd56 Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 02:06:41 +0000
Subject: [PATCH 3/7] Chore: Bump perstack and create-expert in changeset

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .changeset/prompt-cache-breakpoints.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md
index 22821c47..84575ffe 100644
--- a/.changeset/prompt-cache-breakpoints.md
+++ b/.changeset/prompt-cache-breakpoints.md
@@ -1,5 +1,7 @@
 ---
 "@perstack/runtime": patch
+"perstack": patch
+"create-expert": patch
 ---
 
 Add dynamic prompt cache breakpoints for Anthropic prefix caching

From a7a670e84eeb700aa1bbd1d04dd878a1042e1abc Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 02:15:12 +0000
Subject: [PATCH 4/7] Add: Cache breakpoint on last tool definition for
 Anthropic prefix caching

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 packages/runtime/src/helpers/tool-set.ts | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/packages/runtime/src/helpers/tool-set.ts b/packages/runtime/src/helpers/tool-set.ts
index db27d368..f51372f4 100644
--- a/packages/runtime/src/helpers/tool-set.ts
+++ b/packages/runtime/src/helpers/tool-set.ts
@@ -2,11 +2,20 @@ import type { SkillManager } from "@perstack/skill-manager"
 import { jsonSchema, type ToolSet, tool } from "ai"
 
 export function getToolSet(skillManager: SkillManager): ToolSet {
+  const defs = skillManager.getToolDefinitions()
+  const lastIndex = defs.length - 1
   const tools: ToolSet = {}
-  for (const def of skillManager.getToolDefinitions()) {
+  for (let i = 0; i < defs.length; i++) {
+    const def = defs[i]
     tools[def.name] = tool({
       description: def.description,
       inputSchema: jsonSchema(def.inputSchema),
+      // Cache breakpoint on last tool for Anthropic prefix caching.
+      // Prefix order: tools → system → messages. Caching the last tool
+      // creates a stable breakpoint that survives system/message changes.
+      ...(i === lastIndex
+        ? { providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } } }
+        : {}),
     })
   }
   return tools

From 1f3c97f7775fd136e93009182c5def875c7335ed Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 02:26:39 +0000
Subject: [PATCH 5/7] Update: Use 20-block lookback window strategy for cache
 breakpoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BP1 on system message covers tools+system (prefix order: tools→system→messages).
BP2-4 distributed every ~20 content blocks across conversation messages,
working backwards from the last message. Removes redundant tool-level BP.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 packages/runtime/src/helpers/tool-set.ts      |  11 +-
 packages/runtime/src/messages/message.test.ts | 187 +++++++++++++++++-
 packages/runtime/src/messages/message.ts      |  53 ++++-
 3 files changed, 230 insertions(+), 21 deletions(-)

diff --git a/packages/runtime/src/helpers/tool-set.ts b/packages/runtime/src/helpers/tool-set.ts
index f51372f4..db27d368 100644
--- a/packages/runtime/src/helpers/tool-set.ts
+++ b/packages/runtime/src/helpers/tool-set.ts
@@ -2,20 +2,11 @@ import type { SkillManager } from "@perstack/skill-manager"
 import { jsonSchema, type ToolSet, tool } from "ai"
 
 export function getToolSet(skillManager: SkillManager): ToolSet {
-  const defs = skillManager.getToolDefinitions()
-  const lastIndex = defs.length - 1
   const tools: ToolSet = {}
-  for (let i = 0; i < defs.length; i++) {
-    const def = defs[i]
+  for (const def of skillManager.getToolDefinitions()) {
     tools[def.name] = tool({
       description: def.description,
       inputSchema: jsonSchema(def.inputSchema),
-      // Cache breakpoint on last tool for Anthropic prefix caching.
-      // Prefix order: tools → system → messages. Caching the last tool
-      // creates a stable breakpoint that survives system/message changes.
-      ...(i === lastIndex
-        ? { providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } } }
-        : {}),
     })
   }
   return tools
diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts
index d8ab9100..c27062af 100644
--- a/packages/runtime/src/messages/message.test.ts
+++ b/packages/runtime/src/messages/message.test.ts
@@ -739,7 +739,7 @@ describe("applyCacheBreakpoints", () => {
     expect(result[1]).toBe(lastMsg)
   })
 
-  it("handles multi-turn conversation correctly", () => {
+  it("handles multi-turn conversation correctly (< 20 blocks, single BP)", () => {
     const messages: Message[] = [
       {
         type: "instructionMessage",
@@ -819,4 +819,189 @@ describe("applyCacheBreakpoints", () => {
     expect(result[4].cache).toBeUndefined()
     expect(result[5].cache).toBe(true)
   })
+
+  it("places second BP when content blocks exceed 20-block lookback window", () => {
+    // Build a conversation with > 20 content blocks:
+    // Each tool-use step = expertMessage(3 parts) + toolMessage(1 part) = 4 blocks
+    // 6 steps = 24 blocks + 1 userMessage = 25 blocks total
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "instr",
+        cache: true,
+        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
+      },
+      {
+        type: "userMessage",
+        id: "user-0",
+        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
+      },
+    ]
+    for (let step = 0; step < 6; step++) {
+      messages.push({
+        type: "expertMessage",
+        id: `expert-${step}`,
+        contents: [
+          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
+          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
+          {
+            id: `tc-${step}`,
+            type: "toolCallPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            args: {},
+          },
+        ],
+      })
+      messages.push({
+        type: "toolMessage",
+        id: `tool-${step}`,
+        contents: [
+          {
+            id: `tr-${step}`,
+            type: "toolResultPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
+            isError: false,
+          },
+        ],
+      })
+    }
+    // Total non-instruction: 1 (user) + 6*(3+1) = 25 blocks, 13 messages
+
+    const result = applyCacheBreakpoints(messages)
+
+    // BP1: instruction (index 0)
+    expect(result[0].cache).toBe(true)
+    // Last message (index 13) should always have BP
+    expect(result[13].cache).toBe(true)
+
+    // A second BP should exist somewhere in the middle
+    const middleBps = result.slice(1, 13).filter((m) => m.cache === true)
+    expect(middleBps.length).toBe(1)
+
+    // All other non-BP messages should have cache cleared
+    const nonBpMessages = result.slice(1).filter((m) => m.cache !== true)
+    for (const msg of nonBpMessages) {
+      expect(msg.cache).toBeUndefined()
+    }
+  })
+
+  it("places up to 3 message BPs for very long conversations (> 40 blocks)", () => {
+    // 11 tool-use steps = 44 blocks + 1 user = 45 blocks total
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "instr",
+        cache: true,
+        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
+      },
+      {
+        type: "userMessage",
+        id: "user-0",
+        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
+      },
+    ]
+    for (let step = 0; step < 11; step++) {
+      messages.push({
+        type: "expertMessage",
+        id: `expert-${step}`,
+        contents: [
+          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
+          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
+          {
+            id: `tc-${step}`,
+            type: "toolCallPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            args: {},
+          },
+        ],
+      })
+      messages.push({
+        type: "toolMessage",
+        id: `tool-${step}`,
+        contents: [
+          {
+            id: `tr-${step}`,
+            type: "toolResultPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
+            isError: false,
+          },
+        ],
+      })
+    }
+    // Total non-instruction: 1 + 11*4 = 45 blocks, 24 messages (indices 0..24)
+
+    const result = applyCacheBreakpoints(messages)
+
+    // BP1: instruction
+    expect(result[0].cache).toBe(true)
+    // Last message always has BP
+    expect(result[result.length - 1].cache).toBe(true)
+
+    // Should have 3 message BPs total (max)
+    const messageBps = result.slice(1).filter((m) => m.cache === true)
+    expect(messageBps.length).toBe(3)
+  })
+
+  it("does not exceed 3 message BPs even for extremely long conversations", () => {
+    // 20 tool-use steps = 80 blocks + 1 user = 81 blocks total
+    const messages: Message[] = [
+      {
+        type: "instructionMessage",
+        id: "instr",
+        cache: true,
+        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
+      },
+      {
+        type: "userMessage",
+        id: "user-0",
+        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
+      },
+    ]
+    for (let step = 0; step < 20; step++) {
+      messages.push({
+        type: "expertMessage",
+        id: `expert-${step}`,
+        contents: [
+          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
+          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
+          {
+            id: `tc-${step}`,
+            type: "toolCallPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            args: {},
+          },
+        ],
+      })
+      messages.push({
+        type: "toolMessage",
+        id: `tool-${step}`,
+        contents: [
+          {
+            id: `tr-${step}`,
+            type: "toolResultPart",
+            toolCallId: `tc-${step}`,
+            toolName: "doWork",
+            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
+            isError: false,
+          },
+        ],
+      })
+    }
+
+    const result = applyCacheBreakpoints(messages)
+
+    // Should still max out at 3 message BPs
+    const messageBps = result.slice(1).filter((m) => m.cache === true)
+    expect(messageBps.length).toBe(3)
+
+    // Last message always has BP
+    expect(result[result.length - 1].cache).toBe(true)
+  })
 })
diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts
index 9c78df86..332eea7c 100644
--- a/packages/runtime/src/messages/message.ts
+++ b/packages/runtime/src/messages/message.ts
@@ -133,13 +133,16 @@ export function messageToCoreMessage(message: Message): ModelMessage {
 }
 
 /**
- * Apply cache breakpoints to messages for optimal prompt caching.
+ * Apply cache breakpoints to messages for Anthropic prompt caching.
  *
- * Anthropic supports up to 4 cache breakpoints. This function uses 2:
- * - Breakpoint 1: Instruction message (preserves existing cache flag)
- * - Breakpoint 2: Last message in conversation (dynamic, moves each turn)
+ * Anthropic allows up to 4 explicit cache breakpoints per request and uses a
+ * 20-block lookback window from each breakpoint to find cache matches.
+ *
+ * Breakpoint strategy:
+ * - BP1: Instruction message (caches tools + system; prefix order: tools → system → messages)
+ * - BP2–BP4: Up to 3 breakpoints across conversation messages, placed every ~20 content
+ *   blocks (working backwards from the last message) to ensure full lookback coverage.
  *
- * Cache flags on non-strategic positions are cleared to stay within limits.
  * This is a pure function — it does not mutate the input array.
  *
  * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage
@@ -150,20 +153,50 @@ export function applyCacheBreakpoints(messages: ReadonlyArray<Message>): Message
     return [...messages]
   }
 
-  const lastIndex = messages.length - 1
+  // BP2–BP4: up to 3 message breakpoints (BP1 is the instruction message)
+  const MAX_MESSAGE_BREAKPOINTS = 3
+  // Anthropic checks up to 20 content blocks before each breakpoint
+  const LOOKBACK_WINDOW = 20
+
+  // Collect non-instruction messages with their content block counts
+  const msgMeta: Array<{ originalIndex: number; blockCount: number }> = []
+  for (let i = 0; i < messages.length; i++) {
+    if (messages[i].type !== "instructionMessage") {
+      msgMeta.push({ originalIndex: i, blockCount: messages[i].contents.length })
+    }
+  }
+
+  // Determine which messages get cache breakpoints
+  const breakpointIndices = new Set<number>()
+
+  if (msgMeta.length > 0) {
+    // Always place a breakpoint on the last message (dynamic frontier)
+    breakpointIndices.add(msgMeta[msgMeta.length - 1].originalIndex)
+    let bpsPlaced = 1
+    let blocksSinceBp = 0
+
+    // Walk backwards, placing a breakpoint every ~20 content blocks
+    for (let i = msgMeta.length - 2; i >= 0 && bpsPlaced < MAX_MESSAGE_BREAKPOINTS; i--) {
+      blocksSinceBp += msgMeta[i].blockCount
+      if (blocksSinceBp >= LOOKBACK_WINDOW) {
+        breakpointIndices.add(msgMeta[i].originalIndex)
+        bpsPlaced++
+        blocksSinceBp = 0
+      }
+    }
+  }
 
   return messages.map((msg, index) => {
-    // Preserve instruction message cache (breakpoint 1)
+    // Preserve instruction message cache (BP1: caches tools + system)
     if (msg.type === "instructionMessage") {
       return msg
     }
 
-    // Set cache on the last message (breakpoint 2 — dynamic frontier)
-    if (index === lastIndex) {
+    if (breakpointIndices.has(index)) {
       return msg.cache === true ? msg : { ...msg, cache: true }
     }
 
-    // Clear cache from non-strategic positions
+    // Clear stale cache flags from non-strategic positions
     return msg.cache ? { ...msg, cache: undefined } : msg
   })
 }

From d157fa8c3bdedf0e4cc5bf8d26980c98cb2edc3e Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 03:02:14 +0000
Subject: [PATCH 6/7] Update: Switch to automatic caching via @ai-sdk/anthropic
 3.0.47

Replace explicit applyCacheBreakpoints() with Anthropic's request-level
automatic caching (cache_control: {type: "ephemeral"}). Anthropic then
places breakpoints automatically on the last system, tool, and message
blocks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .changeset/prompt-cache-breakpoints.md        |   3 +-
 bun.lock                                      |  61 +--
 e2e/perstack-cli/continue.test.ts             |   4 +-
 package.json                                  |   5 +-
 .../providers/anthropic/src/adapter.test.ts   |  15 +-
 packages/providers/anthropic/src/adapter.ts   |  14 +-
 .../src/messages/instruction-message.ts       |   1 -
 packages/runtime/src/messages/message.test.ts | 414 +-----------------
 packages/runtime/src/messages/message.ts      |  69 ---
 .../states/generating-tool-call.ts            |   4 +-
 .../src/state-machine/states/init.test.ts     |   1 -
 11 files changed, 67 insertions(+), 524 deletions(-)

diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md
index 84575ffe..1c296d73 100644
--- a/.changeset/prompt-cache-breakpoints.md
+++ b/.changeset/prompt-cache-breakpoints.md
@@ -1,7 +1,8 @@
 ---
 "@perstack/runtime": patch
+"@perstack/anthropic-provider": patch
 "perstack": patch
 "create-expert": patch
 ---
 
-Add dynamic prompt cache breakpoints for Anthropic prefix caching
+Enable automatic prompt caching for Anthropic via request-level cache_control
diff --git a/bun.lock b/bun.lock
index 4dad4e33..9ab53252 100644
--- a/bun.lock
+++ b/bun.lock
@@ -4,6 +4,9 @@
   "workspaces": {
     "": {
       "name": "perstack-monorepo",
+      "dependencies": {
+        "@ai-sdk/anthropic": "^3.0.47",
+      },
       "devDependencies": {
         "@biomejs/biome": "^2.4.2",
         "@changesets/changelog-github": "^0.5.2",
@@ -18,10 +21,10 @@
     },
     "apps/base": {
       "name": "@perstack/base",
-      "version": "0.0.66",
+      "version": "0.0.68",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.26.0",
-        "@perstack/core": "0.0.54",
+        "@perstack/core": "0.0.56",
         "commander": "^14.0.3",
         "zod": "^4.3.6",
       },
@@ -33,7 +36,7 @@
     },
     "apps/create-expert": {
       "name": "create-expert",
-      "version": "0.0.43",
+      "version": "0.0.45",
       "bin": {
         "create-expert": "bin/cli.ts",
       },
@@ -52,7 +55,7 @@
     },
     "apps/create-expert-skill": {
       "name": "@perstack/create-expert-skill",
-      "version": "0.0.3",
+      "version": "0.0.5",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.26.0",
         "commander": "^14.0.3",
@@ -68,7 +71,7 @@
     },
     "apps/perstack": {
       "name": "perstack",
-      "version": "0.0.95",
+      "version": "0.0.97",
       "dependencies": {
         "commander": "^14.0.3",
       },
@@ -85,7 +88,7 @@
     },
     "packages/core": {
       "name": "@perstack/core",
-      "version": "0.0.54",
+      "version": "0.0.56",
       "dependencies": {
         "@paralleldrive/cuid2": "^3.3.0",
         "zod": "^4.3.6",
@@ -98,7 +101,7 @@
     },
     "packages/filesystem": {
       "name": "@perstack/filesystem-storage",
-      "version": "0.0.25",
+      "version": "0.0.27",
       "dependencies": {
         "@perstack/core": "workspace:*",
       },
@@ -111,7 +114,7 @@
     },
     "packages/installer": {
       "name": "@perstack/installer",
-      "version": "0.0.18",
+      "version": "0.0.20",
       "dependencies": {
         "@perstack/api-client": "^0.0.56",
         "@perstack/core": "workspace:*",
@@ -127,7 +130,7 @@
     },
     "packages/log": {
       "name": "@perstack/log",
-      "version": "0.0.11",
+      "version": "0.0.13",
       "dependencies": {
         "@perstack/core": "workspace:*",
         "@perstack/filesystem-storage": "workspace:*",
@@ -140,7 +143,7 @@
     },
     "packages/perstack-toml": {
       "name": "@perstack/perstack-toml",
-      "version": "0.0.10",
+      "version": "0.0.12",
       "dependencies": {
         "@perstack/core": "workspace:*",
         "smol-toml": "^1.6.0",
@@ -154,7 +157,7 @@
     },
     "packages/providers/anthropic": {
       "name": "@perstack/anthropic-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/anthropic": "^3.0.44",
         "@perstack/core": "workspace:*",
@@ -169,7 +172,7 @@
     },
     "packages/providers/azure-openai": {
       "name": "@perstack/azure-openai-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/azure": "^3.0.31",
         "@perstack/core": "workspace:*",
@@ -184,7 +187,7 @@
     },
     "packages/providers/bedrock": {
       "name": "@perstack/bedrock-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/amazon-bedrock": "^4.0.60",
         "@perstack/core": "workspace:*",
@@ -199,7 +202,7 @@
     },
     "packages/providers/core": {
       "name": "@perstack/provider-core",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@perstack/core": "workspace:*",
         "undici": "^7.22.0",
@@ -213,7 +216,7 @@
     },
     "packages/providers/deepseek": {
       "name": "@perstack/deepseek-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/deepseek": "^2.0.20",
         "@perstack/core": "workspace:*",
@@ -228,7 +231,7 @@
     },
     "packages/providers/google": {
       "name": "@perstack/google-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/google": "^3.0.29",
         "@perstack/core": "workspace:*",
@@ -243,7 +246,7 @@
     },
     "packages/providers/ollama": {
       "name": "@perstack/ollama-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@perstack/core": "workspace:*",
         "@perstack/provider-core": "workspace:*",
@@ -258,7 +261,7 @@
     },
     "packages/providers/openai": {
       "name": "@perstack/openai-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/openai": "^3.0.29",
         "@perstack/core": "workspace:*",
@@ -273,7 +276,7 @@
     },
     "packages/providers/vertex": {
       "name": "@perstack/vertex-provider",
-      "version": "0.0.27",
+      "version": "0.0.29",
       "dependencies": {
         "@ai-sdk/google-vertex": "^4.0.58",
         "@perstack/core": "workspace:*",
@@ -288,7 +291,7 @@
     },
     "packages/react": {
       "name": "@perstack/react",
-      "version": "0.0.58",
+      "version": "0.0.60",
       "dependencies": {
         "@perstack/core": "workspace:*",
       },
@@ -307,7 +310,7 @@
     },
     "packages/runtime": {
       "name": "@perstack/runtime",
-      "version": "0.0.115",
+      "version": "0.0.117",
       "dependencies": {
         "@ai-sdk/amazon-bedrock": "^4.0.60",
         "@ai-sdk/anthropic": "^3.0.44",
@@ -319,8 +322,8 @@
         "@modelcontextprotocol/sdk": "^1.26.0",
         "@paralleldrive/cuid2": "^3.3.0",
         "@perstack/api-client": "^0.0.56",
-        "@perstack/base": "0.0.66",
-        "@perstack/core": "0.0.54",
+        "@perstack/base": "0.0.68",
+        "@perstack/core": "0.0.56",
         "ai": "^6.0.86",
         "ollama-ai-provider-v2": "^3.3.0",
         "smol-toml": "^1.6.0",
@@ -346,7 +349,7 @@
     },
     "packages/skill-manager": {
       "name": "@perstack/skill-manager",
-      "version": "0.0.12",
+      "version": "0.0.14",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.26.0",
         "@paralleldrive/cuid2": "^3.3.0",
@@ -362,7 +365,7 @@
     },
     "packages/tui": {
       "name": "@perstack/tui",
-      "version": "0.0.16",
+      "version": "0.0.18",
       "dependencies": {
         "@paralleldrive/cuid2": "^3.3.0",
         "@perstack/core": "workspace:*",
@@ -379,7 +382,7 @@
     },
     "packages/tui-components": {
       "name": "@perstack/tui-components",
-      "version": "0.0.18",
+      "version": "0.0.20",
       "dependencies": {
         "@perstack/core": "workspace:*",
         "@perstack/react": "workspace:*",
@@ -397,7 +400,7 @@
   "packages": {
     "@ai-sdk/amazon-bedrock": ["@ai-sdk/amazon-bedrock@4.0.63", "", { "dependencies": { "@ai-sdk/anthropic": "3.0.46", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@smithy/eventstream-codec": "^4.0.1", "@smithy/util-utf8": "^4.0.0", "aws4fetch": "^1.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-kNOaIaOXWFZFWbB0xM1l/bQYo7XwTkpdHbrA6n9A2U1c4/DcLF/+Rwc3vZF6MHPVSjoYVG0qxIa7jh39rKftYA=="],
 
-    "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="],
+    "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.47", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-E6Z3i/xvxGDxRskMMbuX9+xDK4l5LesrP2O7YQ0CcbAkYP25qTo/kYGf/AsJrLkNIY23HeO/kheUWtG1XZllDA=="],
 
     "@ai-sdk/azure": ["@ai-sdk/azure@3.0.31", "", { "dependencies": { "@ai-sdk/openai": "3.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-W9x6nt+yf+Ns0/Wx7U9TXHLmfu7mOUqy1b/drtVd3DvNfDudyruQM/YjM2268Q0FatSrPlA2RlnPVPGRH/4V8Q=="],
 
@@ -1325,6 +1328,10 @@
 
     "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="],
 
+    "@ai-sdk/amazon-bedrock/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="],
+
+    "@ai-sdk/google-vertex/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="],
+
     "@aws-crypto/util/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="],
 
     "@babel/code-frame/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.28.5", "", {}, "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q=="],
diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts
index 271ff045..ad43c3e5 100644
--- a/e2e/perstack-cli/continue.test.ts
+++ b/e2e/perstack-cli/continue.test.ts
@@ -74,8 +74,8 @@ describe.concurrent("Continue Job", () => {
     expect(completeEvents.length).toBe(1)
 
     // Verify usage tracking flows through multi-turn conversations.
-    // On turn 2, the conversation prefix from turn 1 is resent — with prompt caching
-    // enabled (applyCacheBreakpoints), cachedInputTokens should be populated.
+    // On turn 2, the conversation prefix from turn 1 is resent — with automatic
+    // prompt caching enabled via providerOptions, cachedInputTokens should be populated.
     // Actual cache hits depend on the model's minimum token threshold
     // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5).
     const completeEvent = completeEvents[0]
diff --git a/package.json b/package.json
index 5434341c..f9773419 100644
--- a/package.json
+++ b/package.json
@@ -42,5 +42,8 @@
   "engines": {
     "bun": ">=1.2.0"
   },
-  "packageManager": "bun@1.3.5"
+  "packageManager": "bun@1.3.5",
+  "dependencies": {
+    "@ai-sdk/anthropic": "^3.0.47"
+  }
 }
diff --git a/packages/providers/anthropic/src/adapter.test.ts b/packages/providers/anthropic/src/adapter.test.ts
index f8393aff..9dd23a4a 100644
--- a/packages/providers/anthropic/src/adapter.test.ts
+++ b/packages/providers/anthropic/src/adapter.test.ts
@@ -64,19 +64,23 @@ describe("AnthropicProviderAdapter", () => {
   })
 
   describe("getProviderOptions", () => {
-    it("returns undefined when no skills provided", () => {
+    it("returns cacheControl when no skills provided", () => {
       const adapter = new AnthropicProviderAdapter(mockConfig)
       const options = adapter.getProviderOptions()
-      expect(options).toBeUndefined()
+      expect(options).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      })
     })
 
-    it("returns undefined when empty skills array provided", () => {
+    it("returns cacheControl when empty skills array provided", () => {
       const adapter = new AnthropicProviderAdapter(mockConfig)
       const options = adapter.getProviderOptions({ skills: [] })
-      expect(options).toBeUndefined()
+      expect(options).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      })
     })
 
-    it("returns provider options with builtin skills", () => {
+    it("returns provider options with builtin skills and cacheControl", () => {
       const adapter = new AnthropicProviderAdapter(mockConfig)
       const options = adapter.getProviderOptions({
         skills: [{ type: "builtin", skillId: "pdf" }],
@@ -86,6 +90,7 @@ describe("AnthropicProviderAdapter", () => {
           container: {
             skills: [{ type: "builtin", name: "pdf" }],
           },
+          cacheControl: { type: "ephemeral" },
         },
       })
     })
diff --git a/packages/providers/anthropic/src/adapter.ts b/packages/providers/anthropic/src/adapter.ts
index 598bb93c..3b27d8d1 100644
--- a/packages/providers/anthropic/src/adapter.ts
+++ b/packages/providers/anthropic/src/adapter.ts
@@ -40,7 +40,19 @@ export class AnthropicProviderAdapter extends BaseProviderAdapter {
   }
 
   override getProviderOptions(config?: ProviderOptionsConfig): ProviderOptions | undefined {
-    return buildProviderOptions(config?.skills)
+    const skillOptions = buildProviderOptions(config?.skills)
+    const cacheOptions: ProviderOptions = {
+      anthropic: { cacheControl: { type: "ephemeral" } },
+    }
+    if (!skillOptions) {
+      return cacheOptions
+    }
+    return {
+      anthropic: {
+        ...skillOptions["anthropic"],
+        ...cacheOptions["anthropic"],
+      },
+    }
   }
 
   override getReasoningOptions(budget: ReasoningBudget): ProviderOptions | undefined {
diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts
index b0adf1d1..4fd07d85 100644
--- a/packages/runtime/src/messages/instruction-message.ts
+++ b/packages/runtime/src/messages/instruction-message.ts
@@ -65,7 +65,6 @@ export function createInstructionMessage(expert: Expert, startedAt: number): Ins
       },
     ],
     id: createId(),
-    cache: true,
   }
 }
 
diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts
index c27062af..c3f97230 100644
--- a/packages/runtime/src/messages/message.test.ts
+++ b/packages/runtime/src/messages/message.test.ts
@@ -1,8 +1,6 @@
 import { describe, expect, it } from "bun:test"
-import type { Message } from "@perstack/core"
 import { createInstructionMessage } from "./instruction-message.js"
 import {
-  applyCacheBreakpoints,
   createExpertMessage,
   createToolMessage,
   createUserMessage,
@@ -518,7 +516,7 @@ describe("@perstack/messages: instruction-message", () => {
       }
       const result = createInstructionMessage(expert, startedAt)
       expect(result.type).toBe("instructionMessage")
-      expect(result.cache).toBe(true)
+      expect(result.cache).toBeUndefined()
       expect(result.contents[0].type).toBe("textPart")
       expect(result.contents[0].text).toContain("You are a test expert.")
     })
@@ -595,413 +593,3 @@ describe("@perstack/messages: instruction-message", () => {
     })
   })
 })
-
-describe("applyCacheBreakpoints", () => {
-  it("returns empty array for empty input", () => {
-    const result = applyCacheBreakpoints([])
-    expect(result).toEqual([])
-  })
-
-  it("returns copy of array when only one message exists", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-    ]
-    const result = applyCacheBreakpoints(messages)
-    expect(result).toEqual(messages)
-    expect(result).not.toBe(messages)
-  })
-
-  it("sets cache on last message when multiple messages exist", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-      {
-        type: "userMessage",
-        id: "msg-2",
-        contents: [{ id: "c-2", type: "textPart", text: "Hello" }],
-      },
-    ]
-    const result = applyCacheBreakpoints(messages)
-    expect(result[0].cache).toBe(true)
-    expect(result[1].cache).toBe(true)
-  })
-
-  it("preserves instruction message cache flag", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-      {
-        type: "userMessage",
-        id: "msg-2",
-        contents: [{ id: "c-2", type: "textPart", text: "User input" }],
-      },
-      {
-        type: "expertMessage",
-        id: "msg-3",
-        contents: [{ id: "c-3", type: "textPart", text: "Response" }],
-      },
-    ]
-    const result = applyCacheBreakpoints(messages)
-    expect(result[0].cache).toBe(true)
-    expect(result[0]).toBe(messages[0])
-  })
-
-  it("clears cache from middle messages", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-      {
-        type: "userMessage",
-        id: "msg-2",
-        cache: true,
-        contents: [{ id: "c-2", type: "textPart", text: "User input" }],
-      },
-      {
-        type: "expertMessage",
-        id: "msg-3",
-        contents: [{ id: "c-3", type: "textPart", text: "Response" }],
-      },
-      {
-        type: "toolMessage",
-        id: "msg-4",
-        contents: [
-          {
-            id: "c-4",
-            type: "toolResultPart",
-            toolCallId: "tc-1",
-            toolName: "readFile",
-            contents: [{ id: "c-5", type: "textPart", text: "file contents" }],
-            isError: false,
-          },
-        ],
-      },
-    ]
-    const result = applyCacheBreakpoints(messages)
-    expect(result[0].cache).toBe(true)
-    expect(result[1].cache).toBeUndefined()
-    expect(result[2].cache).toBeUndefined()
-    expect(result[3].cache).toBe(true)
-  })
-
-  it("does not mutate the original messages array", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-      {
-        type: "userMessage",
-        id: "msg-2",
-        contents: [{ id: "c-2", type: "textPart", text: "Hello" }],
-      },
-    ]
-    const original = JSON.parse(JSON.stringify(messages))
-    applyCacheBreakpoints(messages)
-    expect(messages).toEqual(original)
-  })
-
-  it("avoids creating new object when last message already has cache true", () => {
-    const lastMsg: Message = {
-      type: "expertMessage",
-      id: "msg-2",
-      cache: true,
-      contents: [{ id: "c-2", type: "textPart", text: "Response" }],
-    }
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System prompt" }],
-      },
-      lastMsg,
-    ]
-    const result = applyCacheBreakpoints(messages)
-    expect(result[1]).toBe(lastMsg)
-  })
-
-  it("handles multi-turn conversation correctly (< 20 blocks, single BP)", () => {
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "msg-1",
-        cache: true,
-        contents: [{ id: "c-1", type: "textPart", text: "System" }],
-      },
-      {
-        type: "userMessage",
-        id: "msg-2",
-        contents: [{ id: "c-2", type: "textPart", text: "Input" }],
-      },
-      {
-        type: "expertMessage",
-        id: "msg-3",
-        cache: true,
-        contents: [
-          {
-            id: "c-3",
-            type: "toolCallPart",
-            toolCallId: "tc-1",
-            toolName: "search",
-            args: {},
-          },
-        ],
-      },
-      {
-        type: "toolMessage",
-        id: "msg-4",
-        cache: true,
-        contents: [
-          {
-            id: "c-4",
-            type: "toolResultPart",
-            toolCallId: "tc-1",
-            toolName: "search",
-            contents: [{ id: "c-5", type: "textPart", text: "results" }],
-            isError: false,
-          },
-        ],
-      },
-      {
-        type: "expertMessage",
-        id: "msg-5",
-        contents: [
-          {
-            id: "c-6",
-            type: "toolCallPart",
-            toolCallId: "tc-2",
-            toolName: "write",
-            args: {},
-          },
-        ],
-      },
-      {
-        type: "toolMessage",
-        id: "msg-6",
-        contents: [
-          {
-            id: "c-7",
-            type: "toolResultPart",
-            toolCallId: "tc-2",
-            toolName: "write",
-            contents: [{ id: "c-8", type: "textPart", text: "done" }],
-            isError: false,
-          },
-        ],
-      },
-    ]
-
-    const result = applyCacheBreakpoints(messages)
-
-    expect(result[0].cache).toBe(true)
-    expect(result[1].cache).toBeUndefined()
-    expect(result[2].cache).toBeUndefined()
-    expect(result[3].cache).toBeUndefined()
-    expect(result[4].cache).toBeUndefined()
-    expect(result[5].cache).toBe(true)
-  })
-
-  it("places second BP when content blocks exceed 20-block lookback window", () => {
-    // Build a conversation with > 20 content blocks:
-    // Each tool-use step = expertMessage(3 parts) + toolMessage(1 part) = 4 blocks
-    // 6 steps = 24 blocks + 1 userMessage = 25 blocks total
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "instr",
-        cache: true,
-        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
-      },
-      {
-        type: "userMessage",
-        id: "user-0",
-        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
-      },
-    ]
-    for (let step = 0; step < 6; step++) {
-      messages.push({
-        type: "expertMessage",
-        id: `expert-${step}`,
-        contents: [
-          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
-          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
-          {
-            id: `tc-${step}`,
-            type: "toolCallPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            args: {},
-          },
-        ],
-      })
-      messages.push({
-        type: "toolMessage",
-        id: `tool-${step}`,
-        contents: [
-          {
-            id: `tr-${step}`,
-            type: "toolResultPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
-            isError: false,
-          },
-        ],
-      })
-    }
-    // Total non-instruction: 1 (user) + 6*(3+1) = 25 blocks, 13 messages
-
-    const result = applyCacheBreakpoints(messages)
-
-    // BP1: instruction (index 0)
-    expect(result[0].cache).toBe(true)
-    // Last message (index 13) should always have BP
-    expect(result[13].cache).toBe(true)
-
-    // A second BP should exist somewhere in the middle
-    const middleBps = result.slice(1, 13).filter((m) => m.cache === true)
-    expect(middleBps.length).toBe(1)
-
-    // All other non-BP messages should have cache cleared
-    const nonBpMessages = result.slice(1).filter((m) => m.cache !== true)
-    for (const msg of nonBpMessages) {
-      expect(msg.cache).toBeUndefined()
-    }
-  })
-
-  it("places up to 3 message BPs for very long conversations (> 40 blocks)", () => {
-    // 11 tool-use steps = 44 blocks + 1 user = 45 blocks total
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "instr",
-        cache: true,
-        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
-      },
-      {
-        type: "userMessage",
-        id: "user-0",
-        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
-      },
-    ]
-    for (let step = 0; step < 11; step++) {
-      messages.push({
-        type: "expertMessage",
-        id: `expert-${step}`,
-        contents: [
-          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
-          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
-          {
-            id: `tc-${step}`,
-            type: "toolCallPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            args: {},
-          },
-        ],
-      })
-      messages.push({
-        type: "toolMessage",
-        id: `tool-${step}`,
-        contents: [
-          {
-            id: `tr-${step}`,
-            type: "toolResultPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
-            isError: false,
-          },
-        ],
-      })
-    }
-    // Total non-instruction: 1 + 11*4 = 45 blocks, 24 messages (indices 0..24)
-
-    const result = applyCacheBreakpoints(messages)
-
-    // BP1: instruction
-    expect(result[0].cache).toBe(true)
-    // Last message always has BP
-    expect(result[result.length - 1].cache).toBe(true)
-
-    // Should have 3 message BPs total (max)
-    const messageBps = result.slice(1).filter((m) => m.cache === true)
-    expect(messageBps.length).toBe(3)
-  })
-
-  it("does not exceed 3 message BPs even for extremely long conversations", () => {
-    // 20 tool-use steps = 80 blocks + 1 user = 81 blocks total
-    const messages: Message[] = [
-      {
-        type: "instructionMessage",
-        id: "instr",
-        cache: true,
-        contents: [{ id: "c-instr", type: "textPart", text: "System" }],
-      },
-      {
-        type: "userMessage",
-        id: "user-0",
-        contents: [{ id: "c-u0", type: "textPart", text: "Go" }],
-      },
-    ]
-    for (let step = 0; step < 20; step++) {
-      messages.push({
-        type: "expertMessage",
-        id: `expert-${step}`,
-        contents: [
-          { id: `th-${step}`, type: "thinkingPart", thinking: "...", signature: "sig" },
-          { id: `txt-${step}`, type: "textPart", text: `Step ${step}` },
-          {
-            id: `tc-${step}`,
-            type: "toolCallPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            args: {},
-          },
-        ],
-      })
-      messages.push({
-        type: "toolMessage",
-        id: `tool-${step}`,
-        contents: [
-          {
-            id: `tr-${step}`,
-            type: "toolResultPart",
-            toolCallId: `tc-${step}`,
-            toolName: "doWork",
-            contents: [{ id: `trr-${step}`, type: "textPart", text: "ok" }],
-            isError: false,
-          },
-        ],
-      })
-    }
-
-    const result = applyCacheBreakpoints(messages)
-
-    // Should still max out at 3 message BPs
-    const messageBps = result.slice(1).filter((m) => m.cache === true)
-    expect(messageBps.length).toBe(3)
-
-    // Last message always has BP
-    expect(result[result.length - 1].cache).toBe(true)
-  })
-})
diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts
index 332eea7c..d4bb1422 100644
--- a/packages/runtime/src/messages/message.ts
+++ b/packages/runtime/src/messages/message.ts
@@ -132,75 +132,6 @@ export function messageToCoreMessage(message: Message): ModelMessage {
   }
 }
 
-/**
- * Apply cache breakpoints to messages for Anthropic prompt caching.
- *
- * Anthropic allows up to 4 explicit cache breakpoints per request and uses a
- * 20-block lookback window from each breakpoint to find cache matches.
- *
- * Breakpoint strategy:
- * - BP1: Instruction message (caches tools + system; prefix order: tools → system → messages)
- * - BP2–BP4: Up to 3 breakpoints across conversation messages, placed every ~20 content
- *   blocks (working backwards from the last message) to ensure full lookback coverage.
- *
- * This is a pure function — it does not mutate the input array.
- *
- * For non-Anthropic providers, cache flags are harmless (messageToCoreMessage
- * only translates them to Anthropic providerOptions).
- */
-export function applyCacheBreakpoints(messages: ReadonlyArray<Message>): Message[] {
-  if (messages.length <= 1) {
-    return [...messages]
-  }
-
-  // BP2–BP4: up to 3 message breakpoints (BP1 is the instruction message)
-  const MAX_MESSAGE_BREAKPOINTS = 3
-  // Anthropic checks up to 20 content blocks before each breakpoint
-  const LOOKBACK_WINDOW = 20
-
-  // Collect non-instruction messages with their content block counts
-  const msgMeta: Array<{ originalIndex: number; blockCount: number }> = []
-  for (let i = 0; i < messages.length; i++) {
-    if (messages[i].type !== "instructionMessage") {
-      msgMeta.push({ originalIndex: i, blockCount: messages[i].contents.length })
-    }
-  }
-
-  // Determine which messages get cache breakpoints
-  const breakpointIndices = new Set<number>()
-
-  if (msgMeta.length > 0) {
-    // Always place a breakpoint on the last message (dynamic frontier)
-    breakpointIndices.add(msgMeta[msgMeta.length - 1].originalIndex)
-    let bpsPlaced = 1
-    let blocksSinceBp = 0
-
-    // Walk backwards, placing a breakpoint every ~20 content blocks
-    for (let i = msgMeta.length - 2; i >= 0 && bpsPlaced < MAX_MESSAGE_BREAKPOINTS; i--) {
-      blocksSinceBp += msgMeta[i].blockCount
-      if (blocksSinceBp >= LOOKBACK_WINDOW) {
-        breakpointIndices.add(msgMeta[i].originalIndex)
-        bpsPlaced++
-        blocksSinceBp = 0
-      }
-    }
-  }
-
-  return messages.map((msg, index) => {
-    // Preserve instruction message cache (BP1: caches tools + system)
-    if (msg.type === "instructionMessage") {
-      return msg
-    }
-
-    if (breakpointIndices.has(index)) {
-      return msg.cache === true ? msg : { ...msg, cache: true }
-    }
-
-    // Clear stale cache flags from non-strategic positions
-    return msg.cache ? { ...msg, cache: undefined } : msg
-  })
-}
-
 function instructionContentsToCoreContent(
   contents: InstructionMessage["contents"],
 ): SystemModelMessage["content"] {
diff --git a/packages/runtime/src/state-machine/states/generating-tool-call.ts b/packages/runtime/src/state-machine/states/generating-tool-call.ts
index 30d44ac0..0bcdf011 100644
--- a/packages/runtime/src/state-machine/states/generating-tool-call.ts
+++ b/packages/runtime/src/state-machine/states/generating-tool-call.ts
@@ -22,7 +22,6 @@ import { getToolSet } from "../../helpers/tool-set.js"
 import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js"
 import type { StreamCallbacks } from "../../llm/types.js"
 import {
-  applyCacheBreakpoints,
   createExpertMessage,
   createToolMessage,
   createUserMessage,
@@ -101,7 +100,6 @@ export async function generatingToolCallLogic({
   llmExecutor,
 }: RunSnapshot["context"]): Promise<RunEvent> {
   const { messages } = checkpoint
-  const cachedMessages = applyCacheBreakpoints(messages)
 
   // Track if reasoning was completed via callback (to avoid duplicate emissions)
   let reasoningCompletedViaCallback = false
@@ -129,7 +127,7 @@ export async function generatingToolCallLogic({
 
   const executionResult = await llmExecutor.streamText(
     {
-      messages: cachedMessages.map(messageToCoreMessage),
+      messages: messages.map(messageToCoreMessage),
       maxRetries: setting.maxRetries,
       tools: getToolSet(skillManager),
       toolChoice: "auto",
diff --git a/packages/runtime/src/state-machine/states/init.test.ts b/packages/runtime/src/state-machine/states/init.test.ts
index 0e55473d..064e7e5d 100644
--- a/packages/runtime/src/state-machine/states/init.test.ts
+++ b/packages/runtime/src/state-machine/states/init.test.ts
@@ -43,7 +43,6 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => {
           type: "instructionMessage",
           id: expect.any(String),
           contents: [{ type: "textPart", id: expect.any(String), text: expect.any(String) }],
-          cache: true,
         },
         {
           type: "userMessage",

From 59a28048c0ed981f6ed346d5c47ec50602be447d Mon Sep 17 00:00:00 2001
From: HiranoMasaaki <lambda.groove@gmail.com>
Date: Wed, 25 Feb 2026 03:03:47 +0000
Subject: [PATCH 7/7] Fix: Move @ai-sdk/anthropic dependency to correct package
 scope

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bun.lock                                  | 5 +----
 package.json                              | 5 +----
 packages/providers/anthropic/package.json | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/bun.lock b/bun.lock
index 9ab53252..b3ba2c4d 100644
--- a/bun.lock
+++ b/bun.lock
@@ -4,9 +4,6 @@
   "workspaces": {
     "": {
       "name": "perstack-monorepo",
-      "dependencies": {
-        "@ai-sdk/anthropic": "^3.0.47",
-      },
       "devDependencies": {
         "@biomejs/biome": "^2.4.2",
         "@changesets/changelog-github": "^0.5.2",
@@ -159,7 +156,7 @@
       "name": "@perstack/anthropic-provider",
       "version": "0.0.29",
       "dependencies": {
-        "@ai-sdk/anthropic": "^3.0.44",
+        "@ai-sdk/anthropic": "^3.0.47",
         "@perstack/core": "workspace:*",
         "@perstack/provider-core": "workspace:*",
       },
diff --git a/package.json b/package.json
index f9773419..5434341c 100644
--- a/package.json
+++ b/package.json
@@ -42,8 +42,5 @@
   "engines": {
     "bun": ">=1.2.0"
   },
-  "packageManager": "bun@1.3.5",
-  "dependencies": {
-    "@ai-sdk/anthropic": "^3.0.47"
-  }
+  "packageManager": "bun@1.3.5"
 }
diff --git a/packages/providers/anthropic/package.json b/packages/providers/anthropic/package.json
index 271f1312..8a37688f 100644
--- a/packages/providers/anthropic/package.json
+++ b/packages/providers/anthropic/package.json
@@ -27,7 +27,7 @@
     "test": "bun test"
   },
   "dependencies": {
-    "@ai-sdk/anthropic": "^3.0.44",
+    "@ai-sdk/anthropic": "^3.0.47",
     "@perstack/core": "workspace:*",
     "@perstack/provider-core": "workspace:*"
   },