diff --git a/.changeset/prompt-cache-breakpoints.md b/.changeset/prompt-cache-breakpoints.md new file mode 100644 index 00000000..1c296d73 --- /dev/null +++ b/.changeset/prompt-cache-breakpoints.md @@ -0,0 +1,8 @@ +--- +"@perstack/runtime": patch +"@perstack/anthropic-provider": patch +"perstack": patch +"create-expert": patch +--- + +Enable automatic prompt caching for Anthropic via request-level cache_control diff --git a/bun.lock b/bun.lock index 4dad4e33..b3ba2c4d 100644 --- a/bun.lock +++ b/bun.lock @@ -18,10 +18,10 @@ }, "apps/base": { "name": "@perstack/base", - "version": "0.0.66", + "version": "0.0.68", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", - "@perstack/core": "0.0.54", + "@perstack/core": "0.0.56", "commander": "^14.0.3", "zod": "^4.3.6", }, @@ -33,7 +33,7 @@ }, "apps/create-expert": { "name": "create-expert", - "version": "0.0.43", + "version": "0.0.45", "bin": { "create-expert": "bin/cli.ts", }, @@ -52,7 +52,7 @@ }, "apps/create-expert-skill": { "name": "@perstack/create-expert-skill", - "version": "0.0.3", + "version": "0.0.5", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "commander": "^14.0.3", @@ -68,7 +68,7 @@ }, "apps/perstack": { "name": "perstack", - "version": "0.0.95", + "version": "0.0.97", "dependencies": { "commander": "^14.0.3", }, @@ -85,7 +85,7 @@ }, "packages/core": { "name": "@perstack/core", - "version": "0.0.54", + "version": "0.0.56", "dependencies": { "@paralleldrive/cuid2": "^3.3.0", "zod": "^4.3.6", @@ -98,7 +98,7 @@ }, "packages/filesystem": { "name": "@perstack/filesystem-storage", - "version": "0.0.25", + "version": "0.0.27", "dependencies": { "@perstack/core": "workspace:*", }, @@ -111,7 +111,7 @@ }, "packages/installer": { "name": "@perstack/installer", - "version": "0.0.18", + "version": "0.0.20", "dependencies": { "@perstack/api-client": "^0.0.56", "@perstack/core": "workspace:*", @@ -127,7 +127,7 @@ }, "packages/log": { "name": "@perstack/log", - "version": "0.0.11", + "version": "0.0.13", "dependencies": { "@perstack/core": "workspace:*", "@perstack/filesystem-storage": "workspace:*", @@ -140,7 +140,7 @@ }, "packages/perstack-toml": { "name": "@perstack/perstack-toml", - "version": "0.0.10", + "version": "0.0.12", "dependencies": { "@perstack/core": "workspace:*", "smol-toml": "^1.6.0", @@ -154,9 +154,9 @@ }, "packages/providers/anthropic": { "name": "@perstack/anthropic-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { - "@ai-sdk/anthropic": "^3.0.44", + "@ai-sdk/anthropic": "^3.0.47", "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*", }, @@ -169,7 +169,7 @@ }, "packages/providers/azure-openai": { "name": "@perstack/azure-openai-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/azure": "^3.0.31", "@perstack/core": "workspace:*", @@ -184,7 +184,7 @@ }, "packages/providers/bedrock": { "name": "@perstack/bedrock-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/amazon-bedrock": "^4.0.60", "@perstack/core": "workspace:*", @@ -199,7 +199,7 @@ }, "packages/providers/core": { "name": "@perstack/provider-core", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@perstack/core": "workspace:*", "undici": "^7.22.0", @@ -213,7 +213,7 @@ }, "packages/providers/deepseek": { "name": "@perstack/deepseek-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/deepseek": "^2.0.20", "@perstack/core": "workspace:*", @@ -228,7 +228,7 @@ }, "packages/providers/google": { "name": "@perstack/google-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/google": "^3.0.29", "@perstack/core": "workspace:*", @@ -243,7 +243,7 @@ }, "packages/providers/ollama": { "name": "@perstack/ollama-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*", @@ -258,7 +258,7 @@ }, "packages/providers/openai": { "name": "@perstack/openai-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/openai": "^3.0.29", "@perstack/core": "workspace:*", @@ -273,7 +273,7 @@ }, "packages/providers/vertex": { "name": "@perstack/vertex-provider", - "version": "0.0.27", + "version": "0.0.29", "dependencies": { "@ai-sdk/google-vertex": "^4.0.58", "@perstack/core": "workspace:*", @@ -288,7 +288,7 @@ }, "packages/react": { "name": "@perstack/react", - "version": "0.0.58", + "version": "0.0.60", "dependencies": { "@perstack/core": "workspace:*", }, @@ -307,7 +307,7 @@ }, "packages/runtime": { "name": "@perstack/runtime", - "version": "0.0.115", + "version": "0.0.117", "dependencies": { "@ai-sdk/amazon-bedrock": "^4.0.60", "@ai-sdk/anthropic": "^3.0.44", @@ -319,8 +319,8 @@ "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", "@perstack/api-client": "^0.0.56", - "@perstack/base": "0.0.66", - "@perstack/core": "0.0.54", + "@perstack/base": "0.0.68", + "@perstack/core": "0.0.56", "ai": "^6.0.86", "ollama-ai-provider-v2": "^3.3.0", "smol-toml": "^1.6.0", @@ -346,7 +346,7 @@ }, "packages/skill-manager": { "name": "@perstack/skill-manager", - "version": "0.0.12", + "version": "0.0.14", "dependencies": { "@modelcontextprotocol/sdk": "^1.26.0", "@paralleldrive/cuid2": "^3.3.0", @@ -362,7 +362,7 @@ }, "packages/tui": { "name": "@perstack/tui", - "version": "0.0.16", + "version": "0.0.18", "dependencies": { "@paralleldrive/cuid2": "^3.3.0", "@perstack/core": "workspace:*", @@ -379,7 +379,7 @@ }, "packages/tui-components": { "name": "@perstack/tui-components", - "version": "0.0.18", + "version": "0.0.20", "dependencies": { "@perstack/core": "workspace:*", "@perstack/react": "workspace:*", @@ -397,7 +397,7 @@ "packages": { "@ai-sdk/amazon-bedrock": ["@ai-sdk/amazon-bedrock@4.0.63", "", { "dependencies": { "@ai-sdk/anthropic": "3.0.46", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@smithy/eventstream-codec": "^4.0.1", "@smithy/util-utf8": "^4.0.0", "aws4fetch": "^1.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-kNOaIaOXWFZFWbB0xM1l/bQYo7XwTkpdHbrA6n9A2U1c4/DcLF/+Rwc3vZF6MHPVSjoYVG0qxIa7jh39rKftYA=="], - "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.47", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-E6Z3i/xvxGDxRskMMbuX9+xDK4l5LesrP2O7YQ0CcbAkYP25qTo/kYGf/AsJrLkNIY23HeO/kheUWtG1XZllDA=="], "@ai-sdk/azure": ["@ai-sdk/azure@3.0.31", "", { "dependencies": { "@ai-sdk/openai": "3.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-W9x6nt+yf+Ns0/Wx7U9TXHLmfu7mOUqy1b/drtVd3DvNfDudyruQM/YjM2268Q0FatSrPlA2RlnPVPGRH/4V8Q=="], @@ -1325,6 +1325,10 @@ "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="], + "@ai-sdk/amazon-bedrock/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + + "@ai-sdk/google-vertex/@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.46", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-zXJPiNHaIiQ6XUqLeSYZ3ZbSzjqt1pNWEUf2hlkXlmmw8IF8KI0ruuGaDwKCExmtuNRf0E4TDxhsc9wRgWTzpw=="], + "@aws-crypto/util/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], "@babel/code-frame/@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.28.5", "", {}, "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q=="], diff --git a/e2e/perstack-cli/continue.test.ts b/e2e/perstack-cli/continue.test.ts index e67e01e8..ad43c3e5 100644 --- a/e2e/perstack-cli/continue.test.ts +++ b/e2e/perstack-cli/continue.test.ts @@ -72,6 +72,16 @@ describe.concurrent("Continue Job", () => { ).toBe(true) const completeEvents = filterEventsByType(continueResult.events, "completeRun") expect(completeEvents.length).toBe(1) + + // Verify usage tracking flows through multi-turn conversations. + // On turn 2, the conversation prefix from turn 1 is resent — with automatic + // prompt caching enabled via providerOptions, cachedInputTokens should be populated. + // Actual cache hits depend on the model's minimum token threshold + // (e.g. 1024 for Sonnet, 4096 for Haiku 4.5). + const completeEvent = completeEvents[0] + const usage = (completeEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.cachedInputTokens).toBe("number") }) // ───────────────────────────────────────────────────────────────────────── @@ -128,6 +138,11 @@ describe.concurrent("Continue Job", () => { expect(continueCompleteEvents.length).toBe(1) const lastCompleteEvent = continueCompleteEvents[continueCompleteEvents.length - 1] expect((lastCompleteEvent as { text?: string }).text).toBeDefined() + + // Verify usage includes cache metrics on continued run + const usage = (lastCompleteEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.cachedInputTokens).toBe("number") }) // ───────────────────────────────────────────────────────────────────────── diff --git a/e2e/perstack-cli/providers.test.ts b/e2e/perstack-cli/providers.test.ts index 2256005d..c71a1201 100644 --- a/e2e/perstack-cli/providers.test.ts +++ b/e2e/perstack-cli/providers.test.ts @@ -47,6 +47,13 @@ describe.concurrent("LLM Providers", () => { // Note: text may be empty when using attemptCompletion tool (explicit completion) // The actual response is in the checkpoint messages, not in completeRun.text expect((completeEvent as { text?: string }).text).toBeDefined() + + // Verify usage tracking includes cache token metrics + const usage = (completeEvent as { usage?: Record }).usage + expect(usage).toBeDefined() + expect(typeof usage?.inputTokens).toBe("number") + expect(typeof usage?.outputTokens).toBe("number") + expect(typeof usage?.cachedInputTokens).toBe("number") }, LLM_TIMEOUT, ) diff --git a/packages/providers/anthropic/package.json b/packages/providers/anthropic/package.json index 271f1312..8a37688f 100644 --- a/packages/providers/anthropic/package.json +++ b/packages/providers/anthropic/package.json @@ -27,7 +27,7 @@ "test": "bun test" }, "dependencies": { - "@ai-sdk/anthropic": "^3.0.44", + "@ai-sdk/anthropic": "^3.0.47", "@perstack/core": "workspace:*", "@perstack/provider-core": "workspace:*" }, diff --git a/packages/providers/anthropic/src/adapter.test.ts b/packages/providers/anthropic/src/adapter.test.ts index f8393aff..9dd23a4a 100644 --- a/packages/providers/anthropic/src/adapter.test.ts +++ b/packages/providers/anthropic/src/adapter.test.ts @@ -64,19 +64,23 @@ describe("AnthropicProviderAdapter", () => { }) describe("getProviderOptions", () => { - it("returns undefined when no skills provided", () => { + it("returns cacheControl when no skills provided", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions() - expect(options).toBeUndefined() + expect(options).toEqual({ + anthropic: { cacheControl: { type: "ephemeral" } }, + }) }) - it("returns undefined when empty skills array provided", () => { + it("returns cacheControl when empty skills array provided", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions({ skills: [] }) - expect(options).toBeUndefined() + expect(options).toEqual({ + anthropic: { cacheControl: { type: "ephemeral" } }, + }) }) - it("returns provider options with builtin skills", () => { + it("returns provider options with builtin skills and cacheControl", () => { const adapter = new AnthropicProviderAdapter(mockConfig) const options = adapter.getProviderOptions({ skills: [{ type: "builtin", skillId: "pdf" }], @@ -86,6 +90,7 @@ describe("AnthropicProviderAdapter", () => { container: { skills: [{ type: "builtin", name: "pdf" }], }, + cacheControl: { type: "ephemeral" }, }, }) }) diff --git a/packages/providers/anthropic/src/adapter.ts b/packages/providers/anthropic/src/adapter.ts index 598bb93c..3b27d8d1 100644 --- a/packages/providers/anthropic/src/adapter.ts +++ b/packages/providers/anthropic/src/adapter.ts @@ -40,7 +40,19 @@ export class AnthropicProviderAdapter extends BaseProviderAdapter { } override getProviderOptions(config?: ProviderOptionsConfig): ProviderOptions | undefined { - return buildProviderOptions(config?.skills) + const skillOptions = buildProviderOptions(config?.skills) + const cacheOptions: ProviderOptions = { + anthropic: { cacheControl: { type: "ephemeral" } }, + } + if (!skillOptions) { + return cacheOptions + } + return { + anthropic: { + ...skillOptions["anthropic"], + ...cacheOptions["anthropic"], + }, + } } override getReasoningOptions(budget: ReasoningBudget): ProviderOptions | undefined { diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index b0adf1d1..4fd07d85 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -65,7 +65,6 @@ export function createInstructionMessage(expert: Expert, startedAt: number): Ins }, ], id: createId(), - cache: true, } } diff --git a/packages/runtime/src/messages/message.test.ts b/packages/runtime/src/messages/message.test.ts index 14d6f6af..c3f97230 100644 --- a/packages/runtime/src/messages/message.test.ts +++ b/packages/runtime/src/messages/message.test.ts @@ -516,7 +516,7 @@ describe("@perstack/messages: instruction-message", () => { } const result = createInstructionMessage(expert, startedAt) expect(result.type).toBe("instructionMessage") - expect(result.cache).toBe(true) + expect(result.cache).toBeUndefined() expect(result.contents[0].type).toBe("textPart") expect(result.contents[0].text).toContain("You are a test expert.") }) diff --git a/packages/runtime/src/messages/message.ts b/packages/runtime/src/messages/message.ts index 6984d84b..d4bb1422 100644 --- a/packages/runtime/src/messages/message.ts +++ b/packages/runtime/src/messages/message.ts @@ -131,6 +131,7 @@ export function messageToCoreMessage(message: Message): ModelMessage { } } } + function instructionContentsToCoreContent( contents: InstructionMessage["contents"], ): SystemModelMessage["content"] { diff --git a/packages/runtime/src/state-machine/states/init.test.ts b/packages/runtime/src/state-machine/states/init.test.ts index 0e55473d..064e7e5d 100644 --- a/packages/runtime/src/state-machine/states/init.test.ts +++ b/packages/runtime/src/state-machine/states/init.test.ts @@ -43,7 +43,6 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => { type: "instructionMessage", id: expect.any(String), contents: [{ type: "textPart", id: expect.any(String), text: expect.any(String) }], - cache: true, }, { type: "userMessage",