Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changeset/prompt-cache-breakpoints.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@perstack/runtime": patch
"@perstack/anthropic-provider": patch
"perstack": patch
"create-expert": patch
---

Enable automatic prompt caching for Anthropic via request-level cache_control
60 changes: 32 additions & 28 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions e2e/perstack-cli/continue.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ describe.concurrent("Continue Job", () => {
).toBe(true)
const completeEvents = filterEventsByType(continueResult.events, "completeRun")
expect(completeEvents.length).toBe(1)

// Verify usage tracking flows through multi-turn conversations.
// On turn 2, the conversation prefix from turn 1 is resent — with automatic
// prompt caching enabled via providerOptions, cachedInputTokens should be populated.
// Actual cache hits depend on the model's minimum token threshold
// (e.g. 1024 for Sonnet, 4096 for Haiku 4.5).
const completeEvent = completeEvents[0]
const usage = (completeEvent as { usage?: Record<string, number> }).usage
expect(usage).toBeDefined()
expect(typeof usage?.cachedInputTokens).toBe("number")
})

// ─────────────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -128,6 +138,11 @@ describe.concurrent("Continue Job", () => {
expect(continueCompleteEvents.length).toBe(1)
const lastCompleteEvent = continueCompleteEvents[continueCompleteEvents.length - 1]
expect((lastCompleteEvent as { text?: string }).text).toBeDefined()

// Verify usage includes cache metrics on continued run
const usage = (lastCompleteEvent as { usage?: Record<string, number> }).usage
expect(usage).toBeDefined()
expect(typeof usage?.cachedInputTokens).toBe("number")
})

// ─────────────────────────────────────────────────────────────────────────
Expand Down
7 changes: 7 additions & 0 deletions e2e/perstack-cli/providers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ describe.concurrent("LLM Providers", () => {
// Note: text may be empty when using attemptCompletion tool (explicit completion)
// The actual response is in the checkpoint messages, not in completeRun.text
expect((completeEvent as { text?: string }).text).toBeDefined()

// Verify usage tracking includes cache token metrics
const usage = (completeEvent as { usage?: Record<string, unknown> }).usage
expect(usage).toBeDefined()
expect(typeof usage?.inputTokens).toBe("number")
expect(typeof usage?.outputTokens).toBe("number")
expect(typeof usage?.cachedInputTokens).toBe("number")
},
LLM_TIMEOUT,
)
Expand Down
2 changes: 1 addition & 1 deletion packages/providers/anthropic/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"test": "bun test"
},
"dependencies": {
"@ai-sdk/anthropic": "^3.0.44",
"@ai-sdk/anthropic": "^3.0.47",
"@perstack/core": "workspace:*",
"@perstack/provider-core": "workspace:*"
},
Expand Down
15 changes: 10 additions & 5 deletions packages/providers/anthropic/src/adapter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,23 @@ describe("AnthropicProviderAdapter", () => {
})

describe("getProviderOptions", () => {
it("returns undefined when no skills provided", () => {
it("returns cacheControl when no skills provided", () => {
const adapter = new AnthropicProviderAdapter(mockConfig)
const options = adapter.getProviderOptions()
expect(options).toBeUndefined()
expect(options).toEqual({
anthropic: { cacheControl: { type: "ephemeral" } },
})
})

it("returns undefined when empty skills array provided", () => {
it("returns cacheControl when empty skills array provided", () => {
const adapter = new AnthropicProviderAdapter(mockConfig)
const options = adapter.getProviderOptions({ skills: [] })
expect(options).toBeUndefined()
expect(options).toEqual({
anthropic: { cacheControl: { type: "ephemeral" } },
})
})

it("returns provider options with builtin skills", () => {
it("returns provider options with builtin skills and cacheControl", () => {
const adapter = new AnthropicProviderAdapter(mockConfig)
const options = adapter.getProviderOptions({
skills: [{ type: "builtin", skillId: "pdf" }],
Expand All @@ -86,6 +90,7 @@ describe("AnthropicProviderAdapter", () => {
container: {
skills: [{ type: "builtin", name: "pdf" }],
},
cacheControl: { type: "ephemeral" },
},
})
})
Expand Down
14 changes: 13 additions & 1 deletion packages/providers/anthropic/src/adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,19 @@ export class AnthropicProviderAdapter extends BaseProviderAdapter {
}

override getProviderOptions(config?: ProviderOptionsConfig): ProviderOptions | undefined {
  // Every request carries an ephemeral cache_control marker so Anthropic's
  // automatic prompt caching is enabled whether or not skills are configured.
  // The marker is spread *after* the skill options, guaranteeing cacheControl
  // is never clobbered by whatever buildProviderOptions produced.
  const skillOptions = buildProviderOptions(config?.skills)
  return {
    anthropic: {
      ...skillOptions?.["anthropic"],
      cacheControl: { type: "ephemeral" },
    },
  }
}

override getReasoningOptions(budget: ReasoningBudget): ProviderOptions | undefined {
Expand Down
1 change: 0 additions & 1 deletion packages/runtime/src/messages/instruction-message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ export function createInstructionMessage(expert: Expert, startedAt: number): Ins
},
],
id: createId(),
cache: true,
}
}

Expand Down
2 changes: 1 addition & 1 deletion packages/runtime/src/messages/message.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ describe("@perstack/messages: instruction-message", () => {
}
const result = createInstructionMessage(expert, startedAt)
expect(result.type).toBe("instructionMessage")
expect(result.cache).toBe(true)
expect(result.cache).toBeUndefined()
expect(result.contents[0].type).toBe("textPart")
expect(result.contents[0].text).toContain("You are a test expert.")
})
Expand Down
1 change: 1 addition & 0 deletions packages/runtime/src/messages/message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ export function messageToCoreMessage(message: Message): ModelMessage {
}
}
}

function instructionContentsToCoreContent(
contents: InstructionMessage["contents"],
): SystemModelMessage["content"] {
Expand Down
1 change: 0 additions & 1 deletion packages/runtime/src/state-machine/states/init.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ describe("@perstack/runtime: StateMachineLogic['Init']", () => {
type: "instructionMessage",
id: expect.any(String),
contents: [{ type: "textPart", id: expect.any(String), text: expect.any(String) }],
cache: true,
},
{
type: "userMessage",
Expand Down