From 4b60917d1b69aee6d73eca7b005344876bdee881 Mon Sep 17 00:00:00 2001 From: Elliott Lawson Date: Wed, 9 Jul 2025 22:38:13 -0400 Subject: [PATCH 1/2] fix: prevent tool result cache accumulation across multiple tool calls Apply tool_result_cache_type only to the last tool result message across all messages instead of applying it to each tool result during creation. This prevents cache accumulation when there are multiple tool call rounds. --- src/Providers/Anthropic/Handlers/Text.php | 5 - src/Providers/Anthropic/Maps/MessageMap.php | 24 +- .../Anthropic/ToolResultCachingTest.php | 247 ++++++++++++++++++ 3 files changed, 270 insertions(+), 6 deletions(-) create mode 100644 tests/Providers/Anthropic/ToolResultCachingTest.php diff --git a/src/Providers/Anthropic/Handlers/Text.php b/src/Providers/Anthropic/Handlers/Text.php index bc5b31780..891b748b7 100644 --- a/src/Providers/Anthropic/Handlers/Text.php +++ b/src/Providers/Anthropic/Handlers/Text.php @@ -104,11 +104,6 @@ protected function handleToolCalls(): Response $toolResults = $this->callTools($this->request->tools(), $this->tempResponse->toolCalls); $message = new ToolResultMessage($toolResults); - // Apply tool result caching if configured - if ($tool_result_cache_type = $this->request->providerOptions('tool_result_cache_type')) { - $message->withProviderOptions(['cacheType' => $tool_result_cache_type]); - } - $this->request->addMessage($message); $this->request->resetToolChoice(); diff --git a/src/Providers/Anthropic/Maps/MessageMap.php b/src/Providers/Anthropic/Maps/MessageMap.php index 09347b01f..d10fd280f 100644 --- a/src/Providers/Anthropic/Maps/MessageMap.php +++ b/src/Providers/Anthropic/Maps/MessageMap.php @@ -32,10 +32,32 @@ public static function map(array $messages, array $requestProviderOptions = []): throw new PrismException('Anthropic does not support SystemMessages in the messages array. 
Use withSystemPrompt or withSystemPrompts instead.'); } - return array_map( + $mappedMessages = array_map( fn (Message $message): array => self::mapMessage($message, $requestProviderOptions), $messages ); + + if (isset($requestProviderOptions['tool_result_cache_type'])) { + $lastToolResultIndex = null; + + for ($i = count($mappedMessages) - 1; $i >= 0; $i--) { + if ($mappedMessages[$i]['role'] === 'user' && + isset($mappedMessages[$i]['content'][0]['type']) && + $mappedMessages[$i]['content'][0]['type'] === 'tool_result') { + $lastToolResultIndex = $i; + break; + } + } + + if ($lastToolResultIndex !== null) { + $lastContent = &$mappedMessages[$lastToolResultIndex]['content']; + $lastContent[count($lastContent) - 1]['cache_control'] = [ + 'type' => $requestProviderOptions['tool_result_cache_type'], + ]; + } + } + + return $mappedMessages; } /** diff --git a/tests/Providers/Anthropic/ToolResultCachingTest.php b/tests/Providers/Anthropic/ToolResultCachingTest.php new file mode 100644 index 000000000..5c8453797 --- /dev/null +++ b/tests/Providers/Anthropic/ToolResultCachingTest.php @@ -0,0 +1,247 @@ +<?php + +declare(strict_types=1); + +namespace Tests\Providers\Anthropic; + +use Prism\Prism\Prism; +use Prism\Prism\Providers\Anthropic\Handlers\Text; +use Prism\Prism\Providers\Anthropic\Maps\MessageMap; +use Prism\Prism\ValueObjects\Messages\AssistantMessage; +use Prism\Prism\ValueObjects\Messages\ToolResultMessage; +use Prism\Prism\ValueObjects\Messages\UserMessage; +use Prism\Prism\ValueObjects\ToolCall; +use Prism\Prism\ValueObjects\ToolResult; + +beforeEach(function (): void { + config()->set('prism.providers.anthropic.api_key', env('ANTHROPIC_API_KEY', 'sk-1234')); +}); + +it('applies tool_result_cache_type only to the last tool result message across all messages', function (): void { + // Create test messages simulating multiple tool call rounds + $messages = [ + new UserMessage('What time is the tigers game today and should I wear a coat?'), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_1', + name: 'search', + arguments: ['query' => 'Detroit Tigers baseball game time today'] + ), + ]), + new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_1', + toolName: 'search', + args: ['query' => 'Detroit Tigers baseball game time today'], + result: 'The tigers game is at 3pm in detroit' + ), + ]), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_2', + name: 'weather', + arguments: ['city' => 'Detroit'] + ), + ]), + 
new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_2', + toolName: 'weather', + args: ['city' => 'Detroit'], + result: 'The weather will be 75° and sunny' + ), + ]), + new AssistantMessage('The Tigers game is at 3pm today. The weather will be 75° and sunny, so you won\'t need a coat!'), + ]; + + // Map the messages with provider options + $mappedMessages = MessageMap::map( + $messages, + ['tool_result_cache_type' => 'ephemeral'] + ); + + // Verify that only the last tool result message has cache_control + $toolResultMessages = array_filter($mappedMessages, fn ($message): bool => $message['role'] === 'user' && + isset($message['content'][0]['type']) && + $message['content'][0]['type'] === 'tool_result'); + + expect(count($toolResultMessages))->toBe(2); + + // Get the tool result messages by their indices + $toolResultIndices = array_keys($toolResultMessages); + $firstToolResultIndex = $toolResultIndices[0]; + $lastToolResultIndex = $toolResultIndices[1]; + + // First tool result should NOT have cache_control + $firstToolResult = $mappedMessages[$firstToolResultIndex]; + expect($firstToolResult['content'][0])->not->toHaveKey('cache_control'); + + // Last tool result SHOULD have cache_control + $lastToolResult = $mappedMessages[$lastToolResultIndex]; + expect($lastToolResult['content'][0])->toHaveKey('cache_control'); + expect($lastToolResult['content'][0]['cache_control'])->toBe(['type' => 'ephemeral']); +}); + +it('handles single tool result message with cache_control', function (): void { + $messages = [ + new UserMessage('What is the weather?'), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_1', + name: 'weather', + arguments: ['city' => 'Detroit'] + ), + ]), + new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_1', + toolName: 'weather', + args: ['city' => 'Detroit'], + result: 'The weather will be 75° and sunny' + ), + ]), + ]; + + // Map the messages with provider options + $mappedMessages = MessageMap::map( + 
$messages, + ['tool_result_cache_type' => 'ephemeral'] + ); + + // Find the tool result message + $toolResultMessage = null; + foreach ($mappedMessages as $message) { + if ($message['role'] === 'user' && + isset($message['content'][0]['type']) && + $message['content'][0]['type'] === 'tool_result') { + $toolResultMessage = $message; + break; + } + } + + // The single tool result should have cache_control + expect($toolResultMessage)->not->toBeNull(); + expect($toolResultMessage['content'][0])->toHaveKey('cache_control'); + expect($toolResultMessage['content'][0]['cache_control'])->toBe(['type' => 'ephemeral']); +}); + +it('does not apply cache_control when tool_result_cache_type is not set', function (): void { + $messages = [ + new UserMessage('What is the weather?'), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_1', + name: 'weather', + arguments: ['city' => 'Detroit'] + ), + ]), + new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_1', + toolName: 'weather', + args: ['city' => 'Detroit'], + result: 'The weather will be 75° and sunny' + ), + ]), + ]; + + // Map the messages without provider options + $mappedMessages = MessageMap::map($messages); + + // Find the tool result message + $toolResultMessage = null; + foreach ($mappedMessages as $message) { + if ($message['role'] === 'user' && + isset($message['content'][0]['type']) && + $message['content'][0]['type'] === 'tool_result') { + $toolResultMessage = $message; + break; + } + } + + // Should not have cache_control + expect($toolResultMessage)->not->toBeNull(); + expect($toolResultMessage['content'][0])->not->toHaveKey('cache_control'); +}); + +it('sends only one cache block when request has multiple tool results in full lifecycle', function (): void { + Prism::fake(); + + // Simulate a request that already has multiple tool call rounds in history + $request = Prism::text() + ->using('anthropic', 'claude-3-5-sonnet-latest') + ->withMessages([ + new UserMessage('What time is the 
game and weather?'), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_1', + name: 'search', + arguments: ['query' => 'game time'] + ), + ]), + new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_1', + toolName: 'search', + args: ['query' => 'game time'], + result: '3pm' + ), + ]), + new AssistantMessage('', toolCalls: [ + new ToolCall( + id: 'call_2', + name: 'weather', + arguments: ['city' => 'Detroit'] + ), + ]), + new ToolResultMessage([ + new ToolResult( + toolCallId: 'call_2', + toolName: 'weather', + args: ['city' => 'Detroit'], + result: 'sunny' + ), + ]), + ]) + ->withProviderOptions(['tool_result_cache_type' => 'ephemeral']); + + // Get the actual payload that would be sent + $payload = Text::buildHttpRequestPayload($request->toRequest()); + + // Count cache blocks in the payload + $cacheBlocks = 0; + foreach ($payload['messages'] as $message) { + foreach ($message['content'] as $content) { + if (isset($content['cache_control'])) { + $cacheBlocks++; + } + } + } + + expect($cacheBlocks)->toBe(1); + + // Find the last tool result message + $lastToolResultIndex = null; + for ($i = count($payload['messages']) - 1; $i >= 0; $i--) { + if ($payload['messages'][$i]['role'] === 'user' && + isset($payload['messages'][$i]['content'][0]['type']) && + $payload['messages'][$i]['content'][0]['type'] === 'tool_result') { + $lastToolResultIndex = $i; + break; + } + } + + // Verify the cache is on the last tool result + expect($lastToolResultIndex)->not->toBeNull(); + expect($payload['messages'][$lastToolResultIndex]['content'][0])->toHaveKey('cache_control'); + expect($payload['messages'][$lastToolResultIndex]['content'][0]['cache_control'])->toBe(['type' => 'ephemeral']); + + // Verify earlier tool results don't have cache + for ($i = 0; $i < $lastToolResultIndex; $i++) { + if ($payload['messages'][$i]['role'] === 'user' && + isset($payload['messages'][$i]['content'][0]['type']) && + $payload['messages'][$i]['content'][0]['type'] === 
'tool_result') { + expect($payload['messages'][$i]['content'][0])->not->toHaveKey('cache_control'); + } + } +}); From a34ce718e40c0db84e649e5aefac19a11547349a Mon Sep 17 00:00:00 2001 From: TJ Miller Date: Sun, 25 Jan 2026 17:09:47 -0500 Subject: [PATCH 2/2] test: add integration test with real API fixtures --- .../tool-result-caching-multi-step-1.json | 1 + .../tool-result-caching-multi-step-2.json | 1 + .../Anthropic/ToolResultCachingTest.php | 37 ++++++++++++++++++- 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 tests/Fixtures/anthropic/tool-result-caching-multi-step-1.json create mode 100644 tests/Fixtures/anthropic/tool-result-caching-multi-step-2.json diff --git a/tests/Fixtures/anthropic/tool-result-caching-multi-step-1.json b/tests/Fixtures/anthropic/tool-result-caching-multi-step-1.json new file mode 100644 index 000000000..14717a1b7 --- /dev/null +++ b/tests/Fixtures/anthropic/tool-result-caching-multi-step-1.json @@ -0,0 +1 @@ +{"model":"claude-sonnet-4-20250514","id":"msg_01WszxmDcCTJWvQnyrZuYevD","type":"message","role":"assistant","content":[{"type":"text","text":"I'll help you find the Detroit Tigers game time today and check the weather in Detroit to advise on whether you should wear a coat."},{"type":"tool_use","id":"toolu_01DLVLVQttBP5DXr3ri6us49","name":"search","input":{"query":"Detroit Tigers baseball game today time schedule"}},{"type":"tool_use","id":"toolu_01SFHFTJjudPJHhcZY5E9kim","name":"weather","input":{"city":"Detroit"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":478,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":120,"service_tier":"standard"}} \ No newline at end of file diff --git a/tests/Fixtures/anthropic/tool-result-caching-multi-step-2.json b/tests/Fixtures/anthropic/tool-result-caching-multi-step-2.json new file mode 100644 index 000000000..1927395f7 --- /dev/null +++ 
b/tests/Fixtures/anthropic/tool-result-caching-multi-step-2.json @@ -0,0 +1 @@ +{"model":"claude-sonnet-4-20250514","id":"msg_01BUHWfpYmd6D8h9GfvueZVW","type":"message","role":"assistant","content":[{"type":"text","text":"Great news! Here's what I found:\n\n**Detroit Tigers Game**: The game is today at 3:00 PM in Detroit.\n\n**Weather in Detroit**: It will be 75°F and sunny.\n\n**Coat Recommendation**: With sunny skies and a comfortable temperature of 75°F, you won't need a coat! This is perfect baseball weather - warm enough to be comfortable but not too hot. You might want to bring sunglasses and sunscreen instead, and perhaps a light shirt or jersey to show your Tigers support!\n\nHave a great time at the game!"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":676,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":127,"service_tier":"standard"}} \ No newline at end of file diff --git a/tests/Providers/Anthropic/ToolResultCachingTest.php b/tests/Providers/Anthropic/ToolResultCachingTest.php index 5c8453797..1c8026df5 100644 --- a/tests/Providers/Anthropic/ToolResultCachingTest.php +++ b/tests/Providers/Anthropic/ToolResultCachingTest.php @@ -4,7 +4,8 @@ namespace Tests\Providers\Anthropic; -use Prism\Prism\Prism; +use Prism\Prism\Facades\Prism; +use Prism\Prism\Facades\Tool; use Prism\Prism\Providers\Anthropic\Handlers\Text; use Prism\Prism\Providers\Anthropic\Maps\MessageMap; use Prism\Prism\ValueObjects\Messages\AssistantMessage; @@ -12,6 +13,7 @@ use Prism\Prism\ValueObjects\Messages\UserMessage; use Prism\Prism\ValueObjects\ToolCall; use Prism\Prism\ValueObjects\ToolResult; +use Tests\Fixtures\FixtureResponse; beforeEach(function (): void { config()->set('prism.providers.anthropic.api_key', env('ANTHROPIC_API_KEY', 'sk-1234')); @@ -61,7 +63,7 @@ ); // Verify that only the last tool result message has cache_control - 
$toolResultMessages = array_filter($mappedMessages, fn ($message): bool => $message['role'] === 'user' && + $toolResultMessages = array_filter($mappedMessages, fn (array $message): bool => $message['role'] === 'user' && isset($message['content'][0]['type']) && $message['content'][0]['type'] === 'tool_result'); @@ -245,3 +247,34 @@ } } }); + +it('can use tool_result_cache_type with multi-step tool calls via real API', function (): void { + FixtureResponse::fakeResponseSequence('v1/messages', 'anthropic/tool-result-caching-multi-step'); + + $tools = [ + Tool::as('weather') + ->for('useful when you need to search for current weather conditions') + ->withStringParameter('city', 'the city you want the weather for') + ->using(fn (string $city): string => 'The weather will be 75° and sunny'), + Tool::as('search') + ->for('useful for searching current events or data') + ->withStringParameter('query', 'The detailed search query') + ->using(fn (string $query): string => 'The tigers game is at 3pm in detroit'), + ]; + + $response = Prism::text() + ->using('anthropic', 'claude-sonnet-4-20250514') + ->withTools($tools) + ->withMaxSteps(3) + ->withPrompt('Search for what time the Detroit Tigers baseball game is today, then check the weather in Detroit to tell me if I should wear a coat.') + ->withProviderOptions(['tool_result_cache_type' => 'ephemeral']) + ->asText(); + + expect($response->steps->count())->toBeGreaterThanOrEqual(1); + + $totalToolCalls = $response->steps->sum(fn ($step): int => count($step->toolCalls ?? [])); + expect($totalToolCalls)->toBeGreaterThanOrEqual(1); + + expect($response->text)->toContain('3'); + expect($response->text)->toContain('75'); +});