From 383bd9d1df7d71b0d8756ac4a2bb1fd0f935805e Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 18 Feb 2026 21:15:51 +0000 Subject: [PATCH 1/4] fix: enforce strict no-args schema on attemptCompletion and unify completion path - Change attemptCompletion inputSchema to z.object({}).strict() producing additionalProperties: false, preventing LLMs from passing args - Remove extractTextFromLastMessage shortcut in CallingMcpTools; always transition to GeneratingRunResult for final text generation - Revert "OK" fallback text in GeneratingRunResult - Update E2E expert instructions to simple "Call attemptCompletion" Co-Authored-By: Claude Opus 4.6 --- .../strict-attempt-completion-schema.md | 14 ++++ apps/base/src/tools/attempt-completion.ts | 3 +- e2e/experts/bundled-base.toml | 2 +- e2e/experts/cli-commands.toml | 4 +- e2e/experts/continue-resume.toml | 4 +- e2e/experts/delegate-chain.toml | 6 +- e2e/experts/error-handling.toml | 2 +- e2e/experts/errors.toml | 4 +- e2e/experts/global-runtime.toml | 2 +- e2e/experts/lockfile.toml | 2 +- e2e/experts/mixed-tools.toml | 2 +- e2e/experts/multi-modal.toml | 4 +- e2e/experts/parallel-delegate.toml | 6 +- e2e/experts/reasoning-budget.toml | 24 +++--- e2e/experts/runtime-version-future.toml | 4 +- e2e/experts/runtime-version.toml | 10 +-- e2e/experts/skills.toml | 10 +-- e2e/experts/special-tools.toml | 2 +- e2e/experts/versioned-base.toml | 2 +- e2e/perstack-cli/delegate.test.ts | 2 +- .../states/calling-mcp-tools.test.ts | 82 +------------------ .../state-machine/states/calling-mcp-tools.ts | 79 +----------------- .../states/generating-run-result.test.ts | 13 +-- .../states/generating-run-result.ts | 10 +-- 24 files changed, 73 insertions(+), 220 deletions(-) create mode 100644 .changeset/strict-attempt-completion-schema.md diff --git a/.changeset/strict-attempt-completion-schema.md b/.changeset/strict-attempt-completion-schema.md new file mode 100644 index 00000000..b57f2a09 --- /dev/null +++ b/.changeset/strict-attempt-completion-schema.md @@ -0,0 +1,14 @@ +--- +"@perstack/base": patch +"@perstack/runtime": patch +--- + +fix: enforce no-args schema on attemptCompletion and unify completion path + +- Change attemptCompletion inputSchema to `z.object({}).strict()` which produces + `additionalProperties: false` in the JSON Schema sent to LLMs, preventing them + from passing arguments (e.g. `{result: "OK"}`) to the tool +- Remove the `extractTextFromLastMessage` shortcut path in CallingMcpTools so that + attemptCompletion always transitions to GeneratingRunResult for final text generation +- Revert the "OK" fallback text in GeneratingRunResult (runtime should not inject text) +- Update E2E expert instructions to use simple "Call attemptCompletion" without result args diff --git a/apps/base/src/tools/attempt-completion.ts b/apps/base/src/tools/attempt-completion.ts index a3b5f5f4..2f61067e 100644 --- a/apps/base/src/tools/attempt-completion.ts +++ b/apps/base/src/tools/attempt-completion.ts @@ -1,4 +1,5 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { z } from "zod" import { errorToolResult, successToolResult } from "../lib/tool-result.js" import { getRemainingTodos } from "./todo.js" @@ -20,7 +21,7 @@ export function registerAttemptCompletion(server: McpServer) { { title: "Attempt completion", description: "Signal task completion. Validates all todos are complete before ending.", - inputSchema: {}, + inputSchema: z.object({}).strict(), }, async () => { try { diff --git a/e2e/experts/bundled-base.toml b/e2e/experts/bundled-base.toml index e783d0be..b5d324a9 100644 --- a/e2e/experts/bundled-base.toml +++ b/e2e/experts/bundled-base.toml @@ -13,7 +13,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for bundled base with InMemoryTransport" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "OK" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-bundled-base".skills."@perstack/base"] diff --git a/e2e/experts/cli-commands.toml b/e2e/experts/cli-commands.toml index 085986fa..e79b206f 100644 --- a/e2e/experts/cli-commands.toml +++ b/e2e/experts/cli-commands.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for CLI publish command validation" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-publish-test".skills."@perstack/base"] @@ -22,7 +22,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "E2E test expert for CLI log command validation" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-cli-simple".skills."@perstack/base"] diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml index 18e8fc46..99f40532 100644 --- a/e2e/experts/continue-resume.toml +++ b/e2e/experts/continue-resume.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for continue functionality" instruction = """ 1. Call askUser with question "confirm?" -2. After user responds, call attemptCompletion with result "OK" +2. After user responds, call attemptCompletion """ [experts."e2e-continue".skills."user-input"] @@ -34,7 +34,7 @@ description = "E2E test expert for resume-from functionality" instruction = """ 1. Call think with thought "processing" 2. Call askUser with question "confirm?" -3. After user responds, call attemptCompletion with result "OK" +3. After user responds, call attemptCompletion """ [experts."e2e-resume".skills."user-input"] diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index 5eedff64..466b0497 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for delegate chain" instruction = """ 1. Delegate to "e2e-delegate-level1" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-delegate-level1"] @@ -25,7 +25,7 @@ version = "1.0.0" description = "First level delegate expert" instruction = """ 1. Delegate to "e2e-delegate-level2" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-delegate-level2"] @@ -39,7 +39,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "Second level delegate expert" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-delegate-level2".skills."@perstack/base"] diff --git a/e2e/experts/error-handling.toml b/e2e/experts/error-handling.toml index e52f496e..cd559be2 100644 --- a/e2e/experts/error-handling.toml +++ b/e2e/experts/error-handling.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for tool error recovery" instruction = """ 1. Use readTextFile with the exact path given by user -2. Call attemptCompletion reporting: success/error + content or error message +2. Call attemptCompletion """ [experts."e2e-tool-error-recovery".skills."@perstack/base"] diff --git a/e2e/experts/errors.toml b/e2e/experts/errors.toml index 771a20b5..b94f9461 100644 --- a/e2e/experts/errors.toml +++ b/e2e/experts/errors.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert with broken MCP skill" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-mcp-error".skills."broken-skill"] @@ -27,7 +27,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "E2E test expert for invalid provider testing" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-invalid-provider".skills."@perstack/base"] diff --git a/e2e/experts/global-runtime.toml b/e2e/experts/global-runtime.toml index a367b869..81c63cef 100644 --- a/e2e/experts/global-runtime.toml +++ b/e2e/experts/global-runtime.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for global runtime configuration" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-global-runtime".skills."@perstack/base"] diff --git a/e2e/experts/lockfile.toml b/e2e/experts/lockfile.toml index 67018a51..1b367bee 100644 --- a/e2e/experts/lockfile.toml +++ b/e2e/experts/lockfile.toml @@ -12,7 +12,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for lockfile functionality" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "lockfile-test-ok" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-lockfile".skills."@perstack/base"] diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index 066b2939..c304e74f 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -42,7 +42,7 @@ pick = ["attemptCompletion", "think"] version = "1.0.0" description = "E2E test helper expert" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-helper".skills."@perstack/base"] diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index 8595784d..442e1cdb 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for PDF file reading" instruction = """ 1. Use readPdfFile to read the PDF at the specified path -2. Call attemptCompletion with a brief summary of the content +2. Call attemptCompletion """ [experts."e2e-pdf-reader".skills."@perstack/base"] @@ -24,7 +24,7 @@ version = "1.0.0" description = "E2E test expert for image file reading" instruction = """ 1. Use readImageFile to read the image at the specified path -2. Call attemptCompletion with a brief description of the image +2. Call attemptCompletion """ [experts."e2e-image-reader".skills."@perstack/base"] diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index bb08ace9..584064dd 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for parallel delegation" instruction = """ 1. In ONE response, delegate to BOTH "e2e-delegate-math" and "e2e-delegate-text" with "test" -2. When both return, call attemptCompletion with result "OK" +2. When both return, call attemptCompletion """ delegates = ["e2e-delegate-math", "e2e-delegate-text"] @@ -24,7 +24,7 @@ pick = ["attemptCompletion", "think"] version = "1.0.0" description = "Math delegate expert" instruction = """ -Call attemptCompletion with result "Math result: 5" +Call attemptCompletion. """ [experts."e2e-delegate-math".skills."@perstack/base"] @@ -37,7 +37,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "Text processing delegate expert" instruction = """ -Call attemptCompletion with result "Text result: olleh" +Call attemptCompletion. """ [experts."e2e-delegate-text".skills."@perstack/base"] diff --git a/e2e/experts/reasoning-budget.toml b/e2e/experts/reasoning-budget.toml index 67533c11..583d9adc 100644 --- a/e2e/experts/reasoning-budget.toml +++ b/e2e/experts/reasoning-budget.toml @@ -12,7 +12,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-minimal".skills."@perstack/base"] @@ -26,7 +26,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-low".skills."@perstack/base"] @@ -40,7 +40,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-medium".skills."@perstack/base"] @@ -54,7 +54,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-high".skills."@perstack/base"] @@ -72,7 +72,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-minimal".skills."@perstack/base"] @@ -86,7 +86,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-low".skills."@perstack/base"] @@ -100,7 +100,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-medium".skills."@perstack/base"] @@ -114,7 +114,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-high".skills."@perstack/base"] @@ -132,7 +132,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-minimal".skills."@perstack/base"] @@ -146,7 +146,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-low".skills."@perstack/base"] @@ -160,7 +160,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-medium".skills."@perstack/base"] @@ -174,7 +174,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-high".skills."@perstack/base"] diff --git a/e2e/experts/runtime-version-future.toml b/e2e/experts/runtime-version-future.toml index b5e79f0d..d782af64 100644 --- a/e2e/experts/runtime-version-future.toml +++ b/e2e/experts/runtime-version-future.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] [experts."e2e-runtime-future"] version = "1.0.0" minRuntimeVersion = "v99.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-future".skills."@perstack/base"] type = "mcpStdioSkill" @@ -23,7 +23,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-future" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-future"] diff --git a/e2e/experts/runtime-version.toml b/e2e/experts/runtime-version.toml index 55796b4e..6fc86fe6 100644 --- a/e2e/experts/runtime-version.toml +++ b/e2e/experts/runtime-version.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] [experts."e2e-runtime-v1"] version = "1.0.0" minRuntimeVersion = "v1.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-v1".skills."@perstack/base"] type = "mcpStdioSkill" @@ -20,7 +20,7 @@ pick = ["attemptCompletion"] # Expert without minRuntimeVersion (default) [experts."e2e-runtime-default"] version = "1.0.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-default".skills."@perstack/base"] type = "mcpStdioSkill" @@ -34,7 +34,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-chain-ok-l1" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-chain-ok-l1"] @@ -49,7 +49,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-chain-ok-l2" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-chain-ok-l2"] @@ -62,7 +62,7 @@ pick = ["attemptCompletion"] [experts."e2e-runtime-chain-ok-l2"] version = "1.0.0" minRuntimeVersion = "v1.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-chain-ok-l2".skills."@perstack/base"] type = "mcpStdioSkill" diff --git a/e2e/experts/skills.toml b/e2e/experts/skills.toml index bccc59d3..3afab724 100644 --- a/e2e/experts/skills.toml +++ b/e2e/experts/skills.toml @@ -12,8 +12,8 @@ instruction = """ Follow these steps exactly: 1. Use the todo tool to add the user's request as a task 2. Mark the task as completed using the todo tool -3. Call attemptCompletion -If asked to read a file, report that readTextFile is not available via attemptCompletion. +3. Summarize what you did, then call attemptCompletion +If asked to read a file, report that readTextFile is not available, then call attemptCompletion. """ [experts."e2e-pick-tools".skills."@perstack/base"] @@ -26,7 +26,7 @@ pick = ["attemptCompletion", "todo"] version = "1.0.0" description = "E2E test expert with omitted tools" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-omit-tools".skills."@perstack/base"] @@ -40,7 +40,7 @@ version = "1.0.0" description = "E2E test expert with multiple skills" instruction = """ 1. Use todo to track tasks -2. Call attemptCompletion with result "OK" +2. Call attemptCompletion. """ [experts."e2e-multi-skill".skills."exa"] @@ -78,7 +78,7 @@ pick = ["attemptCompletion", "addSkill", "removeSkill"] [experts."e2e-delegate-target"] version = "1.0.0" description = "Simple target expert for delegate testing" -instruction = "Call attemptCompletion with result 'delegated OK'." +instruction = "Call attemptCompletion." [experts."e2e-delegate-target".skills."@perstack/base"] type = "mcpStdioSkill" diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml index 64b00aad..0d550566 100644 --- a/e2e/experts/special-tools.toml +++ b/e2e/experts/special-tools.toml @@ -14,7 +14,7 @@ IMPORTANT: Call ALL 3 tools below IN PARALLEL (single tool call batch): - readImageFile: path="e2e/fixtures/test.gif" - web_search_exa: query="test" -After all 3 complete, call attemptCompletion with "OK". +After all 3 complete, call attemptCompletion. """ [experts."e2e-special-tools".skills."exa"] diff --git a/e2e/experts/versioned-base.toml b/e2e/experts/versioned-base.toml index 23f4c90a..9ff4b01f 100644 --- a/e2e/experts/versioned-base.toml +++ b/e2e/experts/versioned-base.toml @@ -13,7 +13,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for versioned base with StdioTransport" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "OK" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-versioned-base".skills."@perstack/base"] diff --git a/e2e/perstack-cli/delegate.test.ts b/e2e/perstack-cli/delegate.test.ts index 70218587..12155805 100644 --- a/e2e/perstack-cli/delegate.test.ts +++ b/e2e/perstack-cli/delegate.test.ts @@ -24,7 +24,7 @@ describe("Delegate to Expert", () => { * TOML: delegate-chain.toml defines 3 experts forming a delegation chain * Expected: * - Chain starts at root, delegates to level1, then level2 - * - Each expert calls attemptCompletion with "OK" + * - Each expert calls attemptCompletion * - Control flow: chain→level1→level2→(complete)→level1→(complete)→chain→(complete) * - Total 3 completeRun events (one per expert) */ diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts index 0a041daf..0c3695a8 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts @@ -352,86 +352,11 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { expect(event.type).toBe("attemptCompletion") }) - it("routes attemptCompletion to completeRun when textPart exists in last message", async () => { + it("always routes attemptCompletion to GeneratingRunResult (no shortcut)", async () => { const setting = createRunSetting() - // Create checkpoint with an expertMessage containing textPart + // Even with text in the last expert message, should go to GeneratingRunResult const checkpoint = createCheckpoint({ messages: [ - { - id: createId(), - type: "instructionMessage" as const, - contents: [ - { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, - ], - cache: true, - }, - { - id: createId(), - type: "expertMessage" as const, - contents: [ - { - id: createId(), - type: "thinkingPart" as const, - thinking: "Reasoning about the task...", - signature: "sig", - }, - { - id: createId(), - type: "textPart" as const, - text: "Hello! I am your assistant. How can I help you today?", - }, - { - id: createId(), - type: "toolCallPart" as const, - toolCallId: "tc_123", - toolName: "attemptCompletion", - args: {}, - }, - ], - }, - ], - }) - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - }) - const emptyResult = [{ type: "textPart", text: JSON.stringify({}), id: createId() }] - const skillManager = createMockSkillManager({ - "@perstack/base": createMockMcpAdapter("@perstack/base", "attemptCompletion", emptyResult), - }) - const event = await callingMcpToolsLogic({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager, - llmExecutor: mockLLMExecutor, - }) - expect(event.type).toBe("completeRun") - expect((event as { text: string }).text).toBe( - "Hello! I am your assistant. How can I help you today?", - ) - }) - - it("routes attemptCompletion to attemptCompletion when textPart is empty", async () => { - const setting = createRunSetting() - // Create checkpoint with an expertMessage containing empty textPart - const checkpoint = createCheckpoint({ - messages: [ - { - id: createId(), - type: "instructionMessage" as const, - contents: [ - { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, - ], - cache: true, - }, { id: createId(), type: "expertMessage" as const, @@ -439,7 +364,7 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { { id: createId(), type: "textPart" as const, - text: " ", // Whitespace only + text: "Some existing text alongside the tool call", }, { id: createId(), @@ -474,7 +399,6 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { skillManager, llmExecutor: mockLLMExecutor, }) - // Should transition to GeneratingRunResult since textPart is empty expect(event.type).toBe("attemptCompletion") }) diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts index ca502bd6..9bdc4263 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts @@ -1,14 +1,10 @@ import { attemptCompletion, - completeRun, finishMcpTools, type RunEvent, resolveToolResults, type ToolResult, } from "@perstack/core" -import { calculateContextWindowUsage } from "../../helpers/model.js" -import { createEmptyUsage, sumUsage } from "../../helpers/usage.js" -import { createExpertMessage, createToolMessage } from "../../messages/message.js" import { classifyToolCalls, toolExecutorFactory } from "../../tool-execution/index.js" import type { RunSnapshot } from "../machine.js" @@ -25,38 +21,16 @@ function hasRemainingTodos(toolResult: ToolResult): boolean { } } -/** - * Extract textPart from the last expert message. - * When LLM generates both text and attemptCompletion in one response, - * we should use that text as the final result instead of re-generating. - */ -function extractTextFromLastMessage( - checkpoint: RunSnapshot["context"]["checkpoint"], -): string | undefined { - const lastMessage = checkpoint.messages[checkpoint.messages.length - 1] - if (!lastMessage || lastMessage.type !== "expertMessage") { - return undefined - } - const textPart = lastMessage.contents.find((c) => c.type === "textPart") - if (!textPart || textPart.type !== "textPart") { - return undefined - } - // Only return if there's actual content (not just whitespace) - const text = textPart.text.trim() - return text.length > 0 ? text : undefined -} - /** * CallingMcpTools state: Execute MCP tool calls in parallel. * * Responsibilities: * - Execute MCP tools in parallel - * - Handle attemptCompletion specially (→ GeneratingRunResult or completeRun) + * - Handle attemptCompletion specially (→ GeneratingRunResult) * - Classify remaining tool calls and route to appropriate state * * Transitions: - * - attemptCompletion → GeneratingRunResult (needs LLM to generate final result) - * - completeRun → Stopped (attemptCompletion with existing text) + * - attemptCompletion → GeneratingRunResult (LLM generates final result text) * - finishMcpTools → CallingDelegates (has delegate/interactive tools remaining) * - resolveToolResults → ResolvingToolResult (MCP only, no other tools) */ @@ -87,54 +61,7 @@ export async function callingMcpToolsLogic({ return resolveToolResults(setting, checkpoint, { toolResults: [toolResult] }) } - // Check if LLM already generated a text response along with attemptCompletion - // If so, use that text directly instead of transitioning to GeneratingRunResult - const existingText = extractTextFromLastMessage(checkpoint) - if (existingText) { - // Build tool message for the attemptCompletion result - const toolResultPart = { - type: "toolResultPart" as const, - toolCallId: toolResult.id, - toolName: attemptCompletionTool.toolName, - contents: toolResult.result.filter( - (part) => - part.type === "textPart" || - part.type === "imageInlinePart" || - part.type === "fileInlinePart", - ), - } - const toolMessage = createToolMessage([toolResultPart]) - // Create expertMessage with the existing text for delegation result handling - const expertMessage = createExpertMessage([{ type: "textPart", text: existingText }]) - const newMessages = [toolMessage, expertMessage] - const newUsage = sumUsage(checkpoint.usage, createEmptyUsage()) - - // Complete run directly with the existing text - return completeRun(setting, checkpoint, { - checkpoint: { - ...checkpoint, - messages: [...checkpoint.messages, ...newMessages], - usage: newUsage, - contextWindowUsage: checkpoint.contextWindow - ? calculateContextWindowUsage(newUsage, checkpoint.contextWindow) - : undefined, - status: "completed", - // Clear tool handling state on completion - pendingToolCalls: undefined, - partialToolResults: undefined, - }, - step: { - ...step, - newMessages: [...step.newMessages, ...newMessages], - toolResults: [toolResult], - finishedAt: Date.now(), - }, - text: existingText, - usage: createEmptyUsage(), - }) - } - - // No existing text - transition to GeneratingRunResult to generate final result + // Transition to GeneratingRunResult to generate final result text return attemptCompletion(setting, checkpoint, { toolResult }) } diff --git a/packages/runtime/src/state-machine/states/generating-run-result.test.ts b/packages/runtime/src/state-machine/states/generating-run-result.test.ts index 79d43ef0..eda5493f 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.test.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.test.ts @@ -302,7 +302,7 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { } }) - it("falls back to 'OK' when LLM generates empty text", async () => { + it("completes with undefined text when LLM generates empty text", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -334,16 +334,7 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { }) expect(event.type).toBe("completeRun") if (event.type === "completeRun") { - expect(event.text).toBe("OK") - const lastMessage = event.checkpoint.messages[event.checkpoint.messages.length - 1] - expect(lastMessage.type).toBe("expertMessage") - if (lastMessage.type === "expertMessage") { - const textPart = lastMessage.contents.find((c) => c.type === "textPart") - expect(textPart).toBeDefined() - if (textPart?.type === "textPart") { - expect(textPart.text).toBe("OK") - } - } + expect(event.text).toBeUndefined() } }) }) diff --git a/packages/runtime/src/state-machine/states/generating-run-result.ts b/packages/runtime/src/state-machine/states/generating-run-result.ts index e390f9dc..4c2dec83 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.ts @@ -148,15 +148,11 @@ export async function generatingRunResultLogic({ const thinkingParts = extractThinkingParts(reasoning as ReasoningPart[] | undefined) const thinkingText = extractThinkingText(reasoning as ReasoningPart[] | undefined) - // Fallback when LLM generates no visible text (e.g., only reasoning/thinking with extended thinking). - // Since GeneratingRunResult is only reached after a successful attemptCompletion (no remaining todos), - // the task is already complete — use "OK" as a minimal non-empty completion text. - const resultText = text || "OK" - // Build ExpertMessage with ThinkingPart + TextPart + // Always include textPart even if empty - required for delegation result handling const expertContents: Array | Omit> = [ ...thinkingParts, - { type: "textPart" as const, text: resultText }, + { type: "textPart" as const, text: text ?? "" }, ] const newMessages = [toolMessage, createExpertMessage(expertContents)] @@ -190,7 +186,7 @@ export async function generatingRunResultLogic({ finishedAt: Date.now(), usage: sumUsage(step.usage, usage), }, - text: resultText, + text, usage, }) } From e82a70e85df06968512e53473a43eaf1b34202cf Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 18 Feb 2026 21:24:34 +0000 Subject: [PATCH 2/4] fix: update multi-modal E2E instructions to generate text before completion The terse "Call attemptCompletion" instruction caused LLMs to complete without generating visible summary/description text, resulting in empty completeRun text. Updated to explicitly request content generation. Co-Authored-By: Claude Opus 4.6 --- e2e/experts/multi-modal.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index 442e1cdb..21d34c9d 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for PDF file reading" instruction = """ 1. Use readPdfFile to read the PDF at the specified path -2. Call attemptCompletion +2. Summarize the content briefly, then call attemptCompletion """ [experts."e2e-pdf-reader".skills."@perstack/base"] @@ -24,7 +24,7 @@ version = "1.0.0" description = "E2E test expert for image file reading" instruction = """ 1. Use readImageFile to read the image at the specified path -2. Call attemptCompletion +2. Describe the image briefly, then call attemptCompletion """ [experts."e2e-image-reader".skills."@perstack/base"] From fbb0759353ed236f13590c8813dd4fe0a0faf09c Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 18 Feb 2026 21:39:43 +0000 Subject: [PATCH 3/4] fix: extract prior expert text when GeneratingRunResult LLM returns empty When the LLM generates result text alongside the attemptCompletion tool call, the follow-up LLM call in GeneratingRunResult (with tools:{}) often returns empty text since the LLM has nothing more to add. Fall back to extracting text from the last expert message in the checkpoint. Co-Authored-By: Claude Opus 4.6 --- .../strict-attempt-completion-schema.md | 3 + .../states/generating-run-result.test.ts | 67 ++++++++++++++++++- .../states/generating-run-result.ts | 19 +++++- 3 files changed, 87 insertions(+), 2 deletions(-) diff --git a/.changeset/strict-attempt-completion-schema.md b/.changeset/strict-attempt-completion-schema.md index b57f2a09..d22f6502 100644 --- a/.changeset/strict-attempt-completion-schema.md +++ b/.changeset/strict-attempt-completion-schema.md @@ -11,4 +11,7 @@ fix: enforce no-args schema on attemptCompletion and unify completion path - Remove the `extractTextFromLastMessage` shortcut path in CallingMcpTools so that attemptCompletion always transitions to GeneratingRunResult for final text generation - Revert the "OK" fallback text in GeneratingRunResult (runtime should not inject text) +- Add fallback in GeneratingRunResult to extract text from the last expert message when + the follow-up LLM call produces empty text (handles the common case where the LLM + already generated result text alongside the attemptCompletion tool call) - Update E2E expert instructions to use simple "Call attemptCompletion" without result args diff --git a/packages/runtime/src/state-machine/states/generating-run-result.test.ts b/packages/runtime/src/state-machine/states/generating-run-result.test.ts index eda5493f..abf31c05 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.test.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.test.ts @@ -302,7 +302,7 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { } }) - it("completes with undefined text when LLM generates empty text", async () => { + it("completes with undefined text when LLM generates empty text and no prior expert text", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -337,4 +337,69 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { expect(event.text).toBeUndefined() } }) + + it("extracts text from last expert message when LLM generates empty text", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint({ + messages: [ + { + id: createId(), + type: "instructionMessage" as const, + contents: [ + { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, + ], + cache: true, + }, + { + id: createId(), + type: "expertMessage" as const, + contents: [ + { + id: createId(), + type: "textPart" as const, + text: "Here is a summary of the PDF content.", + }, + { + id: createId(), + type: "toolCallPart" as const, + toolCallId: "tc_123", + toolName: "attemptCompletion", + args: {}, + }, + ], + }, + ], + }) + const step = createStep({ + toolCalls: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "attemptCompletion", + args: {}, + }, + ], + toolResults: [ + { + id: "tc_123", + skillName: "@perstack/base", + toolName: "attemptCompletion", + result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], + }, + ], + }) + mockLLMExecutor.setMockResult(createMockResult(undefined)) + const event = await StateMachineLogics.GeneratingRunResult({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManager: createMockSkillManagerFromAdapters({}), + llmExecutor: mockLLMExecutor as unknown as LLMExecutor, + }) + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + expect(event.text).toBe("Here is a summary of the PDF content.") + } + }) }) diff --git a/packages/runtime/src/state-machine/states/generating-run-result.ts b/packages/runtime/src/state-machine/states/generating-run-result.ts index 4c2dec83..202507bc 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.ts @@ -142,7 +142,24 @@ export async function generatingRunResultLogic({ const generationResult = executionResult.result const usage = usageFromGenerateTextResult(generationResult) - const { text, reasoning } = generationResult + const { reasoning } = generationResult + + // If the final LLM call produced no text, extract from the last expert message. + // This handles the common case where the LLM generated result text alongside the + // attemptCompletion tool call — that text is in the checkpoint but the follow-up + // LLM call (with tools:{}) has nothing more to add. + let text = generationResult.text + if (!text) { + const lastExpertMessage = [...checkpoint.messages] + .reverse() + .find((m) => m.type === "expertMessage") + if (lastExpertMessage && lastExpertMessage.type === "expertMessage") { + const textPart = lastExpertMessage.contents.find((c) => c.type === "textPart") + if (textPart && textPart.type === "textPart" && textPart.text) { + text = textPart.text + } + } + } // Extract thinking from reasoning (Anthropic, Google) const thinkingParts = extractThinkingParts(reasoning as ReasoningPart[] | undefined) From a5f6edaaef2f27a4222ddc4d17adfaea8da9f2b0 Mon Sep 17 00:00:00 2001 From: HiranoMasaaki Date: Wed, 18 Feb 2026 22:50:06 +0000 Subject: [PATCH 4/4] fix: add result param to attemptCompletion and remove GeneratingRunResult Add optional `result` string parameter to attemptCompletion so the LLM provides its final response directly in the tool call, eliminating the extra LLM round-trip in GeneratingRunResult that was unreliable (often returning empty text) and wasteful. - Add `result?: string` to attemptCompletion inputSchema - Complete runs directly from CallingMcpTools (no GeneratingRunResult) - Remove GeneratingRunResult state, attemptCompletion event type - Remove dead attemptCompletion event handling in React utils - Update system prompt and E2E instructions Co-Authored-By: Claude Opus 4.6 --- .../strict-attempt-completion-schema.md | 21 +- apps/base/src/tools/attempt-completion.ts | 5 +- e2e/experts/multi-modal.toml | 4 +- packages/core/src/schemas/runtime.ts | 5 - .../react/src/utils/event-to-activity.test.ts | 31 -- packages/react/src/utils/event-to-activity.ts | 22 - .../src/messages/instruction-message.ts | 4 +- packages/runtime/src/state-machine/machine.ts | 62 --- .../states/calling-mcp-tools.test.ts | 71 +-- .../state-machine/states/calling-mcp-tools.ts | 61 ++- .../states/generating-run-result.test.ts | 405 ------------------ .../states/generating-run-result.ts | 209 --------- 12 files changed, 120 insertions(+), 780 deletions(-) delete mode 100644 packages/runtime/src/state-machine/states/generating-run-result.test.ts delete mode 100644 packages/runtime/src/state-machine/states/generating-run-result.ts diff --git a/.changeset/strict-attempt-completion-schema.md b/.changeset/strict-attempt-completion-schema.md index d22f6502..f91df974 100644 --- a/.changeset/strict-attempt-completion-schema.md +++ b/.changeset/strict-attempt-completion-schema.md @@ -1,17 +1,16 @@ --- "@perstack/base": patch "@perstack/runtime": patch +"@perstack/core": patch +"@perstack/react": patch --- -fix: enforce no-args schema on attemptCompletion and unify completion path +fix: add result param to attemptCompletion and remove GeneratingRunResult -- Change attemptCompletion inputSchema to `z.object({}).strict()` which produces - `additionalProperties: false` in the JSON Schema sent to LLMs, preventing them - from passing arguments (e.g. `{result: "OK"}`) to the tool -- Remove the `extractTextFromLastMessage` shortcut path in CallingMcpTools so that - attemptCompletion always transitions to GeneratingRunResult for final text generation -- Revert the "OK" fallback text in GeneratingRunResult (runtime should not inject text) -- Add fallback in GeneratingRunResult to extract text from the last expert message when - the follow-up LLM call produces empty text (handles the common case where the LLM - already generated result text alongside the attemptCompletion tool call) -- Update E2E expert instructions to use simple "Call attemptCompletion" without result args +- Add optional `result` string parameter to attemptCompletion tool schema so the LLM + can provide its final response text directly in the tool call +- Complete runs directly from CallingMcpTools when attemptCompletion succeeds, eliminating + the extra LLM round-trip in GeneratingRunResult +- Remove GeneratingRunResult state, its transitions, and the attemptCompletion event type +- Remove dead attemptCompletion event handling code from React event-to-activity utils +- Update E2E expert instructions to pass result via attemptCompletion args diff --git a/apps/base/src/tools/attempt-completion.ts b/apps/base/src/tools/attempt-completion.ts index 2f61067e..dc693b41 100644 --- a/apps/base/src/tools/attempt-completion.ts +++ b/apps/base/src/tools/attempt-completion.ts @@ -20,8 +20,9 @@ export function registerAttemptCompletion(server: McpServer) { "attemptCompletion", { title: "Attempt completion", - description: "Signal task completion. Validates all todos are complete before ending.", - inputSchema: z.object({}).strict(), + description: + "Signal task completion. Provide a result parameter with your final response text. Validates all todos are complete before ending.", + inputSchema: z.object({ result: z.string().optional() }).strict(), }, async () => { try { diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index 21d34c9d..3f023739 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for PDF file reading" instruction = """ 1. Use readPdfFile to read the PDF at the specified path -2. Summarize the content briefly, then call attemptCompletion +2. Call attemptCompletion with a brief summary of the content as the result """ [experts."e2e-pdf-reader".skills."@perstack/base"] @@ -24,7 +24,7 @@ version = "1.0.0" description = "E2E test expert for image file reading" instruction = """ 1. Use readImageFile to read the image at the specified path -2. Describe the image briefly, then call attemptCompletion +2. Call attemptCompletion with a brief description of the image as the result """ [experts."e2e-image-reader".skills."@perstack/base"] diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts index 901bd11e..58fc2dcc 100644 --- a/packages/core/src/schemas/runtime.ts +++ b/packages/core/src/schemas/runtime.ts @@ -276,9 +276,6 @@ type ExpertStatePayloads = { resolveToolResults: { toolResults: ToolResult[] } - attemptCompletion: { - toolResult: ToolResult - } finishToolCall: { newMessages: (UserMessage | ToolMessage)[] } @@ -443,7 +440,6 @@ export const callTools = createEvent("callTools") export const finishMcpTools = createEvent("finishMcpTools") export const skipDelegates = createEvent("skipDelegates") export const resolveToolResults = createEvent("resolveToolResults") -export const attemptCompletion = createEvent("attemptCompletion") export const finishToolCall = createEvent("finishToolCall") export const resumeToolCalls = createEvent("resumeToolCalls") export const completeRun = createEvent("completeRun") @@ -552,7 +548,6 @@ const EXPERT_STATE_EVENT_TYPES = new Set([ "finishMcpTools", "skipDelegates", "resolveToolResults", - "attemptCompletion", "finishToolCall", "resumeToolCalls", "continueToNextStep", diff --git a/packages/react/src/utils/event-to-activity.test.ts b/packages/react/src/utils/event-to-activity.test.ts index 1b1a312e..07ee6ed6 100644 --- a/packages/react/src/utils/event-to-activity.test.ts +++ b/packages/react/src/utils/event-to-activity.test.ts @@ -398,37 +398,6 @@ describe("processRunEventToActivity", () => { } }) - it("processes attemptCompletion event with single toolResult", () => { - const state = createInitialActivityProcessState() - const activities: Activity[] = [] - - // First add tool call - const toolCall = createToolCall({ id: "attempt-1", toolName: "attemptCompletion" }) - const callEvent = createBaseEvent({ - type: "callTools", - toolCalls: [toolCall], - newMessage: {} as RunEvent["newMessage"], - usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 }, - } as Partial) as RunEvent - processRunEventToActivity(state, callEvent, (a) => activities.push(a)) - - // Then resolve with attemptCompletion event (single toolResult) - const toolResult = createToolResult({ - id: "attempt-1", - toolName: "attemptCompletion", - result: [{ type: "textPart", id: "tp-1", text: "{}" }], - }) - const resultEvent = createBaseEvent({ - id: "e-2", - type: "attemptCompletion", - toolResult, - } as Partial) as RunEvent - processRunEventToActivity(state, resultEvent, (a) => activities.push(a)) - - expect(activities).toHaveLength(1) - expect(activities[0].type).toBe("attemptCompletion") - }) - it("does not log query when startRun has no user message", () => { const state = createInitialActivityProcessState() const activities: Activity[] = [] diff --git a/packages/react/src/utils/event-to-activity.ts b/packages/react/src/utils/event-to-activity.ts index 283b966e..1e009570 100644 --- a/packages/react/src/utils/event-to-activity.ts +++ b/packages/react/src/utils/event-to-activity.ts @@ -13,8 +13,6 @@ import { createGeneralToolActivity, } from "@perstack/core" -const TOOL_RESULT_EVENT_TYPES = new Set(["resolveToolResults", "attemptCompletion"]) - /** * Converts a tool call and result to an Activity. * Delegates to core's createBaseToolActivity/createGeneralToolActivity to avoid duplication. @@ -156,9 +154,6 @@ const isStopRunByInteractiveToolEvent = ( const isToolResultsEvent = (event: RunEvent): event is RunEvent & { toolResults: ToolResult[] } => event.type === "resolveToolResults" && "toolResults" in event -const isToolResultEvent = (event: RunEvent): event is RunEvent & { toolResult: ToolResult } => - TOOL_RESULT_EVENT_TYPES.has(event.type) && "toolResult" in event - /** * Wraps multiple activities in a ParallelActivitiesGroup with shared reasoning. * If only one activity, returns it directly. @@ -495,22 +490,5 @@ export function processRunEventToActivity( } runState.completedReasoning = undefined } - } else if (isToolResultEvent(event)) { - const { toolResult } = event - const tool = state.tools.get(toolResult.id) - if (tool && !tool.logged) { - const activityId = `action-${tool.id}` - const activity = toolToActivity(tool.toolCall, toolResult, runState.completedReasoning, { - id: activityId, - expertKey: event.expertKey, - runId: event.runId, - previousActivityId: runState.lastActivityId, - delegatedBy: runState.delegatedBy, - }) - addActivity(activity) - runState.lastActivityId = activityId - tool.logged = true - runState.completedReasoning = undefined - } } } diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index 999a9d3d..f3062207 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -5,8 +5,8 @@ import { dedent } from "ts-dedent" function getMetaInstruction(startedAt: number): string { return dedent` Call tools iteratively to complete the user's task. - When the task is complete, or when you cannot help, call attemptCompletion. - Call attemptCompletion ONLY as a tool call — do not include any text response with it. + When the task is complete, call attemptCompletion with a result parameter containing your final response. + When you cannot help, call attemptCompletion without a result. Environment: - Current time: ${new Date(startedAt).toISOString()} diff --git a/packages/runtime/src/state-machine/machine.ts b/packages/runtime/src/state-machine/machine.ts index 012c97a7..9aa5bbcf 100644 --- a/packages/runtime/src/state-machine/machine.ts +++ b/packages/runtime/src/state-machine/machine.ts @@ -8,7 +8,6 @@ import { callingDelegatesLogic } from "./states/calling-delegates.js" import { callingInteractiveToolsLogic } from "./states/calling-interactive-tools.js" import { callingMcpToolsLogic } from "./states/calling-mcp-tools.js" import { finishingStepLogic } from "./states/finishing-step.js" -import { generatingRunResultLogic } from "./states/generating-run-result.js" import { generatingToolCallLogic } from "./states/generating-tool-call.js" import { initLogic } from "./states/init.js" import { preparingForStepLogic } from "./states/preparing-for-step.js" @@ -237,16 +236,6 @@ export const runtimeStateMachine = setup({ }) satisfies Step, }), }, - attemptCompletion: { - target: "GeneratingRunResult", - actions: assign({ - step: ({ context, event }) => - ({ - ...context.step, - toolResults: [event.toolResult], - }) satisfies Step, - }), - }, finishMcpTools: { target: "CallingDelegates", actions: assign({ @@ -336,56 +325,6 @@ export const runtimeStateMachine = setup({ }, }, - GeneratingRunResult: { - on: { - retry: { - target: "FinishingStep", - actions: assign({ - checkpoint: ({ context, event }) => - ({ - ...context.checkpoint, - messages: [...context.checkpoint.messages, ...event.newMessages], - usage: sumUsage(context.checkpoint.usage, event.usage), - retryCount: (context.checkpoint.retryCount ?? 0) + 1, - }) satisfies Checkpoint, - step: ({ context, event }) => - ({ - ...context.step, - newMessages: event.newMessages, - toolCalls: event.toolCalls, - toolResults: event.toolResults, - usage: sumUsage(context.step.usage, event.usage), - }) satisfies Step, - }), - }, - stopRunByError: { - target: "Stopped", - actions: assign({ - checkpoint: ({ event }) => - ({ - ...event.checkpoint, - error: event.error, - }) satisfies Checkpoint, - step: ({ event }) => - ({ - ...event.step, - inputMessages: undefined, - }) satisfies Step, - }), - }, - completeRun: { - target: "Stopped", - actions: assign({ - checkpoint: ({ event }) => ({ ...event.checkpoint, retryCount: 0 }), - step: ({ event }) => ({ - ...event.step, - inputMessages: undefined, - }), - }), - }, - }, - }, - FinishingStep: { on: { continueToNextStep: { @@ -430,7 +369,6 @@ export const StateMachineLogics: Record< CallingDelegates: callingDelegatesLogic, CallingInteractiveTools: callingInteractiveToolsLogic, ResolvingToolResult: resolvingToolResultLogic, - GeneratingRunResult: generatingRunResultLogic, FinishingStep: finishingStepLogic, } diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts index 0c3695a8..5dc864e7 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts @@ -324,7 +324,7 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { expect(event.type).toBe("resolveToolResults") }) - it("routes attemptCompletion to attemptCompletion handler when no remaining todos", async () => { + it("routes attemptCompletion to completeRun when no remaining todos", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -349,41 +349,53 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { skillManager, llmExecutor: mockLLMExecutor, }) - expect(event.type).toBe("attemptCompletion") + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + expect(event.text).toBe("") + } }) - it("always routes attemptCompletion to GeneratingRunResult (no shortcut)", async () => { + it("includes result text in completeRun when args.result is provided", async () => { const setting = createRunSetting() - // Even with text in the last expert message, should go to GeneratingRunResult - const checkpoint = createCheckpoint({ - messages: [ + const checkpoint = createCheckpoint() + const step = createStep({ + toolCalls: [ { - id: createId(), - type: "expertMessage" as const, - contents: [ - { - id: createId(), - type: "textPart" as const, - text: "Some existing text alongside the tool call", - }, - { - id: createId(), - type: "toolCallPart" as const, - toolCallId: "tc_123", - toolName: "attemptCompletion", - args: {}, - }, - ], + id: "tc_123", + skillName: "@perstack/base", + toolName: "attemptCompletion", + args: { result: "Here is my final answer" }, }, ], }) + const emptyResult = [{ type: "textPart", text: JSON.stringify({}), id: createId() }] + const skillManager = createMockSkillManager({ + "@perstack/base": createMockMcpAdapter("@perstack/base", "attemptCompletion", emptyResult), + }) + const event = await callingMcpToolsLogic({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManager, + llmExecutor: mockLLMExecutor, + }) + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + expect(event.text).toBe("Here is my final answer") + } + }) + + it("appends expertMessage with textPart as last checkpoint message for delegation compat", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() const step = createStep({ toolCalls: [ { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", - args: {}, + args: { result: "delegation result" }, }, ], }) @@ -399,7 +411,18 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { skillManager, llmExecutor: mockLLMExecutor, }) - expect(event.type).toBe("attemptCompletion") + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + const lastMsg = event.checkpoint.messages[event.checkpoint.messages.length - 1] + expect(lastMsg.type).toBe("expertMessage") + if (lastMsg.type === "expertMessage") { + const textPart = lastMsg.contents.find((c) => c.type === "textPart") + expect(textPart).toBeDefined() + if (textPart?.type === "textPart") { + expect(textPart.text).toBe("delegation result") + } + } + } }) it("routes attemptCompletion to resolveToolResults when remaining todos exist", async () => { diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts index 9bdc4263..fe8f848d 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts @@ -1,10 +1,13 @@ import { - attemptCompletion, + completeRun, finishMcpTools, type RunEvent, resolveToolResults, type ToolResult, } from "@perstack/core" +import { calculateContextWindowUsage } from "../../helpers/model.js" +import { createEmptyUsage, sumUsage } from "../../helpers/usage.js" +import { createExpertMessage, createToolMessage } from "../../messages/message.js" import { classifyToolCalls, toolExecutorFactory } from "../../tool-execution/index.js" import type { RunSnapshot } from "../machine.js" @@ -26,11 +29,11 @@ function hasRemainingTodos(toolResult: ToolResult): boolean { * * Responsibilities: * - Execute MCP tools in parallel - * - Handle attemptCompletion specially (→ GeneratingRunResult) + * - Handle attemptCompletion specially (→ completeRun directly) * - Classify remaining tool calls and route to appropriate state * * Transitions: - * - attemptCompletion → GeneratingRunResult (LLM generates final result text) + * - completeRun → Stopped (attemptCompletion with no remaining todos) * - finishMcpTools → CallingDelegates (has delegate/interactive tools remaining) * - resolveToolResults → ResolvingToolResult (MCP only, no other tools) */ @@ -61,8 +64,56 @@ export async function callingMcpToolsLogic({ return resolveToolResults(setting, checkpoint, { toolResults: [toolResult] }) } - // Transition to GeneratingRunResult to generate final result text - return attemptCompletion(setting, checkpoint, { toolResult }) + // Extract result from attemptCompletion args + const result = + typeof attemptCompletionTool.args?.result === "string" + ? attemptCompletionTool.args.result + : "" + + // Build toolMessage from attemptCompletion result + const toolResultParts = [ + { + type: "toolResultPart" as const, + toolCallId: toolResult.id, + toolName: attemptCompletionTool.toolName, + contents: toolResult.result.filter( + (part) => + part.type === "textPart" || + part.type === "imageInlinePart" || + part.type === "fileInlinePart", + ), + }, + ] + const toolMessage = createToolMessage(toolResultParts) + + // Build expertMessage with textPart — critical for delegation + // (DelegationExecutor.extractDelegationResult expects last message = expertMessage) + const expertMessage = createExpertMessage([{ type: "textPart" as const, text: result }]) + + const newMessages = [toolMessage, expertMessage] + const newUsage = sumUsage(checkpoint.usage, createEmptyUsage()) + + return completeRun(setting, checkpoint, { + checkpoint: { + ...checkpoint, + messages: [...checkpoint.messages, ...newMessages], + usage: newUsage, + contextWindowUsage: checkpoint.contextWindow + ? calculateContextWindowUsage(newUsage, checkpoint.contextWindow) + : undefined, + status: "completed", + pendingToolCalls: undefined, + partialToolResults: undefined, + }, + step: { + ...step, + newMessages: [...step.newMessages, ...newMessages], + finishedAt: Date.now(), + usage: sumUsage(step.usage, createEmptyUsage()), + }, + text: result, + usage: createEmptyUsage(), + }) } // Classify tool calls by type diff --git a/packages/runtime/src/state-machine/states/generating-run-result.test.ts b/packages/runtime/src/state-machine/states/generating-run-result.test.ts deleted file mode 100644 index abf31c05..00000000 --- a/packages/runtime/src/state-machine/states/generating-run-result.test.ts +++ /dev/null @@ -1,405 +0,0 @@ -import { createId } from "@paralleldrive/cuid2" -import type { GenerateTextResult, ToolSet } from "ai" -import { beforeEach, describe, expect, it } from "vitest" -import { - createCheckpoint, - createMockSkillManagerFromAdapters, - createRunSetting, - createStep, -} from "../../../test/run-params.js" -import type { LLMExecutor } from "../../llm/index.js" -import { createMockLLMExecutor, type MockLLMExecutor } from "../../llm/index.js" -import type { LLMExecutionResult } from "../../llm/types.js" -import { StateMachineLogics } from "../index.js" - -let mockLLMExecutor: MockLLMExecutor - -function createMockResult(text?: string): LLMExecutionResult { - return { - success: true, - result: { - text, - finishReason: "stop", - toolCalls: [], - usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, - response: { id: "mock", timestamp: new Date(), modelId: "mock", headers: {} }, - request: {}, - toolResults: [], - warnings: [], - sources: [], - providerMetadata: undefined, - reasoning: undefined, - reasoningDetails: [], - files: [], - logprobs: undefined, - toJsonResponse: () => new Response(), - experimental_output: undefined, - steps: [], - rawCall: {}, - } as unknown as GenerateTextResult, - } -} - -function createMockErrorResult( - error: { name: string; message: string; statusCode?: number }, - isRetryable: boolean, -): LLMExecutionResult { - return { - success: false, - error: { - name: error.name, - message: error.message, - statusCode: error.statusCode, - isRetryable, - provider: "anthropic", - }, - isRetryable, - } -} - -describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { - beforeEach(() => { - mockLLMExecutor = createMockLLMExecutor() - }) - - it("generates run result via LLM and completes", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult("Task completed successfully")) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("completeRun") - if (event.type === "completeRun") { - expect(event.text).toBe("Task completed successfully") - } - }) - - it("returns retry event on generation error", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult({ name: "Error", message: "Generation failed" }, true), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("retry") - }) - - it("returns stopRunByError event on non-retryable API error (401)", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult( - { name: "APICallError", message: "Unauthorized", statusCode: 401 }, - false, - ), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("stopRunByError") - if (event.type === "stopRunByError") { - expect(event.error.statusCode).toBe(401) - expect(event.error.isRetryable).toBe(false) - expect(event.checkpoint.status).toBe("stoppedByError") - } - }) - - it("returns retry event on retryable API error (429)", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult( - { name: "APICallError", message: "Rate limited", statusCode: 429 }, - true, - ), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("retry") - }) - - it("throws error when tool calls or results missing", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ toolCalls: undefined, toolResults: undefined }) - await expect( - StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }), - ).rejects.toThrow("No tool calls or tool results found") - }) - - it("includes proper event metadata", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult("Final result")) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.id).toBeDefined() - expect(typeof event.id).toBe("string") - }) - - it("returns stopRunByError when retryable error occurs but retryCount >= maxRetries", async () => { - const setting = createRunSetting({ maxRetries: 3 }) - const checkpoint = createCheckpoint({ retryCount: 3 }) - const step = createStep({ - toolCalls: [ - { - id: "tc_retry", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_retry", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult({ name: "RateLimitError", message: "Rate limited" }, true), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("stopRunByError") - if (event.type === "stopRunByError") { - expect(event.error.message).toContain("Max retries (3) exceeded") - } - }) - - it("completes with undefined text when LLM generates empty text and no prior expert text", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult(undefined)) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("completeRun") - if (event.type === "completeRun") { - expect(event.text).toBeUndefined() - } - }) - - it("extracts text from last expert message when LLM generates empty text", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint({ - messages: [ - { - id: createId(), - type: "instructionMessage" as const, - contents: [ - { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, - ], - cache: true, - }, - { - id: createId(), - type: "expertMessage" as const, - contents: [ - { - id: createId(), - type: "textPart" as const, - text: "Here is a summary of the PDF content.", - }, - { - id: createId(), - type: "toolCallPart" as const, - toolCallId: "tc_123", - toolName: "attemptCompletion", - args: {}, - }, - ], - }, - ], - }) - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult(undefined)) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("completeRun") - if (event.type === "completeRun") { - expect(event.text).toBe("Here is a summary of the PDF content.") - } - }) -}) diff --git a/packages/runtime/src/state-machine/states/generating-run-result.ts b/packages/runtime/src/state-machine/states/generating-run-result.ts deleted file mode 100644 index 202507bc..00000000 --- a/packages/runtime/src/state-machine/states/generating-run-result.ts +++ /dev/null @@ -1,209 +0,0 @@ -import { - completeRun, - createStreamingEvent, - type RunEvent, - retry, - stopRunByError, - type TextPart, - type ThinkingPart, -} from "@perstack/core" -import { calculateContextWindowUsage } from "../../helpers/model.js" -import { - extractThinkingParts, - extractThinkingText, - type ReasoningPart, -} from "../../helpers/thinking.js" -import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js" -import type { StreamCallbacks } from "../../llm/types.js" -import { - createExpertMessage, - createToolMessage, - createUserMessage, - messageToCoreMessage, -} from "../../messages/message.js" -import type { RunSnapshot } from "../machine.js" - -/** - * GeneratingRunResult state: Generate final result after attemptCompletion. - * - * Responsibilities: - * - Call LLM to generate final text response - * - Stream result with reasoning/result callbacks - * - Build tool message from attemptCompletion result - * - * Transitions: - * - completeRun → Stopped (successful completion) - * - retry → FinishingStep (retryable error) - * - stopRunByError → Stopped (non-retryable error) - */ -export async function generatingRunResultLogic({ - setting, - checkpoint, - step, - eventListener, - llmExecutor, -}: RunSnapshot["context"]): Promise { - if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { - throw new Error("No tool calls or tool results found") - } - const toolResultParts = step.toolResults.map((toolResult) => { - const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id) - return { - type: "toolResultPart" as const, - toolCallId: toolResult.id, - toolName: toolCall?.toolName ?? toolResult.toolName, - contents: toolResult.result.filter( - (part) => - part.type === "textPart" || - part.type === "imageInlinePart" || - part.type === "fileInlinePart", - ), - } - }) - const toolMessage = createToolMessage(toolResultParts) - const { messages } = checkpoint - const coreMessages = [...messages, toolMessage].map(messageToCoreMessage) - - // Track if reasoning/result was completed via callback (to avoid duplicate emissions) - let reasoningCompletedViaCallback = false - let _resultCompletedViaCallback = false - - // Create streaming callbacks for fire-and-forget event emission - const callbacks: StreamCallbacks = { - onReasoningStart: () => { - eventListener(createStreamingEvent("startStreamingReasoning", setting, checkpoint, {})) - }, - onReasoningDelta: (delta) => { - eventListener(createStreamingEvent("streamReasoning", setting, checkpoint, { delta })) - }, - onReasoningComplete: (text) => { - // Emit completeStreamingReasoning before result phase starts - eventListener( - createStreamingEvent("completeStreamingReasoning", setting, checkpoint, { text }), - ) - reasoningCompletedViaCallback = true - }, - onResultStart: () => { - eventListener(createStreamingEvent("startStreamingRunResult", setting, checkpoint, {})) - }, - onResultDelta: (delta) => { - eventListener(createStreamingEvent("streamRunResult", setting, checkpoint, { delta })) - }, - onResultComplete: (text) => { - eventListener( - createStreamingEvent("completeStreamingRunResult", setting, checkpoint, { text }), - ) - _resultCompletedViaCallback = true - }, - } - - const executionResult = await llmExecutor.streamText( - { - messages: coreMessages, - maxRetries: setting.maxRetries, - tools: {}, // No tools for run result generation - abortSignal: AbortSignal.timeout(setting.timeout), - reasoningBudget: setting.reasoningBudget, - }, - callbacks, - ) - - if (!executionResult.success) { - const { error, isRetryable } = executionResult - const currentRetryCount = checkpoint.retryCount ?? 0 - if (!isRetryable || currentRetryCount >= setting.maxRetries) { - return stopRunByError(setting, checkpoint, { - checkpoint: { - ...checkpoint, - status: "stoppedByError", - }, - step: { - ...step, - finishedAt: Date.now(), - }, - error: { - name: error.name ?? "Error", - message: - currentRetryCount >= setting.maxRetries - ? `Max retries (${setting.maxRetries}) exceeded: ${error.message}` - : error.message, - statusCode: error.statusCode, - isRetryable: false, - }, - }) - } - const reason = JSON.stringify({ error: error.name ?? "Error", message: error.message }) - return retry(setting, checkpoint, { - reason, - newMessages: [toolMessage, createUserMessage([{ type: "textPart", text: reason }])], - usage: createEmptyUsage(), - }) - } - - const generationResult = executionResult.result - const usage = usageFromGenerateTextResult(generationResult) - const { reasoning } = generationResult - - // If the final LLM call produced no text, extract from the last expert message. - // This handles the common case where the LLM generated result text alongside the - // attemptCompletion tool call — that text is in the checkpoint but the follow-up - // LLM call (with tools:{}) has nothing more to add. - let text = generationResult.text - if (!text) { - const lastExpertMessage = [...checkpoint.messages] - .reverse() - .find((m) => m.type === "expertMessage") - if (lastExpertMessage && lastExpertMessage.type === "expertMessage") { - const textPart = lastExpertMessage.contents.find((c) => c.type === "textPart") - if (textPart && textPart.type === "textPart" && textPart.text) { - text = textPart.text - } - } - } - - // Extract thinking from reasoning (Anthropic, Google) - const thinkingParts = extractThinkingParts(reasoning as ReasoningPart[] | undefined) - const thinkingText = extractThinkingText(reasoning as ReasoningPart[] | undefined) - - // Build ExpertMessage with ThinkingPart + TextPart - // Always include textPart even if empty - required for delegation result handling - const expertContents: Array | Omit> = [ - ...thinkingParts, - { type: "textPart" as const, text: text ?? "" }, - ] - const newMessages = [toolMessage, createExpertMessage(expertContents)] - - // Note: completeStreamingReasoning is emitted via onReasoningComplete callback during streaming - // Fallback emission only if callback wasn't triggered (should be rare) - if (thinkingText && !reasoningCompletedViaCallback) { - await eventListener( - createStreamingEvent("completeStreamingReasoning", setting, checkpoint, { - text: thinkingText, - }), - ) - } - - const newUsage = sumUsage(checkpoint.usage, usage) - return completeRun(setting, checkpoint, { - checkpoint: { - ...checkpoint, - messages: [...messages, ...newMessages], - usage: newUsage, - contextWindowUsage: checkpoint.contextWindow - ? calculateContextWindowUsage(newUsage, checkpoint.contextWindow) - : undefined, - status: "completed", - // Clear tool handling state on completion - pendingToolCalls: undefined, - partialToolResults: undefined, - }, - step: { - ...step, - newMessages: [...step.newMessages, ...newMessages], - finishedAt: Date.now(), - usage: sumUsage(step.usage, usage), - }, - text, - usage, - }) -}