diff --git a/.changeset/strict-attempt-completion-schema.md b/.changeset/strict-attempt-completion-schema.md new file mode 100644 index 00000000..f91df974 --- /dev/null +++ b/.changeset/strict-attempt-completion-schema.md @@ -0,0 +1,16 @@ +--- +"@perstack/base": patch +"@perstack/runtime": patch +"@perstack/core": patch +"@perstack/react": patch +--- + +fix: add result param to attemptCompletion and remove GeneratingRunResult + +- Add optional `result` string parameter to attemptCompletion tool schema so the LLM + can provide its final response text directly in the tool call +- Complete runs directly from CallingMcpTools when attemptCompletion succeeds, eliminating + the extra LLM round-trip in GeneratingRunResult +- Remove GeneratingRunResult state, its transitions, and the attemptCompletion event type +- Remove dead attemptCompletion event handling code from React event-to-activity utils +- Update E2E expert instructions to pass result via attemptCompletion args diff --git a/apps/base/src/tools/attempt-completion.ts b/apps/base/src/tools/attempt-completion.ts index a3b5f5f4..dc693b41 100644 --- a/apps/base/src/tools/attempt-completion.ts +++ b/apps/base/src/tools/attempt-completion.ts @@ -1,4 +1,5 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { z } from "zod" import { errorToolResult, successToolResult } from "../lib/tool-result.js" import { getRemainingTodos } from "./todo.js" @@ -19,8 +20,9 @@ export function registerAttemptCompletion(server: McpServer) { "attemptCompletion", { title: "Attempt completion", - description: "Signal task completion. Validates all todos are complete before ending.", - inputSchema: {}, + description: + "Signal task completion. Provide a result parameter with your final response text. Validates all todos are complete before ending.", + inputSchema: z.object({ result: z.string().optional() }).strict(), }, async () => { try { diff --git a/e2e/experts/bundled-base.toml b/e2e/experts/bundled-base.toml index e783d0be..b5d324a9 100644 --- a/e2e/experts/bundled-base.toml +++ b/e2e/experts/bundled-base.toml @@ -13,7 +13,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for bundled base with InMemoryTransport" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "OK" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-bundled-base".skills."@perstack/base"] diff --git a/e2e/experts/cli-commands.toml b/e2e/experts/cli-commands.toml index 085986fa..e79b206f 100644 --- a/e2e/experts/cli-commands.toml +++ b/e2e/experts/cli-commands.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for CLI publish command validation" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-publish-test".skills."@perstack/base"] @@ -22,7 +22,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "E2E test expert for CLI log command validation" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-cli-simple".skills."@perstack/base"] diff --git a/e2e/experts/continue-resume.toml b/e2e/experts/continue-resume.toml index 18e8fc46..99f40532 100644 --- a/e2e/experts/continue-resume.toml +++ b/e2e/experts/continue-resume.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for continue functionality" instruction = """ 1. Call askUser with question "confirm?" -2. After user responds, call attemptCompletion with result "OK" +2. After user responds, call attemptCompletion """ [experts."e2e-continue".skills."user-input"] @@ -34,7 +34,7 @@ description = "E2E test expert for resume-from functionality" instruction = """ 1. Call think with thought "processing" 2. Call askUser with question "confirm?" -3. After user responds, call attemptCompletion with result "OK" +3. After user responds, call attemptCompletion """ [experts."e2e-resume".skills."user-input"] diff --git a/e2e/experts/delegate-chain.toml b/e2e/experts/delegate-chain.toml index 5eedff64..466b0497 100644 --- a/e2e/experts/delegate-chain.toml +++ b/e2e/experts/delegate-chain.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for delegate chain" instruction = """ 1. Delegate to "e2e-delegate-level1" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-delegate-level1"] @@ -25,7 +25,7 @@ version = "1.0.0" description = "First level delegate expert" instruction = """ 1. Delegate to "e2e-delegate-level2" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-delegate-level2"] @@ -39,7 +39,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "Second level delegate expert" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-delegate-level2".skills."@perstack/base"] diff --git a/e2e/experts/error-handling.toml b/e2e/experts/error-handling.toml index e52f496e..cd559be2 100644 --- a/e2e/experts/error-handling.toml +++ b/e2e/experts/error-handling.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for tool error recovery" instruction = """ 1. Use readTextFile with the exact path given by user -2. Call attemptCompletion reporting: success/error + content or error message +2. Call attemptCompletion """ [experts."e2e-tool-error-recovery".skills."@perstack/base"] diff --git a/e2e/experts/errors.toml b/e2e/experts/errors.toml index 771a20b5..b94f9461 100644 --- a/e2e/experts/errors.toml +++ b/e2e/experts/errors.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert with broken MCP skill" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-mcp-error".skills."broken-skill"] @@ -27,7 +27,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "E2E test expert for invalid provider testing" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-invalid-provider".skills."@perstack/base"] diff --git a/e2e/experts/global-runtime.toml b/e2e/experts/global-runtime.toml index a367b869..81c63cef 100644 --- a/e2e/experts/global-runtime.toml +++ b/e2e/experts/global-runtime.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for global runtime configuration" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-global-runtime".skills."@perstack/base"] diff --git a/e2e/experts/lockfile.toml b/e2e/experts/lockfile.toml index 67018a51..1b367bee 100644 --- a/e2e/experts/lockfile.toml +++ b/e2e/experts/lockfile.toml @@ -12,7 +12,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for lockfile functionality" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "lockfile-test-ok" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-lockfile".skills."@perstack/base"] diff --git a/e2e/experts/mixed-tools.toml b/e2e/experts/mixed-tools.toml index 066b2939..c304e74f 100644 --- a/e2e/experts/mixed-tools.toml +++ b/e2e/experts/mixed-tools.toml @@ -42,7 +42,7 @@ pick = ["attemptCompletion", "think"] version = "1.0.0" description = "E2E test helper expert" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-helper".skills."@perstack/base"] diff --git a/e2e/experts/multi-modal.toml b/e2e/experts/multi-modal.toml index 8595784d..3f023739 100644 --- a/e2e/experts/multi-modal.toml +++ b/e2e/experts/multi-modal.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for PDF file reading" instruction = """ 1. Use readPdfFile to read the PDF at the specified path -2. Call attemptCompletion with a brief summary of the content +2. Call attemptCompletion with a brief summary of the content as the result """ [experts."e2e-pdf-reader".skills."@perstack/base"] @@ -24,7 +24,7 @@ version = "1.0.0" description = "E2E test expert for image file reading" instruction = """ 1. Use readImageFile to read the image at the specified path -2. Call attemptCompletion with a brief description of the image +2. Call attemptCompletion with a brief description of the image as the result """ [experts."e2e-image-reader".skills."@perstack/base"] diff --git a/e2e/experts/parallel-delegate.toml b/e2e/experts/parallel-delegate.toml index bb08ace9..584064dd 100644 --- a/e2e/experts/parallel-delegate.toml +++ b/e2e/experts/parallel-delegate.toml @@ -10,7 +10,7 @@ version = "1.0.0" description = "E2E test expert for parallel delegation" instruction = """ 1. In ONE response, delegate to BOTH "e2e-delegate-math" and "e2e-delegate-text" with "test" -2. When both return, call attemptCompletion with result "OK" +2. When both return, call attemptCompletion """ delegates = ["e2e-delegate-math", "e2e-delegate-text"] @@ -24,7 +24,7 @@ pick = ["attemptCompletion", "think"] version = "1.0.0" description = "Math delegate expert" instruction = """ -Call attemptCompletion with result "Math result: 5" +Call attemptCompletion. """ [experts."e2e-delegate-math".skills."@perstack/base"] @@ -37,7 +37,7 @@ pick = ["attemptCompletion"] version = "1.0.0" description = "Text processing delegate expert" instruction = """ -Call attemptCompletion with result "Text result: olleh" +Call attemptCompletion. """ [experts."e2e-delegate-text".skills."@perstack/base"] diff --git a/e2e/experts/reasoning-budget.toml b/e2e/experts/reasoning-budget.toml index 67533c11..583d9adc 100644 --- a/e2e/experts/reasoning-budget.toml +++ b/e2e/experts/reasoning-budget.toml @@ -12,7 +12,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-minimal".skills."@perstack/base"] @@ -26,7 +26,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-low".skills."@perstack/base"] @@ -40,7 +40,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-medium".skills."@perstack/base"] @@ -54,7 +54,7 @@ version = "1.0.0" description = "E2E test expert for Anthropic reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-anthropic-high".skills."@perstack/base"] @@ -72,7 +72,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-minimal".skills."@perstack/base"] @@ -86,7 +86,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-low".skills."@perstack/base"] @@ -100,7 +100,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-medium".skills."@perstack/base"] @@ -114,7 +114,7 @@ version = "1.0.0" description = "E2E test expert for OpenAI reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-openai-high".skills."@perstack/base"] @@ -132,7 +132,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with minimal budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-minimal".skills."@perstack/base"] @@ -146,7 +146,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with low budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-low".skills."@perstack/base"] @@ -160,7 +160,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with medium budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-medium".skills."@perstack/base"] @@ -174,7 +174,7 @@ version = "1.0.0" description = "E2E test expert for Google reasoning with high budget" instruction = """ Solve this step by step: What is 23 * 47? -Show your reasoning, then call attemptCompletion with the answer. +Show your reasoning, then call attemptCompletion. """ [experts."e2e-reasoning-google-high".skills."@perstack/base"] diff --git a/e2e/experts/runtime-version-future.toml b/e2e/experts/runtime-version-future.toml index b5e79f0d..d782af64 100644 --- a/e2e/experts/runtime-version-future.toml +++ b/e2e/experts/runtime-version-future.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] [experts."e2e-runtime-future"] version = "1.0.0" minRuntimeVersion = "v99.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-future".skills."@perstack/base"] type = "mcpStdioSkill" @@ -23,7 +23,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-future" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-future"] diff --git a/e2e/experts/runtime-version.toml b/e2e/experts/runtime-version.toml index 55796b4e..6fc86fe6 100644 --- a/e2e/experts/runtime-version.toml +++ b/e2e/experts/runtime-version.toml @@ -9,7 +9,7 @@ envPath = [".env", ".env.local"] [experts."e2e-runtime-v1"] version = "1.0.0" minRuntimeVersion = "v1.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-v1".skills."@perstack/base"] type = "mcpStdioSkill" @@ -20,7 +20,7 @@ pick = ["attemptCompletion"] # Expert without minRuntimeVersion (default) [experts."e2e-runtime-default"] version = "1.0.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-default".skills."@perstack/base"] type = "mcpStdioSkill" @@ -34,7 +34,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-chain-ok-l1" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-chain-ok-l1"] @@ -49,7 +49,7 @@ version = "1.0.0" minRuntimeVersion = "v1.0" instruction = """ 1. Delegate to "e2e-runtime-chain-ok-l2" with "test" -2. When done, call attemptCompletion with result "OK" +2. When done, call attemptCompletion """ delegates = ["e2e-runtime-chain-ok-l2"] @@ -62,7 +62,7 @@ pick = ["attemptCompletion"] [experts."e2e-runtime-chain-ok-l2"] version = "1.0.0" minRuntimeVersion = "v1.0" -instruction = "Call attemptCompletion with result 'OK'" +instruction = "Call attemptCompletion." [experts."e2e-runtime-chain-ok-l2".skills."@perstack/base"] type = "mcpStdioSkill" diff --git a/e2e/experts/skills.toml b/e2e/experts/skills.toml index bccc59d3..3afab724 100644 --- a/e2e/experts/skills.toml +++ b/e2e/experts/skills.toml @@ -12,8 +12,8 @@ instruction = """ Follow these steps exactly: 1. Use the todo tool to add the user's request as a task 2. Mark the task as completed using the todo tool -3. Call attemptCompletion -If asked to read a file, report that readTextFile is not available via attemptCompletion. +3. Summarize what you did, then call attemptCompletion +If asked to read a file, report that readTextFile is not available, then call attemptCompletion. """ [experts."e2e-pick-tools".skills."@perstack/base"] @@ -26,7 +26,7 @@ pick = ["attemptCompletion", "todo"] version = "1.0.0" description = "E2E test expert with omitted tools" instruction = """ -Call attemptCompletion with result "OK" +Call attemptCompletion. """ [experts."e2e-omit-tools".skills."@perstack/base"] @@ -40,7 +40,7 @@ version = "1.0.0" description = "E2E test expert with multiple skills" instruction = """ 1. Use todo to track tasks -2. Call attemptCompletion with result "OK" +2. Call attemptCompletion. """ [experts."e2e-multi-skill".skills."exa"] @@ -78,7 +78,7 @@ pick = ["attemptCompletion", "addSkill", "removeSkill"] [experts."e2e-delegate-target"] version = "1.0.0" description = "Simple target expert for delegate testing" -instruction = "Call attemptCompletion with result 'delegated OK'." +instruction = "Call attemptCompletion." [experts."e2e-delegate-target".skills."@perstack/base"] type = "mcpStdioSkill" diff --git a/e2e/experts/special-tools.toml b/e2e/experts/special-tools.toml index 64b00aad..0d550566 100644 --- a/e2e/experts/special-tools.toml +++ b/e2e/experts/special-tools.toml @@ -14,7 +14,7 @@ IMPORTANT: Call ALL 3 tools below IN PARALLEL (single tool call batch): - readImageFile: path="e2e/fixtures/test.gif" - web_search_exa: query="test" -After all 3 complete, call attemptCompletion with "OK". +After all 3 complete, call attemptCompletion. """ [experts."e2e-special-tools".skills."exa"] diff --git a/e2e/experts/versioned-base.toml b/e2e/experts/versioned-base.toml index 23f4c90a..9ff4b01f 100644 --- a/e2e/experts/versioned-base.toml +++ b/e2e/experts/versioned-base.toml @@ -13,7 +13,7 @@ envPath = [".env", ".env.local"] version = "1.0.0" description = "E2E test expert for versioned base with StdioTransport" instruction = """ -Call readTextFile on "perstack.toml" and then call attemptCompletion with result "OK" +Call readTextFile on "perstack.toml", then call attemptCompletion. """ [experts."e2e-versioned-base".skills."@perstack/base"] diff --git a/e2e/perstack-cli/delegate.test.ts b/e2e/perstack-cli/delegate.test.ts index 70218587..12155805 100644 --- a/e2e/perstack-cli/delegate.test.ts +++ b/e2e/perstack-cli/delegate.test.ts @@ -24,7 +24,7 @@ describe("Delegate to Expert", () => { * TOML: delegate-chain.toml defines 3 experts forming a delegation chain * Expected: * - Chain starts at root, delegates to level1, then level2 - * - Each expert calls attemptCompletion with "OK" + * - Each expert calls attemptCompletion * - Control flow: chain→level1→level2→(complete)→level1→(complete)→chain→(complete) * - Total 3 completeRun events (one per expert) */ diff --git a/packages/core/src/schemas/runtime.ts b/packages/core/src/schemas/runtime.ts index 901bd11e..58fc2dcc 100644 --- a/packages/core/src/schemas/runtime.ts +++ b/packages/core/src/schemas/runtime.ts @@ -276,9 +276,6 @@ type ExpertStatePayloads = { resolveToolResults: { toolResults: ToolResult[] } - attemptCompletion: { - toolResult: ToolResult - } finishToolCall: { newMessages: (UserMessage | ToolMessage)[] } @@ -443,7 +440,6 @@ export const callTools = createEvent("callTools") export const finishMcpTools = createEvent("finishMcpTools") export const skipDelegates = createEvent("skipDelegates") export const resolveToolResults = createEvent("resolveToolResults") -export const attemptCompletion = createEvent("attemptCompletion") export const finishToolCall = createEvent("finishToolCall") export const resumeToolCalls = createEvent("resumeToolCalls") export const completeRun = createEvent("completeRun") @@ -552,7 +548,6 @@ const EXPERT_STATE_EVENT_TYPES = new Set([ "finishMcpTools", "skipDelegates", "resolveToolResults", - "attemptCompletion", "finishToolCall", "resumeToolCalls", "continueToNextStep", diff --git a/packages/react/src/utils/event-to-activity.test.ts b/packages/react/src/utils/event-to-activity.test.ts index 1b1a312e..07ee6ed6 100644 --- a/packages/react/src/utils/event-to-activity.test.ts +++ b/packages/react/src/utils/event-to-activity.test.ts @@ -398,37 +398,6 @@ describe("processRunEventToActivity", () => { } }) - it("processes attemptCompletion event with single toolResult", () => { - const state = createInitialActivityProcessState() - const activities: Activity[] = [] - - // First add tool call - const toolCall = createToolCall({ id: "attempt-1", toolName: "attemptCompletion" }) - const callEvent = createBaseEvent({ - type: "callTools", - toolCalls: [toolCall], - newMessage: {} as RunEvent["newMessage"], - usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 }, - } as Partial) as RunEvent - processRunEventToActivity(state, callEvent, (a) => activities.push(a)) - - // Then resolve with attemptCompletion event (single toolResult) - const toolResult = createToolResult({ - id: "attempt-1", - toolName: "attemptCompletion", - result: [{ type: "textPart", id: "tp-1", text: "{}" }], - }) - const resultEvent = createBaseEvent({ - id: "e-2", - type: "attemptCompletion", - toolResult, - } as Partial) as RunEvent - processRunEventToActivity(state, resultEvent, (a) => activities.push(a)) - - expect(activities).toHaveLength(1) - expect(activities[0].type).toBe("attemptCompletion") - }) - it("does not log query when startRun has no user message", () => { const state = createInitialActivityProcessState() const activities: Activity[] = [] diff --git a/packages/react/src/utils/event-to-activity.ts b/packages/react/src/utils/event-to-activity.ts index 283b966e..1e009570 100644 --- a/packages/react/src/utils/event-to-activity.ts +++ b/packages/react/src/utils/event-to-activity.ts @@ -13,8 +13,6 @@ import { createGeneralToolActivity, } from "@perstack/core" -const TOOL_RESULT_EVENT_TYPES = new Set(["resolveToolResults", "attemptCompletion"]) - /** * Converts a tool call and result to an Activity. * Delegates to core's createBaseToolActivity/createGeneralToolActivity to avoid duplication. @@ -156,9 +154,6 @@ const isStopRunByInteractiveToolEvent = ( const isToolResultsEvent = (event: RunEvent): event is RunEvent & { toolResults: ToolResult[] } => event.type === "resolveToolResults" && "toolResults" in event -const isToolResultEvent = (event: RunEvent): event is RunEvent & { toolResult: ToolResult } => - TOOL_RESULT_EVENT_TYPES.has(event.type) && "toolResult" in event - /** * Wraps multiple activities in a ParallelActivitiesGroup with shared reasoning. * If only one activity, returns it directly. @@ -495,22 +490,5 @@ export function processRunEventToActivity( } runState.completedReasoning = undefined } - } else if (isToolResultEvent(event)) { - const { toolResult } = event - const tool = state.tools.get(toolResult.id) - if (tool && !tool.logged) { - const activityId = `action-${tool.id}` - const activity = toolToActivity(tool.toolCall, toolResult, runState.completedReasoning, { - id: activityId, - expertKey: event.expertKey, - runId: event.runId, - previousActivityId: runState.lastActivityId, - delegatedBy: runState.delegatedBy, - }) - addActivity(activity) - runState.lastActivityId = activityId - tool.logged = true - runState.completedReasoning = undefined - } } } diff --git a/packages/runtime/src/messages/instruction-message.ts b/packages/runtime/src/messages/instruction-message.ts index 999a9d3d..f3062207 100644 --- a/packages/runtime/src/messages/instruction-message.ts +++ b/packages/runtime/src/messages/instruction-message.ts @@ -5,8 +5,8 @@ import { dedent } from "ts-dedent" function getMetaInstruction(startedAt: number): string { return dedent` Call tools iteratively to complete the user's task. - When the task is complete, or when you cannot help, call attemptCompletion. - Call attemptCompletion ONLY as a tool call — do not include any text response with it. + When the task is complete, call attemptCompletion with a result parameter containing your final response. + When you cannot help, call attemptCompletion without a result. Environment: - Current time: ${new Date(startedAt).toISOString()} diff --git a/packages/runtime/src/state-machine/machine.ts b/packages/runtime/src/state-machine/machine.ts index e7ac6fa8..f2e8371e 100644 --- a/packages/runtime/src/state-machine/machine.ts +++ b/packages/runtime/src/state-machine/machine.ts @@ -8,7 +8,6 @@ import { callingDelegatesLogic } from "./states/calling-delegates.js" import { callingInteractiveToolsLogic } from "./states/calling-interactive-tools.js" import { callingMcpToolsLogic } from "./states/calling-mcp-tools.js" import { finishingStepLogic } from "./states/finishing-step.js" -import { generatingRunResultLogic } from "./states/generating-run-result.js" import { generatingToolCallLogic } from "./states/generating-tool-call.js" import { initLogic } from "./states/init.js" import { preparingForStepLogic } from "./states/preparing-for-step.js" @@ -237,16 +236,6 @@ export const runtimeStateMachine = setup({ }) satisfies Step, }), }, - attemptCompletion: { - target: "GeneratingRunResult", - actions: assign({ - step: ({ context, event }) => - ({ - ...context.step, - toolResults: [event.toolResult], - }) satisfies Step, - }), - }, finishMcpTools: { target: "CallingDelegates", actions: assign({ @@ -336,56 +325,6 @@ export const runtimeStateMachine = setup({ }, }, - GeneratingRunResult: { - on: { - retry: { - target: "FinishingStep", - actions: assign({ - checkpoint: ({ context, event }) => - ({ - ...context.checkpoint, - messages: [...context.checkpoint.messages, ...event.newMessages], - usage: sumUsage(context.checkpoint.usage, event.usage), - retryCount: (context.checkpoint.retryCount ?? 0) + 1, - }) satisfies Checkpoint, - step: ({ context, event }) => - ({ - ...context.step, - newMessages: event.newMessages, - toolCalls: event.toolCalls, - toolResults: event.toolResults, - usage: sumUsage(context.step.usage, event.usage), - }) satisfies Step, - }), - }, - stopRunByError: { - target: "Stopped", - actions: assign({ - checkpoint: ({ event }) => - ({ - ...event.checkpoint, - error: event.error, - }) satisfies Checkpoint, - step: ({ event }) => - ({ - ...event.step, - inputMessages: undefined, - }) satisfies Step, - }), - }, - completeRun: { - target: "Stopped", - actions: assign({ - checkpoint: ({ event }) => ({ ...event.checkpoint, retryCount: 0 }), - step: ({ event }) => ({ - ...event.step, - inputMessages: undefined, - }), - }), - }, - }, - }, - FinishingStep: { on: { continueToNextStep: { @@ -430,7 +369,6 @@ export const StateMachineLogics: Record< CallingDelegates: callingDelegatesLogic, CallingInteractiveTools: callingInteractiveToolsLogic, ResolvingToolResult: resolvingToolResultLogic, - GeneratingRunResult: generatingRunResultLogic, FinishingStep: finishingStepLogic, } diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts index 0a041daf..5dc864e7 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.test.ts @@ -324,7 +324,7 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { expect(event.type).toBe("resolveToolResults") }) - it("routes attemptCompletion to attemptCompletion handler when no remaining todos", async () => { + it("routes attemptCompletion to completeRun when no remaining todos", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -349,55 +349,22 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { skillManager, llmExecutor: mockLLMExecutor, }) - expect(event.type).toBe("attemptCompletion") + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + expect(event.text).toBe("") + } }) - it("routes attemptCompletion to completeRun when textPart exists in last message", async () => { + it("includes result text in completeRun when args.result is provided", async () => { const setting = createRunSetting() - // Create checkpoint with an expertMessage containing textPart - const checkpoint = createCheckpoint({ - messages: [ - { - id: createId(), - type: "instructionMessage" as const, - contents: [ - { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, - ], - cache: true, - }, - { - id: createId(), - type: "expertMessage" as const, - contents: [ - { - id: createId(), - type: "thinkingPart" as const, - thinking: "Reasoning about the task...", - signature: "sig", - }, - { - id: createId(), - type: "textPart" as const, - text: "Hello! I am your assistant. How can I help you today?", - }, - { - id: createId(), - type: "toolCallPart" as const, - toolCallId: "tc_123", - toolName: "attemptCompletion", - args: {}, - }, - ], - }, - ], - }) + const checkpoint = createCheckpoint() const step = createStep({ toolCalls: [ { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", - args: {}, + args: { result: "Here is my final answer" }, }, ], }) @@ -414,51 +381,21 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { llmExecutor: mockLLMExecutor, }) expect(event.type).toBe("completeRun") - expect((event as { text: string }).text).toBe( - "Hello! I am your assistant. How can I help you today?", - ) + if (event.type === "completeRun") { + expect(event.text).toBe("Here is my final answer") + } }) - it("routes attemptCompletion to attemptCompletion when textPart is empty", async () => { + it("appends expertMessage with textPart as last checkpoint message for delegation compat", async () => { const setting = createRunSetting() - // Create checkpoint with an expertMessage containing empty textPart - const checkpoint = createCheckpoint({ - messages: [ - { - id: createId(), - type: "instructionMessage" as const, - contents: [ - { id: createId(), type: "textPart" as const, text: "You are a helpful assistant." }, - ], - cache: true, - }, - { - id: createId(), - type: "expertMessage" as const, - contents: [ - { - id: createId(), - type: "textPart" as const, - text: " ", // Whitespace only - }, - { - id: createId(), - type: "toolCallPart" as const, - toolCallId: "tc_123", - toolName: "attemptCompletion", - args: {}, - }, - ], - }, - ], - }) + const checkpoint = createCheckpoint() const step = createStep({ toolCalls: [ { id: "tc_123", skillName: "@perstack/base", toolName: "attemptCompletion", - args: {}, + args: { result: "delegation result" }, }, ], }) @@ -474,8 +411,18 @@ describe("@perstack/runtime: callingMcpToolsLogic", () => { skillManager, llmExecutor: mockLLMExecutor, }) - // Should transition to GeneratingRunResult since textPart is empty - expect(event.type).toBe("attemptCompletion") + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + const lastMsg = event.checkpoint.messages[event.checkpoint.messages.length - 1] + expect(lastMsg.type).toBe("expertMessage") + if (lastMsg.type === "expertMessage") { + const textPart = lastMsg.contents.find((c) => c.type === "textPart") + expect(textPart).toBeDefined() + if (textPart?.type === "textPart") { + expect(textPart.text).toBe("delegation result") + } + } + } }) it("routes attemptCompletion to resolveToolResults when remaining todos exist", async () => { diff --git a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts index 697a6923..fe8f848d 100644 --- a/packages/runtime/src/state-machine/states/calling-mcp-tools.ts +++ b/packages/runtime/src/state-machine/states/calling-mcp-tools.ts @@ -1,11 +1,11 @@ import { - attemptCompletion, completeRun, finishMcpTools, type RunEvent, resolveToolResults, type ToolResult, } from "@perstack/core" +import { calculateContextWindowUsage } from "../../helpers/model.js" import { createEmptyUsage, sumUsage } from "../../helpers/usage.js" import { createExpertMessage, createToolMessage } from "../../messages/message.js" import { classifyToolCalls, toolExecutorFactory } from "../../tool-execution/index.js" @@ -24,38 +24,16 @@ function hasRemainingTodos(toolResult: ToolResult): boolean { } } -/** - * Extract textPart from the last expert message. - * When LLM generates both text and attemptCompletion in one response, - * we should use that text as the final result instead of re-generating. - */ -function extractTextFromLastMessage( - checkpoint: RunSnapshot["context"]["checkpoint"], -): string | undefined { - const lastMessage = checkpoint.messages[checkpoint.messages.length - 1] - if (!lastMessage || lastMessage.type !== "expertMessage") { - return undefined - } - const textPart = lastMessage.contents.find((c) => c.type === "textPart") - if (!textPart || textPart.type !== "textPart") { - return undefined - } - // Only return if there's actual content (not just whitespace) - const text = textPart.text.trim() - return text.length > 0 ? text : undefined -} - /** * CallingMcpTools state: Execute MCP tool calls in parallel. * * Responsibilities: * - Execute MCP tools in parallel - * - Handle attemptCompletion specially (→ GeneratingRunResult or completeRun) + * - Handle attemptCompletion specially (→ completeRun directly) * - Classify remaining tool calls and route to appropriate state * * Transitions: - * - attemptCompletion → GeneratingRunResult (needs LLM to generate final result) - * - completeRun → Stopped (attemptCompletion with existing text) + * - completeRun → Stopped (attemptCompletion with no remaining todos) * - finishMcpTools → CallingDelegates (has delegate/interactive tools remaining) * - resolveToolResults → ResolvingToolResult (MCP only, no other tools) */ @@ -86,12 +64,15 @@ export async function callingMcpToolsLogic({ return resolveToolResults(setting, checkpoint, { toolResults: [toolResult] }) } - // Check if LLM already generated a text response along with attemptCompletion - // If so, use that text directly instead of transitioning to GeneratingRunResult - const existingText = extractTextFromLastMessage(checkpoint) - if (existingText) { - // Build tool message for the attemptCompletion result - const toolResultPart = { + // Extract result from attemptCompletion args + const result = + typeof attemptCompletionTool.args?.result === "string" + ? attemptCompletionTool.args.result + : "" + + // Build toolMessage from attemptCompletion result + const toolResultParts = [ + { type: "toolResultPart" as const, toolCallId: toolResult.id, toolName: attemptCompletionTool.toolName, @@ -101,38 +82,38 @@ export async function callingMcpToolsLogic({ part.type === "imageInlinePart" || part.type === "fileInlinePart", ), - } - const toolMessage = createToolMessage([toolResultPart]) - // Create expertMessage with the existing text for delegation result handling - const expertMessage = createExpertMessage([{ type: "textPart", text: existingText }]) - const newMessages = [toolMessage, expertMessage] - const newUsage = sumUsage(checkpoint.usage, createEmptyUsage()) + }, + ] + const toolMessage = createToolMessage(toolResultParts) - // Complete run directly with the existing text - return completeRun(setting, checkpoint, { - checkpoint: { - ...checkpoint, - messages: [...checkpoint.messages, ...newMessages], - usage: newUsage, - contextWindowUsage: checkpoint.contextWindowUsage, - status: "completed", - // Clear tool handling state on completion - pendingToolCalls: undefined, - partialToolResults: undefined, - }, - step: { - ...step, - newMessages: [...step.newMessages, ...newMessages], - toolResults: [toolResult], - finishedAt: Date.now(), - }, - text: existingText, - usage: createEmptyUsage(), - }) - } + // Build expertMessage with textPart — critical for delegation + // (DelegationExecutor.extractDelegationResult expects last message = expertMessage) + const expertMessage = createExpertMessage([{ type: "textPart" as const, text: result }]) - // No existing text - transition to GeneratingRunResult to generate final result - return attemptCompletion(setting, checkpoint, { toolResult }) + const newMessages = [toolMessage, expertMessage] + const newUsage = sumUsage(checkpoint.usage, createEmptyUsage()) + + return completeRun(setting, checkpoint, { + checkpoint: { + ...checkpoint, + messages: [...checkpoint.messages, ...newMessages], + usage: newUsage, + contextWindowUsage: checkpoint.contextWindow + ? calculateContextWindowUsage(newUsage, checkpoint.contextWindow) + : undefined, + status: "completed", + pendingToolCalls: undefined, + partialToolResults: undefined, + }, + step: { + ...step, + newMessages: [...step.newMessages, ...newMessages], + finishedAt: Date.now(), + usage: sumUsage(step.usage, createEmptyUsage()), + }, + text: result, + usage: createEmptyUsage(), + }) } // Classify tool calls by type diff --git a/packages/runtime/src/state-machine/states/generating-run-result.test.ts b/packages/runtime/src/state-machine/states/generating-run-result.test.ts deleted file mode 100644 index 79d43ef0..00000000 --- a/packages/runtime/src/state-machine/states/generating-run-result.test.ts +++ /dev/null @@ -1,349 +0,0 @@ -import { createId } from "@paralleldrive/cuid2" -import type { GenerateTextResult, ToolSet } from "ai" -import { beforeEach, describe, expect, it } from "vitest" -import { - createCheckpoint, - createMockSkillManagerFromAdapters, - createRunSetting, - createStep, -} from "../../../test/run-params.js" -import type { LLMExecutor } from "../../llm/index.js" -import { createMockLLMExecutor, type MockLLMExecutor } from "../../llm/index.js" -import type { LLMExecutionResult } from "../../llm/types.js" -import { StateMachineLogics } from "../index.js" - -let mockLLMExecutor: MockLLMExecutor - -function createMockResult(text?: string): LLMExecutionResult { - return { - success: true, - result: { - text, - finishReason: "stop", - toolCalls: [], - usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, - response: { id: "mock", timestamp: new Date(), modelId: "mock", headers: {} }, - request: {}, - toolResults: [], - warnings: [], - sources: [], - providerMetadata: undefined, - reasoning: undefined, - reasoningDetails: [], - files: [], - logprobs: undefined, - toJsonResponse: () => new Response(), - experimental_output: undefined, - steps: [], - rawCall: {}, - } as unknown as GenerateTextResult, - } -} - -function createMockErrorResult( - error: { name: string; message: string; statusCode?: number }, - isRetryable: boolean, -): LLMExecutionResult { - return { - success: false, - error: { - name: error.name, - message: error.message, - statusCode: error.statusCode, - isRetryable, - provider: "anthropic", - }, - isRetryable, - } -} - -describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { - beforeEach(() => { - mockLLMExecutor = createMockLLMExecutor() - }) - - it("generates run result via LLM and completes", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult("Task completed successfully")) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("completeRun") - if (event.type === "completeRun") { - expect(event.text).toBe("Task completed successfully") - } - }) - - it("returns retry event on generation error", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult({ name: "Error", message: "Generation failed" }, true), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("retry") - }) - - it("returns stopRunByError event on non-retryable API error (401)", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult( - { name: "APICallError", message: "Unauthorized", statusCode: 401 }, - false, - ), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("stopRunByError") - if (event.type === "stopRunByError") { - expect(event.error.statusCode).toBe(401) - expect(event.error.isRetryable).toBe(false) - expect(event.checkpoint.status).toBe("stoppedByError") - } - }) - - it("returns retry event on retryable API error (429)", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult( - { name: "APICallError", message: "Rate limited", statusCode: 429 }, - true, - ), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("retry") - }) - - it("throws error when tool calls or results missing", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ toolCalls: undefined, toolResults: undefined }) - await expect( - StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }), - ).rejects.toThrow("No tool calls or tool results found") - }) - - it("includes proper event metadata", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult("Final result")) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.id).toBeDefined() - expect(typeof event.id).toBe("string") - }) - - it("returns stopRunByError when retryable error occurs but retryCount >= maxRetries", async () => { - const setting = createRunSetting({ maxRetries: 3 }) - const checkpoint = createCheckpoint({ retryCount: 3 }) - const step = createStep({ - toolCalls: [ - { - id: "tc_retry", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_retry", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult( - createMockErrorResult({ name: "RateLimitError", message: "Rate limited" }, true), - ) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("stopRunByError") - if (event.type === "stopRunByError") { - expect(event.error.message).toContain("Max retries (3) exceeded") - } - }) - - it("falls back to 'OK' when LLM generates empty text", async () => { - const setting = createRunSetting() - const checkpoint = createCheckpoint() - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult(undefined)) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("completeRun") - if (event.type === "completeRun") { - expect(event.text).toBe("OK") - const lastMessage = event.checkpoint.messages[event.checkpoint.messages.length - 1] - expect(lastMessage.type).toBe("expertMessage") - if (lastMessage.type === "expertMessage") { - const textPart = lastMessage.contents.find((c) => c.type === "textPart") - expect(textPart).toBeDefined() - if (textPart?.type === "textPart") { - expect(textPart.text).toBe("OK") - } - } - } - }) -}) diff --git a/packages/runtime/src/state-machine/states/generating-run-result.ts b/packages/runtime/src/state-machine/states/generating-run-result.ts deleted file mode 100644 index 8e7eeafb..00000000 --- a/packages/runtime/src/state-machine/states/generating-run-result.ts +++ /dev/null @@ -1,196 +0,0 @@ -import { - completeRun, - createStreamingEvent, - type RunEvent, - retry, - stopRunByError, - type TextPart, - type ThinkingPart, -} from "@perstack/core" -import { calculateContextWindowUsage } from "../../helpers/model.js" -import { - extractThinkingParts, - extractThinkingText, - type ReasoningPart, -} from "../../helpers/thinking.js" -import { createEmptyUsage, sumUsage, usageFromGenerateTextResult } from "../../helpers/usage.js" -import type { StreamCallbacks } from "../../llm/types.js" -import { - createExpertMessage, - createToolMessage, - createUserMessage, - messageToCoreMessage, -} from "../../messages/message.js" -import type { RunSnapshot } from "../machine.js" - -/** - * GeneratingRunResult state: Generate final result after attemptCompletion. - * - * Responsibilities: - * - Call LLM to generate final text response - * - Stream result with reasoning/result callbacks - * - Build tool message from attemptCompletion result - * - * Transitions: - * - completeRun → Stopped (successful completion) - * - retry → FinishingStep (retryable error) - * - stopRunByError → Stopped (non-retryable error) - */ -export async function generatingRunResultLogic({ - setting, - checkpoint, - step, - eventListener, - llmExecutor, -}: RunSnapshot["context"]): Promise { - if (!step.toolCalls || !step.toolResults || step.toolResults.length === 0) { - throw new Error("No tool calls or tool results found") - } - const toolResultParts = step.toolResults.map((toolResult) => { - const toolCall = step.toolCalls?.find((tc) => tc.id === toolResult.id) - return { - type: "toolResultPart" as const, - toolCallId: toolResult.id, - toolName: toolCall?.toolName ?? toolResult.toolName, - contents: toolResult.result.filter( - (part) => - part.type === "textPart" || - part.type === "imageInlinePart" || - part.type === "fileInlinePart", - ), - } - }) - const toolMessage = createToolMessage(toolResultParts) - const { messages } = checkpoint - const coreMessages = [...messages, toolMessage].map(messageToCoreMessage) - - // Track if reasoning/result was completed via callback (to avoid duplicate emissions) - let reasoningCompletedViaCallback = false - let _resultCompletedViaCallback = false - - // Create streaming callbacks for fire-and-forget event emission - const callbacks: StreamCallbacks = { - onReasoningStart: () => { - eventListener(createStreamingEvent("startStreamingReasoning", setting, checkpoint, {})) - }, - onReasoningDelta: (delta) => { - eventListener(createStreamingEvent("streamReasoning", setting, checkpoint, { delta })) - }, - onReasoningComplete: (text) => { - // Emit completeStreamingReasoning before result phase starts - eventListener( - createStreamingEvent("completeStreamingReasoning", setting, checkpoint, { text }), - ) - reasoningCompletedViaCallback = true - }, - onResultStart: () => { - eventListener(createStreamingEvent("startStreamingRunResult", setting, checkpoint, {})) - }, - onResultDelta: (delta) => { - eventListener(createStreamingEvent("streamRunResult", setting, checkpoint, { delta })) - }, - onResultComplete: (text) => { - eventListener( - createStreamingEvent("completeStreamingRunResult", setting, checkpoint, { text }), - ) - _resultCompletedViaCallback = true - }, - } - - const executionResult = await llmExecutor.streamText( - { - messages: coreMessages, - maxRetries: setting.maxRetries, - tools: {}, // No tools for run result generation - abortSignal: AbortSignal.timeout(setting.timeout), - reasoningBudget: setting.reasoningBudget, - }, - callbacks, - ) - - if (!executionResult.success) { - const { error, isRetryable } = executionResult - const currentRetryCount = checkpoint.retryCount ?? 0 - if (!isRetryable || currentRetryCount >= setting.maxRetries) { - return stopRunByError(setting, checkpoint, { - checkpoint: { - ...checkpoint, - status: "stoppedByError", - }, - step: { - ...step, - finishedAt: Date.now(), - }, - error: { - name: error.name ?? "Error", - message: - currentRetryCount >= setting.maxRetries - ? `Max retries (${setting.maxRetries}) exceeded: ${error.message}` - : error.message, - statusCode: error.statusCode, - isRetryable: false, - }, - }) - } - const reason = JSON.stringify({ error: error.name ?? "Error", message: error.message }) - return retry(setting, checkpoint, { - reason, - newMessages: [toolMessage, createUserMessage([{ type: "textPart", text: reason }])], - usage: createEmptyUsage(), - }) - } - - const generationResult = executionResult.result - const usage = usageFromGenerateTextResult(generationResult) - const { text, reasoning } = generationResult - - // Extract thinking from reasoning (Anthropic, Google) - const thinkingParts = extractThinkingParts(reasoning as ReasoningPart[] | undefined) - const thinkingText = extractThinkingText(reasoning as ReasoningPart[] | undefined) - - // Fallback when LLM generates no visible text (e.g., only reasoning/thinking with extended thinking). - // Since GeneratingRunResult is only reached after a successful attemptCompletion (no remaining todos), - // the task is already complete — use "OK" as a minimal non-empty completion text. - const resultText = text || "OK" - - // Build ExpertMessage with ThinkingPart + TextPart - const expertContents: Array | Omit> = [ - ...thinkingParts, - { type: "textPart" as const, text: resultText }, - ] - const newMessages = [toolMessage, createExpertMessage(expertContents)] - - // Note: completeStreamingReasoning is emitted via onReasoningComplete callback during streaming - // Fallback emission only if callback wasn't triggered (should be rare) - if (thinkingText && !reasoningCompletedViaCallback) { - await eventListener( - createStreamingEvent("completeStreamingReasoning", setting, checkpoint, { - text: thinkingText, - }), - ) - } - - const newUsage = sumUsage(checkpoint.usage, usage) - return completeRun(setting, checkpoint, { - checkpoint: { - ...checkpoint, - messages: [...messages, ...newMessages], - usage: newUsage, - contextWindowUsage: checkpoint.contextWindow - ? calculateContextWindowUsage(usage, checkpoint.contextWindow) - : undefined, - status: "completed", - // Clear tool handling state on completion - pendingToolCalls: undefined, - partialToolResults: undefined, - }, - step: { - ...step, - newMessages: [...step.newMessages, ...newMessages], - finishedAt: Date.now(), - usage: sumUsage(step.usage, usage), - }, - text: resultText, - usage, - }) -}