diff --git a/.changeset/fix-empty-run-result-fallback.md b/.changeset/fix-empty-run-result-fallback.md new file mode 100644 index 00000000..3c87be83 --- /dev/null +++ b/.changeset/fix-empty-run-result-fallback.md @@ -0,0 +1,5 @@ +--- +"@perstack/runtime": patch +--- + +Use fallback text instead of retry when LLM generates empty text in GeneratingRunResult diff --git a/packages/runtime/src/state-machine/states/generating-run-result.test.ts b/packages/runtime/src/state-machine/states/generating-run-result.test.ts index 508a0212..79d43ef0 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.test.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.test.ts @@ -302,7 +302,7 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { } }) - it("retries when LLM generates empty text", async () => { + it("falls back to 'OK' when LLM generates empty text", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() const step = createStep({ @@ -332,46 +332,18 @@ describe("@perstack/runtime: StateMachineLogic['GeneratingRunResult']", () => { skillManager: createMockSkillManagerFromAdapters({}), llmExecutor: mockLLMExecutor as unknown as LLMExecutor, }) - expect(event.type).toBe("retry") - if (event.type === "retry") { - expect(event.reason).toContain("No text generated") - } - }) - - it("returns stopRunByError when empty text retries exceed maxRetries", async () => { - const setting = createRunSetting({ maxRetries: 2 }) - const checkpoint = createCheckpoint({ retryCount: 2 }) - const step = createStep({ - toolCalls: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - args: {}, - }, - ], - toolResults: [ - { - id: "tc_123", - skillName: "@perstack/base", - toolName: "attemptCompletion", - result: [{ type: "textPart", text: JSON.stringify({}), id: createId() }], - }, - ], - }) - mockLLMExecutor.setMockResult(createMockResult(undefined)) - const event = await StateMachineLogics.GeneratingRunResult({ - setting, - checkpoint, - step, - eventListener: async () => {}, - skillManager: createMockSkillManagerFromAdapters({}), - llmExecutor: mockLLMExecutor as unknown as LLMExecutor, - }) - expect(event.type).toBe("stopRunByError") - if (event.type === "stopRunByError") { - expect(event.error.name).toBe("EmptyRunResult") - expect(event.error.message).toContain("No text generated for run result") + expect(event.type).toBe("completeRun") + if (event.type === "completeRun") { + expect(event.text).toBe("OK") + const lastMessage = event.checkpoint.messages[event.checkpoint.messages.length - 1] + expect(lastMessage.type).toBe("expertMessage") + if (lastMessage.type === "expertMessage") { + const textPart = lastMessage.contents.find((c) => c.type === "textPart") + expect(textPart).toBeDefined() + if (textPart?.type === "textPart") { + expect(textPart.text).toBe("OK") + } + } } }) }) diff --git a/packages/runtime/src/state-machine/states/generating-run-result.ts b/packages/runtime/src/state-machine/states/generating-run-result.ts index a1a37a7c..e390f9dc 100644 --- a/packages/runtime/src/state-machine/states/generating-run-result.ts +++ b/packages/runtime/src/state-machine/states/generating-run-result.ts @@ -144,46 +144,19 @@ export async function generatingRunResultLogic({ const usage = usageFromGenerateTextResult(generationResult) const { text, reasoning } = generationResult - // Empty text = retry (LLM generated no visible text, possibly only reasoning/thinking) - if (!text) { - const currentRetryCount = checkpoint.retryCount ?? 0 - if (currentRetryCount >= setting.maxRetries) { - return stopRunByError(setting, checkpoint, { - checkpoint: { - ...checkpoint, - status: "stoppedByError", - }, - step: { - ...step, - finishedAt: Date.now(), - }, - error: { - name: "EmptyRunResult", - message: `Max retries (${setting.maxRetries}) exceeded: No text generated for run result`, - isRetryable: false, - }, - }) - } - const reason = JSON.stringify({ - error: "Error: No text generated", - message: "You must provide a text response summarizing the result. Try again.", - }) - return retry(setting, checkpoint, { - reason, - newMessages: [toolMessage, createUserMessage([{ type: "textPart", text: reason }])], - usage, - }) - } - // Extract thinking from reasoning (Anthropic, Google) const thinkingParts = extractThinkingParts(reasoning as ReasoningPart[] | undefined) const thinkingText = extractThinkingText(reasoning as ReasoningPart[] | undefined) + // Fallback when LLM generates no visible text (e.g., only reasoning/thinking with extended thinking). + // Since GeneratingRunResult is only reached after a successful attemptCompletion (no remaining todos), + // the task is already complete — use "OK" as a minimal non-empty completion text. + const resultText = text || "OK" + // Build ExpertMessage with ThinkingPart + TextPart - // Always include textPart even if empty - required for delegation result handling const expertContents: Array | Omit> = [ ...thinkingParts, - { type: "textPart" as const, text: text ?? "" }, + { type: "textPart" as const, text: resultText }, ] const newMessages = [toolMessage, createExpertMessage(expertContents)] @@ -217,7 +190,7 @@ export async function generatingRunResultLogic({ finishedAt: Date.now(), usage: sumUsage(step.usage, usage), }, - text, + text: resultText, usage, }) }