diff --git a/.changeset/filter-reasoning-parts.md b/.changeset/filter-reasoning-parts.md
new file mode 100644
index 00000000..617d4f79
--- /dev/null
+++ b/.changeset/filter-reasoning-parts.md
@@ -0,0 +1,10 @@
+---
+"@perstack/runtime": patch
+---
+
+fix(runtime): filter reasoning parts for non-Anthropic providers
+
+Added filtering to remove reasoning parts from message history when sending
+to OpenAI, Azure OpenAI, DeepSeek, or Ollama providers. This prevents the
+"Non-OpenAI reasoning parts are not supported" warning from AI SDK when
+resuming runs that were previously executed with Anthropic's extended thinking.
diff --git a/.changeset/fix-duplicate-historical-events.md b/.changeset/fix-duplicate-historical-events.md
new file mode 100644
index 00000000..a06ddd7e
--- /dev/null
+++ b/.changeset/fix-duplicate-historical-events.md
@@ -0,0 +1,10 @@
+---
+"perstack": patch
+---
+
+fix(tui): prevent duplicate display of historical events on continue
+
+Changed the execution loop to only pass historical events on the first iteration.
+Subsequent iterations no longer receive historical events since the previous TUI
+output remains visible on screen, preventing duplicate display of query/complete
+activities.
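To make the first changeset concrete: a history captured from a run that used Anthropic extended thinking carries `reasoning` parts inside assistant messages, which OpenAI-family providers reject on resume. An illustrative sketch of the message shape (AI SDK `ModelMessage`; the values are invented):

```ts
import type { ModelMessage } from "ai"

// History captured from a run that used Anthropic extended thinking:
const resumed: ModelMessage[] = [
  { role: "user", content: "Hello" },
  {
    role: "assistant",
    content: [
      { type: "reasoning", text: "Let me think..." }, // rejected by OpenAI-family providers
      { type: "text", text: "Response" },
    ],
  },
]
// After the runtime filter below, the assistant content sent to e.g. OpenAI
// keeps only: [{ type: "text", text: "Response" }]
```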
diff --git a/apps/perstack/src/start.ts b/apps/perstack/src/start.ts
index 9060b44c..b490e1e4 100644
--- a/apps/perstack/src/start.ts
+++ b/apps/perstack/src/start.ts
@@ -13,7 +13,7 @@ import {
   getAllJobs,
   getCheckpointById,
   getCheckpointsWithDetails,
-  getEventContents,
+  type getEventContents,
   getRecentExperts,
 } from "./lib/run-manager.js"
 import { dispatchToRuntime } from "./lib/runtime-dispatcher.js"
@@ -160,16 +160,19 @@ export const startCommand = new Command()
   // Track if the next query should be treated as an interactive tool result
   let isNextQueryInteractiveToolResult = input.options.interactiveToolCallResult ?? false

-  // Track accumulated events across continues
+  // Track whether this is the first iteration (for historical events display)
   // On first iteration, load all events for the job up to the checkpoint
-  // On subsequent iterations, append new events from the previous run
-  let accumulatedEvents: ReturnType<typeof getEventContents> | undefined = currentCheckpoint
-    ? getAllEventContentsForJob(currentCheckpoint.jobId, currentCheckpoint.stepNumber)
-    : undefined
+  // On subsequent iterations, skip historical events (previous TUI already displayed them)
+  let isFirstIteration = true
+  const initialHistoricalEvents: ReturnType<typeof getEventContents> | undefined =
+    currentCheckpoint
+      ? getAllEventContentsForJob(currentCheckpoint.jobId, currentCheckpoint.stepNumber)
+      : undefined

   while (currentQuery !== null) {
-    // Use accumulated events for historical display
-    const historicalEvents = accumulatedEvents
+    // Only pass historical events on first iteration
+    // Subsequent iterations: previous TUI output remains on screen
+    const historicalEvents = isFirstIteration ? initialHistoricalEvents : undefined

     // Start execution TUI
     const { result: executionResult, eventListener } = renderExecution({
@@ -236,13 +239,8 @@ export const startCommand = new Command()
       // If the run stopped for interactive tool, the next query is an interactive tool result
       isNextQueryInteractiveToolResult = runResult.status === "stoppedByInteractiveTool"

-      // Accumulate events from the completed run for the next iteration
-      const newRunEvents = getEventContents(runResult.jobId, runResult.runId)
-      if (accumulatedEvents) {
-        accumulatedEvents = [...accumulatedEvents, ...newRunEvents]
-      } else {
-        accumulatedEvents = newRunEvents
-      }
+      // Mark first iteration as complete (subsequent TUIs won't show historical events)
+      isFirstIteration = false
     } else {
       currentQuery = null
     }
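The `start.ts` change boils down to a first-pass flag that gates history replay. A minimal sketch of the pattern in isolation — `fetchHistory` and `render` are hypothetical stand-ins for `getAllEventContentsForJob` and `renderExecution`:

```ts
type HistEvent = { content: string }

// Sketch only: stand-ins for the real run-manager and TUI functions.
function runLoop(
  queries: string[],
  fetchHistory: () => HistEvent[],
  render: (history?: HistEvent[]) => void,
) {
  let isFirstIteration = true
  const initialHistory = fetchHistory() // loaded once, up front
  for (const _query of queries) {
    // Only the first render replays history; later renders rely on the
    // previous TUI output still being visible on screen.
    render(isFirstIteration ? initialHistory : undefined)
    isFirstIteration = false
  }
}
```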
diff --git a/apps/runtime/src/llm/executor.test.ts b/apps/runtime/src/llm/executor.test.ts
index 8c7f5677..347e9d4d 100644
--- a/apps/runtime/src/llm/executor.test.ts
+++ b/apps/runtime/src/llm/executor.test.ts
@@ -1,3 +1,4 @@
+import type { ProviderName } from "@perstack/core"
 import type { ProviderAdapter } from "@perstack/provider-core"
 import * as aiModule from "ai"
 import { beforeEach, describe, expect, it, vi } from "vitest"
@@ -14,8 +15,10 @@ vi.mock("ai", async (importOriginal) => {
 })

 describe("LLMExecutor", () => {
-  const createMockAdapter = (overrides: Partial<ProviderAdapter> = {}): ProviderAdapter => ({
-    providerName: "anthropic",
+  const createMockAdapter = (
+    overrides: Partial<ProviderAdapter> = {},
+  ): ProviderAdapter => ({
+    providerName: "anthropic" as ProviderName,
     createModel: vi.fn(),
     getProviderTools: vi.fn().mockReturnValue({}),
     getProviderOptions: vi.fn().mockReturnValue(undefined),
@@ -531,4 +534,128 @@ describe("LLMExecutor", () => {
       expect(adapter.getReasoningOptions).toHaveBeenCalledWith("medium")
     })
   })
+
+  describe("reasoning part filtering", () => {
+    it("filters reasoning parts for OpenAI provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      const messagesWithReasoning = [
+        { role: "user" as const, content: "Hello" },
+        {
+          role: "assistant" as const,
+          content: [
+            { type: "reasoning" as const, text: "Let me think..." },
+            { type: "text" as const, text: "Response" },
+          ],
+        },
+      ]
+
+      await executor.generateText({
+        messages: messagesWithReasoning,
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toBeDefined()
+      if (callArgs.messages) {
+        const assistantMessage = callArgs.messages[1]
+        expect(assistantMessage.content).toEqual([{ type: "text", text: "Response" }])
+      }
+    })
+
+    it("preserves reasoning parts for Anthropic provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "anthropic" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      const messagesWithReasoning = [
+        { role: "user" as const, content: "Hello" },
+        {
+          role: "assistant" as const,
+          content: [
+            { type: "reasoning" as const, text: "Let me think..." },
+            { type: "text" as const, text: "Response" },
+          ],
+        },
+      ]
+
+      await executor.generateText({
+        messages: messagesWithReasoning,
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toBeDefined()
+      if (callArgs.messages) {
+        const assistantMessage = callArgs.messages[1]
+        expect(assistantMessage.content).toEqual([
+          { type: "reasoning", text: "Let me think..." },
+          { type: "text", text: "Response" },
+        ])
+      }
+    })
+
+    it("filters reasoning parts for DeepSeek provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "deepseek" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      const messagesWithReasoning = [
+        { role: "user" as const, content: "Hello" },
+        {
+          role: "assistant" as const,
+          content: [{ type: "reasoning" as const, text: "Thinking..." }],
+        },
+      ]
+
+      await executor.generateText({
+        messages: messagesWithReasoning,
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toBeDefined()
+      if (callArgs.messages) {
+        const assistantMessage = callArgs.messages[1]
+        expect(assistantMessage.content).toEqual([])
+      }
+    })
+
+    it("preserves non-array assistant content", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      const messagesWithStringContent = [
+        { role: "user" as const, content: "Hello" },
+        { role: "assistant" as const, content: "Simple string response" },
+      ]
+
+      await executor.generateText({
+        messages: messagesWithStringContent,
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toBeDefined()
+      if (callArgs.messages) {
+        const assistantMessage = callArgs.messages[1]
+        expect(assistantMessage.content).toBe("Simple string response")
+      }
+    })
+  })
 })
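The new tests lean on Vitest's partial module mocking (visible at the top of the file) so that only `generateText` is stubbed while the rest of the `ai` module stays real. For readers unfamiliar with the pattern, a minimal sketch — the actual `mockGenerateText` wiring in this file presumably looks similar:

```ts
import { vi } from "vitest"

// Replace only generateText; every other export of "ai" is kept intact.
vi.mock("ai", async (importOriginal) => {
  const actual = await importOriginal<typeof import("ai")>()
  return { ...actual, generateText: vi.fn() }
})
```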
diff --git a/apps/runtime/src/llm/executor.ts b/apps/runtime/src/llm/executor.ts
index 212ce4aa..9ff2d3e4 100644
--- a/apps/runtime/src/llm/executor.ts
+++ b/apps/runtime/src/llm/executor.ts
@@ -1,6 +1,12 @@
-import type { ReasoningBudget } from "@perstack/core"
+import type { ProviderName, ReasoningBudget } from "@perstack/core"
 import type { ProviderAdapter, ProviderOptions } from "@perstack/provider-core"
-import { generateText, type LanguageModel, streamText } from "ai"
+import {
+  type AssistantModelMessage,
+  generateText,
+  type LanguageModel,
+  type ModelMessage,
+  streamText,
+} from "ai"
 import type {
   GenerateTextParams,
   LLMExecutionResult,
@@ -12,6 +18,43 @@ import type {
 const shouldEnableReasoning = (budget: ReasoningBudget | undefined): boolean =>
   budget !== undefined && budget !== "none" && budget !== 0

+/** Providers that don't support Anthropic-style reasoning parts in message history */
+const PROVIDERS_WITHOUT_REASONING_HISTORY: ProviderName[] = [
+  "openai",
+  "azure-openai",
+  "deepseek",
+  "ollama",
+]
+
+/**
+ * Filter out reasoning parts from messages for providers that don't support them.
+ * This prevents "Non-OpenAI reasoning parts are not supported" warnings from AI SDK.
+ */
+function filterReasoningPartsForProvider(
+  messages: ModelMessage[],
+  providerName: ProviderName,
+): ModelMessage[] {
+  if (!PROVIDERS_WITHOUT_REASONING_HISTORY.includes(providerName)) {
+    return messages
+  }
+  return messages.map((message) => {
+    if (message.role !== "assistant") {
+      return message
+    }
+    const assistantMessage = message as AssistantModelMessage
+    if (!Array.isArray(assistantMessage.content)) {
+      return message
+    }
+    const filteredContent = assistantMessage.content.filter(
+      (part) => !("type" in part && part.type === "reasoning"),
+    )
+    if (filteredContent.length === assistantMessage.content.length) {
+      return message
+    }
+    return { ...assistantMessage, content: filteredContent }
+  })
+}
+
 export class LLMExecutor {
   constructor(
     private readonly adapter: ProviderAdapter,
@@ -30,11 +73,12 @@ export class LLMExecutor {
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
+    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)

     try {
       const result = await generateText({
         model: this.model,
-        messages: params.messages,
+        messages,
         maxRetries: params.maxRetries,
         tools: { ...params.tools, ...providerTools },
         toolChoice: params.toolChoice,
@@ -79,11 +123,12 @@ export class LLMExecutor {
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
+    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)

     try {
       const result = await generateText({
         model: this.model,
-        messages: params.messages,
+        messages,
         maxRetries: params.maxRetries,
         abortSignal: params.abortSignal,
         providerOptions,
@@ -114,10 +159,11 @@ export class LLMExecutor {
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
+    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)

     const streamResult = streamText({
       model: this.model,
-      messages: params.messages,
+      messages,
       maxRetries: params.maxRetries,
       tools: { ...params.tools, ...providerTools },
       toolChoice: params.toolChoice,
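A detail worth calling out in review: the helper returns the original message object whenever nothing needs to change (unsupported provider, non-assistant role, string content, or no reasoning parts), so unchanged history passes through without copying. A compact re-statement of that behavior for illustration — `stripReasoning` is an invented name, not an export of the patch:

```ts
import type { AssistantModelMessage, ModelMessage } from "ai"

// Compact re-statement of the patch's helper, for illustration only.
const stripReasoning = (messages: ModelMessage[]): ModelMessage[] =>
  messages.map((message) => {
    if (message.role !== "assistant") return message
    const assistant = message as AssistantModelMessage
    if (!Array.isArray(assistant.content)) return message
    const kept = assistant.content.filter((part) => part.type !== "reasoning")
    // Return the original object when nothing was removed.
    return kept.length === assistant.content.length ? message : { ...assistant, content: kept }
  })

const plain: ModelMessage[] = [
  { role: "user", content: "Hi" },
  { role: "assistant", content: "No reasoning here" }, // string content: untouched
]

console.log(stripReasoning(plain)[1] === plain[1]) // true — identity preserved
```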