diff --git a/.changeset/merge-system-messages-openai.md b/.changeset/merge-system-messages-openai.md
new file mode 100644
index 00000000..13befc62
--- /dev/null
+++ b/.changeset/merge-system-messages-openai.md
@@ -0,0 +1,5 @@
+---
+"@perstack/runtime": patch
+---
+
+Merge consecutive system messages for OpenAI/Azure OpenAI providers to prevent empty responses from reasoning models
diff --git a/packages/runtime/src/llm/executor.test.ts b/packages/runtime/src/llm/executor.test.ts
index 762bac2c..ee8e260e 100644
--- a/packages/runtime/src/llm/executor.test.ts
+++ b/packages/runtime/src/llm/executor.test.ts
@@ -660,4 +660,138 @@ describe("LLMExecutor", () => {
       }
     })
   })
+
+  describe("system message merging", () => {
+    it("merges consecutive system messages for OpenAI provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      await executor.generateText({
+        messages: [
+          { role: "system" as const, content: "System part 1" },
+          { role: "system" as const, content: "System part 2" },
+          { role: "system" as const, content: "System part 3" },
+          { role: "user" as const, content: "Hello" },
+        ],
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toEqual([
+        {
+          role: "system",
+          content: "System part 1\n\nSystem part 2\n\nSystem part 3",
+          providerOptions: undefined,
+        },
+        { role: "user", content: "Hello" },
+      ])
+    })
+
+    it("preserves providerOptions from last system message in group", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      await executor.generateText({
+        messages: [
+          { role: "system" as const, content: "Part 1" },
+          {
+            role: "system" as const,
+            content: "Part 2",
+            providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
+          },
+          { role: "user" as const, content: "Hello" },
+        ],
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages[0]).toEqual({
+        role: "system",
+        content: "Part 1\n\nPart 2",
+        providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
+      })
+    })
+
+    it("does not merge system messages for Anthropic provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "anthropic" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      await executor.generateText({
+        messages: [
+          { role: "system" as const, content: "System part 1" },
+          { role: "system" as const, content: "System part 2" },
+          { role: "user" as const, content: "Hello" },
+        ],
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toEqual([
+        { role: "system", content: "System part 1" },
+        { role: "system", content: "System part 2" },
+        { role: "user", content: "Hello" },
+      ])
+    })
+
+    it("passes through single system message unchanged for OpenAI", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      const opts = { anthropic: { cacheControl: { type: "ephemeral" } } }
+      await executor.generateText({
+        messages: [
+          { role: "system" as const, content: "Only system", providerOptions: opts },
+          { role: "user" as const, content: "Hello" },
+        ],
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages[0]).toEqual({
+        role: "system",
+        content: "Only system",
+        providerOptions: opts,
+      })
+    })
+
+    it("merges system messages for Azure OpenAI provider", async () => {
+      mockGenerateText.mockResolvedValue({ finishReason: "stop" } as never)
+
+      const adapter = createMockAdapter({ providerName: "azure-openai" as ProviderName })
+      const model = createMockModel()
+      const executor = new LLMExecutor(adapter, model)
+
+      await executor.generateText({
+        messages: [
+          { role: "system" as const, content: "Part A" },
+          { role: "system" as const, content: "Part B" },
+          { role: "user" as const, content: "Hello" },
+        ],
+        maxRetries: 3,
+        tools: {},
+      })
+
+      const callArgs = mockGenerateText.mock.calls[0][0]
+      expect(callArgs.messages).toEqual([
+        { role: "system", content: "Part A\n\nPart B", providerOptions: undefined },
+        { role: "user", content: "Hello" },
+      ])
+    })
+  })
 })
diff --git a/packages/runtime/src/llm/executor.ts b/packages/runtime/src/llm/executor.ts
index 9ff2d3e4..3fb6795f 100644
--- a/packages/runtime/src/llm/executor.ts
+++ b/packages/runtime/src/llm/executor.ts
@@ -26,6 +26,9 @@ const PROVIDERS_WITHOUT_REASONING_HISTORY: ProviderName[] = [
   "ollama",
 ]
 
+/** Providers that don't handle multiple consecutive system messages well */
+const PROVIDERS_REQUIRING_MERGED_SYSTEM_MESSAGES: ProviderName[] = ["openai", "azure-openai"]
+
 /**
  * Filter out reasoning parts from messages for providers that don't support them.
  * This prevents "Non-OpenAI reasoning parts are not supported" warnings from AI SDK.
@@ -55,6 +58,56 @@ function filterReasoningPartsForProvider(
   })
 }
 
+/**
+ * Merge consecutive system messages into a single message for providers that
+ * don't support multiple system/developer messages (e.g., OpenAI reasoning models).
+ * Preserves providerOptions from the last message in each group.
+ */
+function mergeConsecutiveSystemMessages(
+  messages: ModelMessage[],
+  providerName: ProviderName,
+): ModelMessage[] {
+  if (!PROVIDERS_REQUIRING_MERGED_SYSTEM_MESSAGES.includes(providerName)) {
+    return messages
+  }
+  const result: ModelMessage[] = []
+  let systemGroup: { content: string; providerOptions?: ModelMessage["providerOptions"] }[] = []
+
+  const flushSystemGroup = () => {
+    if (systemGroup.length === 0) return
+    if (systemGroup.length === 1) {
+      result.push({
+        role: "system" as const,
+        content: systemGroup[0].content,
+        providerOptions: systemGroup[0].providerOptions,
+      })
+    } else {
+      const lastOptions = systemGroup[systemGroup.length - 1].providerOptions
+      result.push({
+        role: "system" as const,
+        content: systemGroup.map((m) => m.content).join("\n\n"),
+        providerOptions: lastOptions,
+      })
+    }
+    systemGroup = []
+  }
+
+  for (const message of messages) {
+    if (message.role === "system") {
+      systemGroup.push({
+        content: message.content as string,
+        providerOptions: message.providerOptions,
+      })
+    } else {
+      flushSystemGroup()
+      result.push(message)
+    }
+  }
+  flushSystemGroup()
+
+  return result
+}
+
 export class LLMExecutor {
   constructor(
     private readonly adapter: ProviderAdapter,
@@ -73,7 +126,10 @@ export class LLMExecutor {
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
-    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)
+    const messages = mergeConsecutiveSystemMessages(
+      filterReasoningPartsForProvider(params.messages, this.adapter.providerName),
+      this.adapter.providerName,
+    )
 
     try {
       const result = await generateText({
@@ -123,7 +179,10 @@
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
-    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)
+    const messages = mergeConsecutiveSystemMessages(
+      filterReasoningPartsForProvider(params.messages, this.adapter.providerName),
+      this.adapter.providerName,
+    )
 
     try {
       const result = await generateText({
@@ -159,7 +218,10 @@
       ? this.adapter.getReasoningOptions(params.reasoningBudget)
       : undefined
     const providerOptions = this.mergeProviderOptions(baseProviderOptions, reasoningOptions)
-    const messages = filterReasoningPartsForProvider(params.messages, this.adapter.providerName)
+    const messages = mergeConsecutiveSystemMessages(
+      filterReasoningPartsForProvider(params.messages, this.adapter.providerName),
+      this.adapter.providerName,
+    )
 
     const streamResult = streamText({
       model: this.model,
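For review context, a minimal standalone sketch of the transformation the new `mergeConsecutiveSystemMessages` pass performs. This is an illustration under simplifying assumptions, not the patched code: `Msg` is a stand-in for the AI SDK's `ModelMessage` type, and the per-group `providerOptions` bookkeeping from the real implementation is omitted.

```ts
// Simplified stand-in for ModelMessage (assumption for this sketch).
type Msg = { role: "system" | "user" | "assistant"; content: string }

function mergeSystem(messages: Msg[]): Msg[] {
  return messages.reduce<Msg[]>((acc, msg) => {
    const prev = acc[acc.length - 1]
    if (msg.role === "system" && prev?.role === "system") {
      // Same join rule as the patch: a blank line between merged segments.
      prev.content = `${prev.content}\n\n${msg.content}`
      return acc
    }
    // Copy so the merge above never mutates the caller's objects.
    return [...acc, { ...msg }]
  }, [])
}

console.log(
  mergeSystem([
    { role: "system", content: "You are terse." },
    { role: "system", content: "Answer in English." },
    { role: "user", content: "Hi" },
  ]),
)
// -> [{ role: "system", content: "You are terse.\n\nAnswer in English." },
//     { role: "user", content: "Hi" }]
```

Joining with "\n\n" keeps the merged segments in order and readable as a single prompt, which is why the tests above assert that exact separator.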