From a56a3bed93d4cce0330441cb4d2436639b971f3f Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Tue, 31 Mar 2026 04:10:00 +0100 Subject: [PATCH 1/6] Increase default MaxTokens from 1024 to 2048 The previous 1024 limit was too low, causing LLM responses to be truncated mid-JSON when structured output was requested. Fixes #616. --- backend/src/Taskdeck.Application/Services/ILlmProvider.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/Taskdeck.Application/Services/ILlmProvider.cs b/backend/src/Taskdeck.Application/Services/ILlmProvider.cs index 946a91309..3c8e14ca0 100644 --- a/backend/src/Taskdeck.Application/Services/ILlmProvider.cs +++ b/backend/src/Taskdeck.Application/Services/ILlmProvider.cs @@ -10,7 +10,7 @@ public interface ILlmProvider public record ChatCompletionRequest( List Messages, - int MaxTokens = 1024, + int MaxTokens = 2048, double Temperature = 0.7, LlmRequestAttribution? Attribution = null, string? SystemPrompt = null, From ca2fabcbd3899a0cfd0ef24513f0d527aeb06d50 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Tue, 31 Mar 2026 04:11:12 +0100 Subject: [PATCH 2/6] Add finish_reason and JSON truncation detection to OpenAI provider Extract finish_reason from the OpenAI response. When it is "length", mark the result as degraded with reason "Response was truncated". Also detect invalid JSON when JSON mode was requested, which indicates the response was cut off before the model could produce valid output. --- .../Services/OpenAiLlmProvider.cs | 58 ++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/backend/src/Taskdeck.Application/Services/OpenAiLlmProvider.cs b/backend/src/Taskdeck.Application/Services/OpenAiLlmProvider.cs index e77f590a4..035239379 100644 --- a/backend/src/Taskdeck.Application/Services/OpenAiLlmProvider.cs +++ b/backend/src/Taskdeck.Application/Services/OpenAiLlmProvider.cs @@ -52,12 +52,43 @@ public async Task CompleteAsync(ChatCompletionRequest reque return BuildFallbackResult(lastUserMessage, "Live provider request failed.", GetConfiguredModelOrDefault()); } - if (!TryParseResponse(body, out var content, out var tokensUsed)) + if (!TryParseResponse(body, out var content, out var tokensUsed, out var finishReason)) { _logger.LogWarning("OpenAI completion response could not be parsed."); return BuildFallbackResult(lastUserMessage, "Live provider response parsing failed.", GetConfiguredModelOrDefault()); } + // Detect truncation: OpenAI returns finish_reason "length" when the + // response was cut off by the max_tokens limit. + if (string.Equals(finishReason, "length", StringComparison.OrdinalIgnoreCase)) + { + _logger.LogWarning("OpenAI response was truncated (finish_reason=length)."); + return new LlmCompletionResult( + content, + tokensUsed, + IsActionable: false, + Provider: "OpenAI", + Model: GetConfiguredModelOrDefault(), + IsDegraded: true, + DegradedReason: "Response was truncated"); + } + + // When JSON mode was requested but the response is not valid JSON, + // the output was likely truncated before the model could finish. + var useInstructionExtraction = request.SystemPrompt is null; + if (useInstructionExtraction && !IsValidJson(content)) + { + _logger.LogWarning("OpenAI JSON-mode response is not valid JSON; treating as truncated."); + return new LlmCompletionResult( + content, + tokensUsed, + IsActionable: false, + Provider: "OpenAI", + Model: GetConfiguredModelOrDefault(), + IsDegraded: true, + DegradedReason: "Response was truncated"); + } + // Try to parse structured instruction extraction from the LLM response if (LlmInstructionExtractionPrompt.TryParseStructuredResponse( content, @@ -227,10 +258,11 @@ private object BuildRequestPayload(ChatCompletionRequest request) return payload; } - private static bool TryParseResponse(string responseBody, out string content, out int tokensUsed) + private static bool TryParseResponse(string responseBody, out string content, out int tokensUsed, out string? finishReason) { content = string.Empty; tokensUsed = 0; + finishReason = null; if (string.IsNullOrWhiteSpace(responseBody)) { @@ -259,6 +291,12 @@ private static bool TryParseResponse(string responseBody, out string content, ou return false; } + if (first.TryGetProperty("finish_reason", out var finishReasonElement) && + finishReasonElement.ValueKind == JsonValueKind.String) + { + finishReason = finishReasonElement.GetString(); + } + if (root.TryGetProperty("usage", out var usage) && usage.TryGetProperty("total_tokens", out var totalTokens) && totalTokens.TryGetInt32(out var parsedTokens)) @@ -308,6 +346,22 @@ private static LlmCompletionResult BuildFallbackResult(string userMessage, strin Instructions: instructions); } + private static bool IsValidJson(string text) + { + if (string.IsNullOrWhiteSpace(text)) + return false; + + try + { + using var doc = JsonDocument.Parse(text); + return true; + } + catch (JsonException) + { + return false; + } + } + private static int EstimateTokens(string text) { if (string.IsNullOrWhiteSpace(text)) From 1eb85488eefc09773a2b20b1c61634c50dcce105 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Tue, 31 Mar 2026 04:12:13 +0100 Subject: [PATCH 3/6] Add finish_reason and JSON truncation detection to Gemini provider Extract finishReason from the Gemini response candidates. When it is "MAX_TOKENS", mark the result as degraded with reason "Response was truncated". Also detect invalid JSON when JSON mode was requested. --- .../Services/GeminiLlmProvider.cs | 58 ++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/backend/src/Taskdeck.Application/Services/GeminiLlmProvider.cs b/backend/src/Taskdeck.Application/Services/GeminiLlmProvider.cs index 534aeb381..911ba355d 100644 --- a/backend/src/Taskdeck.Application/Services/GeminiLlmProvider.cs +++ b/backend/src/Taskdeck.Application/Services/GeminiLlmProvider.cs @@ -81,12 +81,42 @@ public async Task CompleteAsync(ChatCompletionRequest reque return BuildFallbackResult(lastUserMessage, "Live provider request failed.", GetConfiguredModelOrDefault()); } - if (!TryParseResponse(body, out var content, out var tokensUsed)) + if (!TryParseResponse(body, out var content, out var tokensUsed, out var finishReason)) { _logger.LogWarning("Gemini completion response could not be parsed."); return BuildFallbackResult(lastUserMessage, "Live provider response parsing failed.", GetConfiguredModelOrDefault()); } + // Detect truncation: Gemini returns finishReason "MAX_TOKENS" when the + // response was cut off by the maxOutputTokens limit. + if (string.Equals(finishReason, "MAX_TOKENS", StringComparison.OrdinalIgnoreCase)) + { + _logger.LogWarning("Gemini response was truncated (finishReason=MAX_TOKENS)."); + return new LlmCompletionResult( + content, + tokensUsed, + IsActionable: false, + Provider: "Gemini", + Model: GetConfiguredModelOrDefault(), + IsDegraded: true, + DegradedReason: "Response was truncated"); + } + + // When JSON mode was requested but the response is not valid JSON, + // the output was likely truncated before the model could finish. + if (useInstructionExtraction && !IsValidJson(content)) + { + _logger.LogWarning("Gemini JSON-mode response is not valid JSON; treating as truncated."); + return new LlmCompletionResult( + content, + tokensUsed, + IsActionable: false, + Provider: "Gemini", + Model: GetConfiguredModelOrDefault(), + IsDegraded: true, + DegradedReason: "Response was truncated"); + } + // Try to parse structured instruction extraction from the LLM response if (LlmInstructionExtractionPrompt.TryParseStructuredResponse( content, @@ -225,10 +255,11 @@ private static object MapMessage(ChatCompletionMessage message) }; } - private static bool TryParseResponse(string responseBody, out string content, out int tokensUsed) + private static bool TryParseResponse(string responseBody, out string content, out int tokensUsed, out string? finishReason) { content = string.Empty; tokensUsed = 0; + finishReason = null; if (string.IsNullOrWhiteSpace(responseBody)) { @@ -248,6 +279,13 @@ private static bool TryParseResponse(string responseBody, out string content, ou } var firstCandidate = candidates[0]; + + if (firstCandidate.TryGetProperty("finishReason", out var finishReasonElement) && + finishReasonElement.ValueKind == JsonValueKind.String) + { + finishReason = finishReasonElement.GetString(); + } + if (!firstCandidate.TryGetProperty("content", out var candidateContent) || !candidateContent.TryGetProperty("parts", out var parts) || parts.ValueKind != JsonValueKind.Array || @@ -324,6 +362,22 @@ private static LlmCompletionResult BuildFallbackResult(string userMessage, strin Instructions: instructions); } + private static bool IsValidJson(string text) + { + if (string.IsNullOrWhiteSpace(text)) + return false; + + try + { + using var doc = JsonDocument.Parse(text); + return true; + } + catch (JsonException) + { + return false; + } + } + private static int EstimateTokens(string text) { if (string.IsNullOrWhiteSpace(text)) From d6bc4110ae73057532664968809d2f9d8cd8d193 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Tue, 31 Mar 2026 04:13:59 +0100 Subject: [PATCH 4/6] Detect truncated JSON in chat and show friendly notice When an assistant message starts with '{' but is not valid JSON, the frontend now shows "This response was cut short. Try a simpler question or rephrase." instead of rendering the raw broken JSON. --- .../src/views/AutomationChatView.vue | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/frontend/taskdeck-web/src/views/AutomationChatView.vue b/frontend/taskdeck-web/src/views/AutomationChatView.vue index 33cc09f54..fd1a14176 100644 --- a/frontend/taskdeck-web/src/views/AutomationChatView.vue +++ b/frontend/taskdeck-web/src/views/AutomationChatView.vue @@ -28,6 +28,20 @@ function renderMarkdown(content: string): string { return DOMPurify.sanitize(marked.parse(content, { async: false })) } +function isTruncatedJson(content: string): boolean { + if (!content) return false + const trimmed = content.trim() + if (!trimmed.startsWith('{')) return false + try { + JSON.parse(trimmed) + return false + } catch { + return true + } +} + +const truncationNotice = 'This response was cut short. Try a simpler question or rephrase.' + const router = useRouter() const route = useRoute() const toast = useToastStore() @@ -661,7 +675,13 @@ watch(