From ec11452d73453d11314f91dc6c01b0cdd831a2d3 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Sun, 29 Mar 2026 22:44:53 +0100 Subject: [PATCH 1/4] Add edge case tests for LlmIntentClassifier null, long string, and special character inputs --- .../Services/LlmIntentClassifierTests.cs | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs index d4011bce3..0fd7a77b2 100644 --- a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs +++ b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs @@ -142,6 +142,85 @@ public void Classify_NonActionable_ShouldReturnFalse(string message) #endregion + #region Edge Cases — Input Extremes + + [Fact] + public void Classify_NullInput_ThrowsNullReferenceException() + { + // The classifier calls message.ToLowerInvariant() without a null guard. + // This documents that null input is not handled gracefully. + var act = () => LlmIntentClassifier.Classify(null!); + + act.Should().Throw(); + } + + [Fact] + public void Classify_VeryLongString_ReturnsNotActionable() + { + var longMessage = new string('x', 50_000); + + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(longMessage); + + isActionable.Should().BeFalse(); + actionIntent.Should().BeNull(); + } + + [Fact] + public void Classify_VeryLongStringContainingPattern_StillMatches() + { + var longMessage = new string('x', 25_000) + " create card for testing " + new string('x', 25_000); + + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(longMessage); + + isActionable.Should().BeTrue(); + actionIntent.Should().Be("card.create"); + } + + [Theory] + [InlineData(" ")] + [InlineData("\t\t")] + [InlineData("\n\n\n")] + public void Classify_WhitespaceOnly_ReturnsNotActionable(string message) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(message); + + isActionable.Should().BeFalse(); + actionIntent.Should().BeNull(); + } + + [Theory] + [InlineData("Hello! @#$%^&*() special chars")] + [InlineData("Unicode: \u00e9\u00e8\u00ea\u00eb\u00fc\u00f6\u00e4")] + [InlineData("")] + [InlineData("SELECT * FROM cards; DROP TABLE boards;")] + public void Classify_SpecialCharacters_WithoutPattern_ReturnsNotActionable(string message) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(message); + + isActionable.Should().BeFalse(); + actionIntent.Should().BeNull(); + } + + [Fact] + public void Classify_PatternWithSpecialCharsSurrounding_StillMatches() + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify("!!! create card !!! @#$ testing"); + + isActionable.Should().BeTrue(); + actionIntent.Should().Be("card.create"); + } + + [Fact] + public void Classify_PatternWithNewlines_StillMatches() + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify("line 1\ncreate card for testing\nline 3"); + + isActionable.Should().BeTrue(); + actionIntent.Should().Be("card.create"); + } + + #endregion + #region Known Gaps — Natural Language Misses (Documents #570/#571) /// From ab59b7628eeb569d9acb15e76c6271e19dd6ec46 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Sun, 29 Mar 2026 22:44:56 +0100 Subject: [PATCH 2/4] Add chat-to-proposal integration tests covering classifier-parser pipeline --- .../Services/ChatServiceTests.cs | 174 ++++++++++++++++++ 1 file changed, 174 insertions(+) diff --git a/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs index 82ba4046a..07481da07 100644 --- a/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs +++ b/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs @@ -760,6 +760,180 @@ public async Task GetProviderHealthAsync_ShouldUseProbeStatus_WhenRequested() _llmProviderMock.Verify(p => p.GetHealthAsync(default), Times.Never); } + #region Chat-to-Proposal Flow — Classifier → Parser Integration (#577) + + /// + /// Full flow: structured syntax hits the LLM classifier (IsActionable=true), + /// then the planner parses successfully, yielding a proposal reference. + /// + [Fact] + public async Task SendMessageAsync_StructuredSyntax_ClassifierHit_ParserSuccess_ProposalCreated() + { + var userId = Guid.NewGuid(); + var boardId = Guid.NewGuid(); + var proposalId = Guid.NewGuid(); + var session = new ChatSession(userId, "Full flow session", boardId); + + _chatSessionRepoMock + .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) + .ReturnsAsync(session); + _llmProviderMock + .Setup(p => p.CompleteAsync(It.IsAny(), default)) + .ReturnsAsync(new LlmCompletionResult( + "I'll create that card.", 15, true, "card.create")); + _plannerMock + .Setup(p => p.ParseInstructionAsync( + It.IsAny(), userId, boardId, + It.IsAny(), ProposalSourceType.Chat, + session.Id.ToString(), It.IsAny())) + .ReturnsAsync(Result.Success(new ProposalDto( + proposalId, ProposalSourceType.Chat, null, boardId, userId, + ProposalStatus.PendingReview, RiskLevel.Low, + "create card 'Deploy script'", null, null, + DateTimeOffset.UtcNow, DateTimeOffset.UtcNow, + DateTime.UtcNow.AddHours(1), null, null, null, null, + "corr", new List()))); + + var result = await _service.SendMessageAsync( + session.Id, + userId, + new SendChatMessageDto("create card 'Deploy script'"), + default); + + result.IsSuccess.Should().BeTrue(); + result.Value.MessageType.Should().Be("proposal-reference"); + result.Value.ProposalId.Should().Be(proposalId); + result.Value.Content.Should().Contain("Proposal created for review"); + _plannerMock.Verify( + p => p.ParseInstructionAsync( + "create card 'Deploy script'", userId, boardId, + It.IsAny(), ProposalSourceType.Chat, + session.Id.ToString(), It.IsAny()), + Times.Once); + } + + /// + /// Natural language misses classifier (IsActionable=false), no RequestProposal set, + /// so the planner is never called — current behavior documents the gap. + /// + [Fact] + public async Task SendMessageAsync_NaturalLanguage_ClassifierMiss_NoPlannerCall() + { + var userId = Guid.NewGuid(); + var boardId = Guid.NewGuid(); + var session = new ChatSession(userId, "Classifier miss session", boardId); + + _chatSessionRepoMock + .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) + .ReturnsAsync(session); + _llmProviderMock + .Setup(p => p.CompleteAsync(It.IsAny(), default)) + .ReturnsAsync(new LlmCompletionResult( + "Sure, I can help with that.", 10, false, null)); + + var result = await _service.SendMessageAsync( + session.Id, + userId, + new SendChatMessageDto("set up some tasks for the sprint"), + default); + + result.IsSuccess.Should().BeTrue(); + result.Value.MessageType.Should().Be("text"); + _plannerMock.Verify( + p => p.ParseInstructionAsync( + It.IsAny(), It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny()), + Times.Never, + "planner should not be called when classifier reports non-actionable and RequestProposal is false"); + } + + /// + /// Explicit RequestProposal with natural language — parser receives the raw + /// message and fails because it only understands structured syntax. + /// + [Fact] + public async Task SendMessageAsync_ExplicitRequestProposal_NaturalLanguage_ParserFailsGracefully() + { + var userId = Guid.NewGuid(); + var boardId = Guid.NewGuid(); + var session = new ChatSession(userId, "Explicit NLP fail session", boardId); + + _chatSessionRepoMock + .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) + .ReturnsAsync(session); + _llmProviderMock + .Setup(p => p.CompleteAsync(It.IsAny(), default)) + .ReturnsAsync(new LlmCompletionResult( + "I understand you want tasks.", 15, false, null)); + _plannerMock + .Setup(p => p.ParseInstructionAsync( + It.IsAny(), userId, boardId, + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny())) + .ReturnsAsync(Result.Failure( + ErrorCodes.ValidationError, + "Could not parse instruction. Supported patterns: 'create card \"title\"'...")); + + var result = await _service.SendMessageAsync( + session.Id, + userId, + new SendChatMessageDto( + "please create some tasks for the deployment checklist", + RequestProposal: true), + default); + + result.IsSuccess.Should().BeTrue(); + result.Value.MessageType.Should().Be("status"); + result.Value.Content.Should().Contain("Could not create the requested proposal"); + _plannerMock.Verify( + p => p.ParseInstructionAsync( + It.IsAny(), userId, boardId, + It.IsAny(), ProposalSourceType.Chat, + session.Id.ToString(), It.IsAny()), + Times.Once); + } + + /// + /// Classifier detects actionable intent but parser fails on the raw message + /// (e.g., message says "create card for testing" but lacks quoted title). + /// Verifies the hint message is shown to the user. + /// + [Fact] + public async Task SendMessageAsync_ActionableClassification_ParserFails_ShowsParseHint() + { + var userId = Guid.NewGuid(); + var boardId = Guid.NewGuid(); + var session = new ChatSession(userId, "Actionable parse fail", boardId); + + _chatSessionRepoMock + .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) + .ReturnsAsync(session); + _llmProviderMock + .Setup(p => p.CompleteAsync(It.IsAny(), default)) + .ReturnsAsync(new LlmCompletionResult( + "I'll help you create that.", 10, true, "card.create")); + _plannerMock + .Setup(p => p.ParseInstructionAsync( + It.IsAny(), userId, boardId, + It.IsAny(), It.IsAny(), + It.IsAny(), It.IsAny())) + .ReturnsAsync(Result.Failure( + ErrorCodes.ValidationError, "Could not parse instruction")); + + var result = await _service.SendMessageAsync( + session.Id, + userId, + new SendChatMessageDto("create card for testing without quotes"), + default); + + result.IsSuccess.Should().BeTrue(); + result.Value.MessageType.Should().Be("status"); + result.Value.Content.Should().Contain("detected a task request but could not parse it"); + } + + #endregion + #region NLP Gap Tests — Documents #570 (Chat-to-Proposal NLP Gap) /// From 4c5fdb5f528f8d7476bfbe7a567e9abd6f6c0e29 Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Sun, 29 Mar 2026 23:05:01 +0100 Subject: [PATCH 3/4] Remove duplicate ChatService flow tests and harden null-input test - Remove 4 ChatServiceTests that duplicated existing tests covering identical code paths (structured-syntax success, classifier miss, explicit RequestProposal failure, actionable-but-parser-fails) - Change Classify_NullInput test to assert base Exception instead of NullReferenceException so it survives addition of a null guard --- .../Services/ChatServiceTests.cs | 174 ------------------ .../Services/LlmIntentClassifierTests.cs | 6 +- 2 files changed, 4 insertions(+), 176 deletions(-) diff --git a/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs index 07481da07..82ba4046a 100644 --- a/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs +++ b/backend/tests/Taskdeck.Application.Tests/Services/ChatServiceTests.cs @@ -760,180 +760,6 @@ public async Task GetProviderHealthAsync_ShouldUseProbeStatus_WhenRequested() _llmProviderMock.Verify(p => p.GetHealthAsync(default), Times.Never); } - #region Chat-to-Proposal Flow — Classifier → Parser Integration (#577) - - /// - /// Full flow: structured syntax hits the LLM classifier (IsActionable=true), - /// then the planner parses successfully, yielding a proposal reference. - /// - [Fact] - public async Task SendMessageAsync_StructuredSyntax_ClassifierHit_ParserSuccess_ProposalCreated() - { - var userId = Guid.NewGuid(); - var boardId = Guid.NewGuid(); - var proposalId = Guid.NewGuid(); - var session = new ChatSession(userId, "Full flow session", boardId); - - _chatSessionRepoMock - .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) - .ReturnsAsync(session); - _llmProviderMock - .Setup(p => p.CompleteAsync(It.IsAny(), default)) - .ReturnsAsync(new LlmCompletionResult( - "I'll create that card.", 15, true, "card.create")); - _plannerMock - .Setup(p => p.ParseInstructionAsync( - It.IsAny(), userId, boardId, - It.IsAny(), ProposalSourceType.Chat, - session.Id.ToString(), It.IsAny())) - .ReturnsAsync(Result.Success(new ProposalDto( - proposalId, ProposalSourceType.Chat, null, boardId, userId, - ProposalStatus.PendingReview, RiskLevel.Low, - "create card 'Deploy script'", null, null, - DateTimeOffset.UtcNow, DateTimeOffset.UtcNow, - DateTime.UtcNow.AddHours(1), null, null, null, null, - "corr", new List()))); - - var result = await _service.SendMessageAsync( - session.Id, - userId, - new SendChatMessageDto("create card 'Deploy script'"), - default); - - result.IsSuccess.Should().BeTrue(); - result.Value.MessageType.Should().Be("proposal-reference"); - result.Value.ProposalId.Should().Be(proposalId); - result.Value.Content.Should().Contain("Proposal created for review"); - _plannerMock.Verify( - p => p.ParseInstructionAsync( - "create card 'Deploy script'", userId, boardId, - It.IsAny(), ProposalSourceType.Chat, - session.Id.ToString(), It.IsAny()), - Times.Once); - } - - /// - /// Natural language misses classifier (IsActionable=false), no RequestProposal set, - /// so the planner is never called — current behavior documents the gap. - /// - [Fact] - public async Task SendMessageAsync_NaturalLanguage_ClassifierMiss_NoPlannerCall() - { - var userId = Guid.NewGuid(); - var boardId = Guid.NewGuid(); - var session = new ChatSession(userId, "Classifier miss session", boardId); - - _chatSessionRepoMock - .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) - .ReturnsAsync(session); - _llmProviderMock - .Setup(p => p.CompleteAsync(It.IsAny(), default)) - .ReturnsAsync(new LlmCompletionResult( - "Sure, I can help with that.", 10, false, null)); - - var result = await _service.SendMessageAsync( - session.Id, - userId, - new SendChatMessageDto("set up some tasks for the sprint"), - default); - - result.IsSuccess.Should().BeTrue(); - result.Value.MessageType.Should().Be("text"); - _plannerMock.Verify( - p => p.ParseInstructionAsync( - It.IsAny(), It.IsAny(), It.IsAny(), - It.IsAny(), It.IsAny(), - It.IsAny(), It.IsAny()), - Times.Never, - "planner should not be called when classifier reports non-actionable and RequestProposal is false"); - } - - /// - /// Explicit RequestProposal with natural language — parser receives the raw - /// message and fails because it only understands structured syntax. - /// - [Fact] - public async Task SendMessageAsync_ExplicitRequestProposal_NaturalLanguage_ParserFailsGracefully() - { - var userId = Guid.NewGuid(); - var boardId = Guid.NewGuid(); - var session = new ChatSession(userId, "Explicit NLP fail session", boardId); - - _chatSessionRepoMock - .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) - .ReturnsAsync(session); - _llmProviderMock - .Setup(p => p.CompleteAsync(It.IsAny(), default)) - .ReturnsAsync(new LlmCompletionResult( - "I understand you want tasks.", 15, false, null)); - _plannerMock - .Setup(p => p.ParseInstructionAsync( - It.IsAny(), userId, boardId, - It.IsAny(), It.IsAny(), - It.IsAny(), It.IsAny())) - .ReturnsAsync(Result.Failure( - ErrorCodes.ValidationError, - "Could not parse instruction. Supported patterns: 'create card \"title\"'...")); - - var result = await _service.SendMessageAsync( - session.Id, - userId, - new SendChatMessageDto( - "please create some tasks for the deployment checklist", - RequestProposal: true), - default); - - result.IsSuccess.Should().BeTrue(); - result.Value.MessageType.Should().Be("status"); - result.Value.Content.Should().Contain("Could not create the requested proposal"); - _plannerMock.Verify( - p => p.ParseInstructionAsync( - It.IsAny(), userId, boardId, - It.IsAny(), ProposalSourceType.Chat, - session.Id.ToString(), It.IsAny()), - Times.Once); - } - - /// - /// Classifier detects actionable intent but parser fails on the raw message - /// (e.g., message says "create card for testing" but lacks quoted title). - /// Verifies the hint message is shown to the user. - /// - [Fact] - public async Task SendMessageAsync_ActionableClassification_ParserFails_ShowsParseHint() - { - var userId = Guid.NewGuid(); - var boardId = Guid.NewGuid(); - var session = new ChatSession(userId, "Actionable parse fail", boardId); - - _chatSessionRepoMock - .Setup(r => r.GetByIdWithMessagesAsync(session.Id, default)) - .ReturnsAsync(session); - _llmProviderMock - .Setup(p => p.CompleteAsync(It.IsAny(), default)) - .ReturnsAsync(new LlmCompletionResult( - "I'll help you create that.", 10, true, "card.create")); - _plannerMock - .Setup(p => p.ParseInstructionAsync( - It.IsAny(), userId, boardId, - It.IsAny(), It.IsAny(), - It.IsAny(), It.IsAny())) - .ReturnsAsync(Result.Failure( - ErrorCodes.ValidationError, "Could not parse instruction")); - - var result = await _service.SendMessageAsync( - session.Id, - userId, - new SendChatMessageDto("create card for testing without quotes"), - default); - - result.IsSuccess.Should().BeTrue(); - result.Value.MessageType.Should().Be("status"); - result.Value.Content.Should().Contain("detected a task request but could not parse it"); - } - - #endregion - #region NLP Gap Tests — Documents #570 (Chat-to-Proposal NLP Gap) /// diff --git a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs index 0fd7a77b2..c89e06930 100644 --- a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs +++ b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs @@ -145,13 +145,15 @@ public void Classify_NonActionable_ShouldReturnFalse(string message) #region Edge Cases — Input Extremes [Fact] - public void Classify_NullInput_ThrowsNullReferenceException() + public void Classify_NullInput_Throws() { // The classifier calls message.ToLowerInvariant() without a null guard. // This documents that null input is not handled gracefully. + // Using base Exception type so the test survives if a null guard + // (ArgumentNullException) is added later. var act = () => LlmIntentClassifier.Classify(null!); - act.Should().Throw(); + act.Should().Throw(); } [Fact] From ca74a652a89350d452a70fdec4d74fa7716d336a Mon Sep 17 00:00:00 2001 From: Chris0Jeky Date: Sun, 29 Mar 2026 23:17:34 +0100 Subject: [PATCH 4/4] Add null guard to LlmIntentClassifier.Classify --- .../Services/LlmIntentClassifier.cs | 3 +++ .../Services/LlmIntentClassifierTests.cs | 11 ++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/src/Taskdeck.Application/Services/LlmIntentClassifier.cs b/backend/src/Taskdeck.Application/Services/LlmIntentClassifier.cs index 29372d56b..4ff0e1c83 100644 --- a/backend/src/Taskdeck.Application/Services/LlmIntentClassifier.cs +++ b/backend/src/Taskdeck.Application/Services/LlmIntentClassifier.cs @@ -4,6 +4,9 @@ public static class LlmIntentClassifier { public static (bool IsActionable, string? ActionIntent) Classify(string message) { + if (string.IsNullOrWhiteSpace(message)) + return (false, null); + var lower = message.ToLowerInvariant(); // Card creation — explicit commands and natural language diff --git a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs index c89e06930..f4577031d 100644 --- a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs +++ b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierTests.cs @@ -145,15 +145,12 @@ public void Classify_NonActionable_ShouldReturnFalse(string message) #region Edge Cases — Input Extremes [Fact] - public void Classify_NullInput_Throws() + public void Classify_NullInput_ReturnsNotActionable() { - // The classifier calls message.ToLowerInvariant() without a null guard. - // This documents that null input is not handled gracefully. - // Using base Exception type so the test survives if a null guard - // (ArgumentNullException) is added later. - var act = () => LlmIntentClassifier.Classify(null!); + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(null!); - act.Should().Throw(); + isActionable.Should().BeFalse(); + actionIntent.Should().BeNull(); } [Fact]