diff --git a/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierEdgeCaseTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierEdgeCaseTests.cs new file mode 100644 index 000000000..885ad5b8e --- /dev/null +++ b/backend/tests/Taskdeck.Application.Tests/Services/LlmIntentClassifierEdgeCaseTests.cs @@ -0,0 +1,249 @@ +using FluentAssertions; +using Xunit; +using Taskdeck.Application.Services; + +namespace Taskdeck.Application.Tests.Services; + +/// +/// Edge case tests for LlmIntentClassifier expanding on the existing fuzz tests. +/// Covers: negation filtering, other-tool questions, ambiguous inputs, +/// very long inputs, prompt injection patterns, mixed casing, +/// and multi-intent detection gaps. +/// +public class LlmIntentClassifierEdgeCaseTests +{ + // ── Negation filtering ─────────────────────────────────────── + + [Theory] + [InlineData("Don't add a card")] + [InlineData("do not create a new task")] + [InlineData("never move the card to done")] + [InlineData("stop create new tasks")] + [InlineData("cancel the delete of card 5")] + [InlineData("don't remove that task")] + [InlineData("avoid create a task please")] // negation regex: "avoid" followed by verbs within word distance + public void Classify_NegatedInput_IsNotActionable(string input) + { + var (isActionable, _) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeFalse( + $"negated input '{input}' should not be classified as actionable"); + } + + // ── Other-tool questions ───────────────────────────────────── + + [Theory] + [InlineData("How do I add a card in Trello?")] + [InlineData("How do I create a task in Jira?")] + [InlineData("Where do I move cards in Asana?")] + [InlineData("Can I create boards in Notion?")] + public void Classify_OtherToolQuestion_IsNotActionable(string input) + { + var (isActionable, _) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeFalse( + $"question about another tool '{input}' should not be actionable"); + } + + // ── Positive detection ─────────────────────────────────────── + + [Theory] + [InlineData("create a new card called Test", "card.create")] + [InlineData("add a task for the meeting", "card.create")] + [InlineData("make a new task for sprint review", "card.create")] + [InlineData("move card to done column", "card.move")] + [InlineData("archive the old task", "card.archive")] + [InlineData("delete card number 5", "card.archive")] + [InlineData("remove the finished task", "card.archive")] + [InlineData("update card title to new name", "card.update")] + [InlineData("rename task to better name", "card.update")] + [InlineData("edit card description", "card.update")] + [InlineData("create a new board for the project", "board.create")] + [InlineData("rename board to Sprint 42", "board.update")] + [InlineData("reorder columns on the board", "column.reorder")] + public void Classify_ActionableInput_DetectsCorrectIntent(string input, string expectedIntent) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeTrue($"'{input}' should be detected as actionable"); + actionIntent.Should().Be(expectedIntent); + } + + // ── Non-actionable inputs ──────────────────────────────────── + + [Theory] + [InlineData("hello")] + [InlineData("what is the weather?")] + [InlineData("tell me about the project")] + [InlineData("how are my tasks doing?")] + [InlineData("show me a summary")] + [InlineData("what's the status?")] + public void Classify_NonActionableInput_ReturnsFalse(string input) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeFalse( + $"non-actionable input '{input}' should not be classified as actionable"); + actionIntent.Should().BeNull(); + } + + // ── Edge cases ─────────────────────────────────────────────── + + [Fact] + public void Classify_NullInput_ReturnsFalse() + { + var (isActionable, _) = LlmIntentClassifier.Classify(null!); + + isActionable.Should().BeFalse(); + } + + [Fact] + public void Classify_EmptyString_ReturnsFalse() + { + var (isActionable, _) = LlmIntentClassifier.Classify(""); + + isActionable.Should().BeFalse(); + } + + [Fact] + public void Classify_WhitespaceOnly_ReturnsFalse() + { + var (isActionable, _) = LlmIntentClassifier.Classify(" \t\n "); + + isActionable.Should().BeFalse(); + } + + [Fact] + public void Classify_VeryLongInput_DoesNotThrow() + { + // 10,000 character message should be handled gracefully + var longInput = new string('a', 9990) + " create a card"; + + var act = () => LlmIntentClassifier.Classify(longInput); + + act.Should().NotThrow(); + } + + [Fact] + public void Classify_VeryLongInput_WithActionableContent_StillDetects() + { + // Actionable content at the start should be detected even in long messages + var longInput = "create a new task called test " + new string('x', 5000); + + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(longInput); + + isActionable.Should().BeTrue(); + actionIntent.Should().Be("card.create"); + } + + [Theory] + [InlineData("CREATE A NEW CARD")] + [InlineData("Create A New Card")] + [InlineData("cReAtE a NeW cArD")] + public void Classify_MixedCase_StillDetects(string input) + { + var (isActionable, _) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeTrue( + $"mixed case input '{input}' should still be detected"); + } + + [Theory] + [InlineData("create a card\nand some other text")] + [InlineData("create\na\ncard")] + public void Classify_NewlinesInInput_DoesNotThrow(string input) + { + // Verify that newlines in input do not cause exceptions. + // The classifier may or may not detect the intent depending on + // whether the regex matches across line boundaries, but it must + // never crash. + var act = () => LlmIntentClassifier.Classify(input); + act.Should().NotThrow("newlines in input must not cause exceptions"); + } + + [Fact] + public void Classify_NewlinesSeparatingActionablePhrase_DetectsWhenOnOneLine() + { + // "create a card" on a single line should be detected even with trailing newlines + var input = "create a card\nsome other text after"; + + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeTrue("actionable phrase on first line should be detected"); + actionIntent.Should().Be("card.create"); + } + + [Fact] + public void Classify_PromptInjection_DoesNotCrashAndStillClassifies() + { + // Injection payloads that contain "create a card" should still be classified + // as actionable — the classifier is a regex-based intent detector, not a + // sanitizer. The key guarantee is no crashes and correct classification. + var injections = new[] + { + "create a card'; DROP TABLE cards;--", + "create a card with ", + "create a card\0with null bytes", + "create a card\\nwith escaped newlines" + }; + + foreach (var input in injections) + { + var act = () => LlmIntentClassifier.Classify(input); + act.Should().NotThrow($"input '{input}' should not cause an exception"); + + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + isActionable.Should().BeTrue( + $"injection input '{input}' still contains 'create a card' and should be actionable"); + actionIntent.Should().Be("card.create"); + } + } + + // ── Archive vs Move disambiguation ─────────────────────────── + + [Fact] + public void Classify_RemoveCard_ClassifiesAsArchive_NotMove() + { + // "remove" contains "move" as a substring. Verify archive takes priority. + var (isActionable, actionIntent) = LlmIntentClassifier.Classify("remove the task from backlog"); + + isActionable.Should().BeTrue(); + actionIntent.Should().Be("card.archive"); + } + + // ── Stemming/plural variations ─────────────────────────────── + + [Theory] + [InlineData("create new cards", "card.create")] + [InlineData("add tasks for the team", "card.create")] + [InlineData("move tasks to done", "card.move")] + [InlineData("archive cards", "card.archive")] + [InlineData("update tasks", "card.update")] + public void Classify_PluralNouns_StillDetects(string input, string expectedIntent) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeTrue($"plural input '{input}' should be detected"); + actionIntent.Should().Be(expectedIntent); + } + + // ── Verb coverage ──────────────────────────────────────────── + + [Theory] + [InlineData("generate a card for testing", "card.create")] + [InlineData("build a task list", "card.create")] + [InlineData("prepare a new task", "card.create")] + [InlineData("set up a new board", "board.create")] + [InlineData("modify the card title", "card.update")] + [InlineData("change task priority", "card.update")] + [InlineData("sort the columns", "column.reorder")] + [InlineData("rearrange the columns", "column.reorder")] + [InlineData("reorganize the board columns", "column.reorder")] + public void Classify_AlternateVerbs_DetectedCorrectly(string input, string expectedIntent) + { + var (isActionable, actionIntent) = LlmIntentClassifier.Classify(input); + + isActionable.Should().BeTrue($"verb in '{input}' should be recognized"); + actionIntent.Should().Be(expectedIntent); + } +} diff --git a/backend/tests/Taskdeck.Application.Tests/Services/LlmProviderAbstractionEdgeCaseTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/LlmProviderAbstractionEdgeCaseTests.cs new file mode 100644 index 000000000..07944d351 --- /dev/null +++ b/backend/tests/Taskdeck.Application.Tests/Services/LlmProviderAbstractionEdgeCaseTests.cs @@ -0,0 +1,313 @@ +using System.Text.Json; +using FluentAssertions; +using Xunit; +using Taskdeck.Application.Services; + +namespace Taskdeck.Application.Tests.Services; + +/// +/// Edge case tests for LLM provider abstraction boundaries. +/// Covers: default CompleteWithToolsAsync throws NotSupportedException, +/// MockLlmProvider edge cases (empty messages, null, tool-calling patterns), +/// provider empty/degraded response handling, provider selection policy +/// edge cases, and kill switch settings validation. +/// +public class LlmProviderAbstractionEdgeCaseTests +{ + // ── ILlmProvider default interface implementation ────────────── + + [Fact] + public async Task DefaultCompleteWithToolsAsync_ThrowsNotSupportedException() + { + // A provider that does NOT override CompleteWithToolsAsync should throw. + // Must be called through the interface to access the default implementation. + ILlmProvider provider = new MinimalTestProvider(); + + var request = new ChatCompletionRequest( + new List { new("User", "test") }); + var tools = Array.Empty(); + + var act = async () => await provider.CompleteWithToolsAsync(request, tools); + + await act.Should().ThrowAsync() + .WithMessage("*MinimalTestProvider*"); + } + + // ── MockLlmProvider CompleteAsync edge cases ────────────────── + + [Fact] + public async Task MockProvider_CompleteAsync_EmptyMessageList_DoesNotThrow() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest(new List()); + + var result = await provider.CompleteAsync(request); + + result.Should().NotBeNull(); + result.Provider.Should().Be("Mock"); + result.Content.Should().NotBeNull(); + } + + [Fact] + public async Task MockProvider_CompleteAsync_NonUserRoleOnly_FallsBackToEmpty() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest( + new List { new("System", "You are a helpful assistant.") }); + + var result = await provider.CompleteAsync(request); + + result.Should().NotBeNull(); + result.IsActionable.Should().BeFalse(); // Empty user message should not be actionable + } + + [Fact] + public async Task MockProvider_CompleteAsync_ActionableMessage_ReturnsInstructions() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest( + new List { new("User", "create a new card called Test Task") }); + + var result = await provider.CompleteAsync(request); + + result.IsActionable.Should().BeTrue(); + result.ActionIntent.Should().NotBeNullOrEmpty(); + result.Content.Should().Contain("proposal"); + } + + [Fact] + public async Task MockProvider_CompleteAsync_VeryLongInput_DoesNotThrow() + { + var provider = new MockLlmProvider(); + var longInput = new string('a', 10000); + var request = new ChatCompletionRequest( + new List { new("User", longInput) }); + + var act = async () => await provider.CompleteAsync(request); + + await act.Should().NotThrowAsync(); + var result = await provider.CompleteAsync(request); + result.TokensUsed.Should().BeGreaterThan(0); + } + + // ── MockLlmProvider CompleteWithToolsAsync edge cases ──────── + + [Fact] + public async Task MockProvider_ToolCalling_EmptyMessages_ReturnsFinalResponse() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest(new List()); + var tools = Array.Empty(); + + var result = await provider.CompleteWithToolsAsync(request, tools); + + result.IsComplete.Should().BeTrue(); + result.ToolCalls.Should().BeNull(); + result.Provider.Should().Be("Mock"); + result.Model.Should().Be("mock-tool-v1"); + } + + [Fact] + public async Task MockProvider_ToolCalling_PreviousResults_ContainsToolNameInSummary() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest( + new List { new("User", "What cards?") }); + + var previousResults = new List + { + new("call-1", "list_cards_in_column", "{\"cards\":[]}", false), + new("call-2", "get_board_labels", "{\"labels\":[]}", false) + }; + + var result = await provider.CompleteWithToolsAsync(request, Array.Empty(), previousResults); + + result.IsComplete.Should().BeTrue(); + result.Content.Should().Contain("list_cards_in_column"); + result.Content.Should().Contain("get_board_labels"); + } + + [Fact] + public async Task MockProvider_ToolCalling_PreviousResultsWithError_StillReturnsSummary() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest( + new List { new("User", "Show cards") }); + + var previousResults = new List + { + new("call-1", "get_card_details", "{\"error\":\"not found\"}", true) + }; + + var result = await provider.CompleteWithToolsAsync(request, Array.Empty(), previousResults); + + result.IsComplete.Should().BeTrue(); + result.Content.Should().Contain("get_card_details"); + } + + [Fact] + public async Task MockProvider_ToolCalling_VeryLongToolResult_IsTruncatedInSummary() + { + var provider = new MockLlmProvider(); + var request = new ChatCompletionRequest( + new List { new("User", "Show cards") }); + + var longResult = new string('Z', 500); + var previousResults = new List + { + new("call-1", "list_cards_in_column", longResult, false) + }; + + var result = await provider.CompleteWithToolsAsync(request, Array.Empty(), previousResults); + + result.IsComplete.Should().BeTrue(); + result.Content.Should().NotBeNull(); + // The MockLlmProvider truncates tool results at 200 chars. + // The full 500-char 'Z' string should NOT appear in the summary. + result.Content!.Should().NotContain(longResult, + "tool result should be truncated in the summary, not included verbatim"); + // But the truncated version (first 200 chars) should appear + result.Content.Should().Contain(new string('Z', 200)); + } + + // ── MockLlmProvider Health ────────────────────────────────── + + [Fact] + public async Task MockProvider_GetHealthAsync_ReturnsIsMockTrue() + { + var provider = new MockLlmProvider(); + + var health = await provider.GetHealthAsync(); + + health.IsAvailable.Should().BeTrue(); + health.IsMock.Should().BeTrue(); + health.ProviderName.Should().Be("Mock"); + health.IsProbed.Should().BeFalse(); + } + + [Fact] + public async Task MockProvider_ProbeAsync_ReturnsIsProbedTrue() + { + var provider = new MockLlmProvider(); + + var health = await provider.ProbeAsync(); + + health.IsAvailable.Should().BeTrue(); + health.IsMock.Should().BeTrue(); + health.IsProbed.Should().BeTrue(); + } + + // ── Provider selection edge cases ─────────────────────────── + + [Fact] + public void ProviderSelection_NullSettings_FallsBackToMock() + { + var settings = new LlmProviderSettings(); + + var result = LlmProviderSelectionPolicy.Evaluate(settings, "Production"); + + result.ProviderKind.Should().Be(LlmProviderKind.Mock); + } + + [Fact] + public void ProviderSelection_EmptyProviderString_FallsBackToMock() + { + var settings = new LlmProviderSettings + { + EnableLiveProviders = true, + Provider = "" + }; + + var result = LlmProviderSelectionPolicy.Evaluate(settings, "Production"); + + result.ProviderKind.Should().Be(LlmProviderKind.Mock); + } + + [Fact] + public void ProviderSelection_CaseInsensitive_OpenAI() + { + var settings = new LlmProviderSettings + { + EnableLiveProviders = true, + Provider = "openai", // lowercase + OpenAi = new OpenAiProviderSettings + { + ApiKey = "test-key", + BaseUrl = "https://api.openai.com/v1", + Model = "gpt-4o-mini", + TimeoutSeconds = 30 + } + }; + + var result = LlmProviderSelectionPolicy.Evaluate(settings, "Production"); + + result.ProviderKind.Should().Be(LlmProviderKind.OpenAi); + } + + // ── LlmCompletionResult record behavior ───────────────────── + + [Fact] + public void LlmCompletionResult_DefaultValues_AreSensible() + { + var result = new LlmCompletionResult("test content", 10, false); + + result.Provider.Should().Be("Mock"); + result.Model.Should().Be("mock-default"); + result.IsDegraded.Should().BeFalse(); + result.DegradedReason.Should().BeNull(); + result.Instructions.Should().BeNull(); + } + + [Fact] + public void LlmToolCompletionResult_DegradedWithContent_IsAccessible() + { + var result = new LlmToolCompletionResult( + Content: "Degraded response", + TokensUsed: 0, + Provider: "OpenAI", + Model: "gpt-4o-mini", + ToolCalls: null, + IsComplete: true, + IsDegraded: true, + DegradedReason: "Rate limited"); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Be("Rate limited"); + result.Content.Should().Be("Degraded response"); + result.IsComplete.Should().BeTrue(); + } + + // ── Kill switch settings ───────────────────────────────────── + + [Fact] + public void KillSwitchSettings_DefaultValues_AreInactive() + { + var settings = new LlmKillSwitchSettings(); + + settings.GlobalKill.Should().BeFalse(); + settings.KilledSurfaces.Should().BeEmpty(); + settings.KilledUserIds.Should().BeEmpty(); + } + + /// + /// Minimal ILlmProvider implementation that does NOT override CompleteWithToolsAsync, + /// ensuring the default interface method (which throws) is tested. + /// + private class MinimalTestProvider : ILlmProvider + { + public Task CompleteAsync(ChatCompletionRequest request, CancellationToken ct = default) + => Task.FromResult(new LlmCompletionResult("test", 0, false)); + + public async IAsyncEnumerable StreamAsync(ChatCompletionRequest request, CancellationToken ct = default) + { + yield return new LlmTokenEvent("test", true); + await Task.CompletedTask; + } + + public Task GetHealthAsync(CancellationToken ct = default) + => Task.FromResult(new LlmHealthStatus(true, "MinimalTest")); + + public Task ProbeAsync(CancellationToken ct = default) + => Task.FromResult(new LlmHealthStatus(true, "MinimalTest", IsProbed: true)); + } +} diff --git a/backend/tests/Taskdeck.Application.Tests/Services/ToolCallingChatOrchestratorEdgeCaseTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/ToolCallingChatOrchestratorEdgeCaseTests.cs new file mode 100644 index 000000000..d7ee3d808 --- /dev/null +++ b/backend/tests/Taskdeck.Application.Tests/Services/ToolCallingChatOrchestratorEdgeCaseTests.cs @@ -0,0 +1,745 @@ +using System.Text.Json; +using FluentAssertions; +using Microsoft.Extensions.Logging; +using Moq; +using Xunit; +using Taskdeck.Application.Services; +using Taskdeck.Application.Services.Tools; + +namespace Taskdeck.Application.Tests.Services; + +/// +/// Edge case and boundary tests for ToolCallingChatOrchestrator. +/// Covers: per-round timeout, total timeout at round 4, empty tool call lists, +/// concurrent tool calls within a round, tool-not-found with suggestion, +/// tool returning very large results, multiple tools with mixed errors, +/// cancellation token propagation, metadata JSON generation, and +/// incomplete result with no tool calls. +/// +public class ToolCallingChatOrchestratorEdgeCaseTests +{ + private readonly Guid _boardId = Guid.NewGuid(); + private readonly Guid _userId = Guid.NewGuid(); + + private static ChatCompletionRequest MakeRequest(string userMessage) => new( + new List { new("User", userMessage) }); + + private static Mock CreateMockExecutor(string toolName, string result) + { + var mock = new Mock(); + mock.SetupGet(e => e.ToolName).Returns(toolName); + mock.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(result); + mock.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(result); + return mock; + } + + [Fact] + public async Task ExecuteAsync_PerRoundTimeout_ReturnsDegradedResult() + { + // Simulate a per-round timeout by throwing OperationCanceledException + // with a non-cancelled external token. This exercises the orchestrator's + // catch(OperationCanceledException) when !ct.IsCancellationRequested path + // without waiting for the real 30-second timeout. + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ThrowsAsync(new OperationCanceledException("Simulated per-round timeout")); + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("What cards?"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Contain("timeout"); + } + + [Fact] + public async Task ExecuteAsync_IncompleteResultWithNoToolCalls_ReturnsDegraded() + { + // Provider returns IsComplete=false but with no tool calls + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(new LlmToolCompletionResult( + Content: null, + TokensUsed: 10, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, // No tool calls + IsComplete: false)); // But not complete + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Something"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Contain("empty tool call list"); + result.Content.Should().NotBeNullOrEmpty(); + } + + [Fact] + public async Task ExecuteAsync_IncompleteResultWithEmptyToolCallList_ReturnsDegraded() + { + // Provider returns IsComplete=false with an empty (non-null) tool call list + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(new LlmToolCompletionResult( + Content: null, + TokensUsed: 10, + Provider: "Test", + Model: "test-v1", + ToolCalls: Array.Empty(), + IsComplete: false)); + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Something"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Contain("empty tool call list"); + } + + [Fact] + public async Task ExecuteAsync_MultipleToolCallsInOneRound_AllExecuted() + { + var mock = new Mock(); + var callSequence = 0; + + var args1 = JsonDocument.Parse("{}").RootElement; + var args2 = JsonDocument.Parse("{\"column_name\":\"Backlog\"}").RootElement; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + // Return two tool calls in a single round + return new LlmToolCompletionResult( + Content: null, + TokensUsed: 50, + Provider: "Test", + Model: "test-v1", + ToolCalls: new[] + { + new ToolCallRequest("call-1", "list_board_columns", args1), + new ToolCallRequest("call-2", "list_cards_in_column", args2) + }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Here is the board overview.", + TokensUsed: 80, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, + IsComplete: true); + }); + + var colsExecutor = CreateMockExecutor("list_board_columns", "{\"columns\":[{\"name\":\"Backlog\"}]}"); + var cardsExecutor = CreateMockExecutor("list_cards_in_column", "{\"cards\":[],\"total\":0}"); + var registry = new ToolExecutorRegistry(new[] { colsExecutor.Object, cardsExecutor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show board overview"), _boardId); + + result.IsDegraded.Should().BeFalse(); + result.Rounds.Should().Be(2); + result.ToolCallLog.Should().HaveCount(2); + result.ToolCallLog[0].ToolName.Should().Be("list_board_columns"); + result.ToolCallLog[1].ToolName.Should().Be("list_cards_in_column"); + + // Verify both executors were called + colsExecutor.Verify(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny()), Times.Once); + cardsExecutor.Verify(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny()), Times.Once); + } + + [Fact] + public async Task ExecuteAsync_MultipleToolCallsWithMixedErrors_ContinuesWithAll() + { + var mock = new Mock(); + var callSequence = 0; + + var args1 = JsonDocument.Parse("{}").RootElement; + var args2 = JsonDocument.Parse("{\"card_id\":\"bad-id\"}").RootElement; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + return new LlmToolCompletionResult( + Content: null, + TokensUsed: 50, + Provider: "Test", + Model: "test-v1", + ToolCalls: new[] + { + new ToolCallRequest("call-1", "list_board_columns", args1), + new ToolCallRequest("call-2", "get_card_details", args2) + }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "I found some columns but couldn't get card details.", + TokensUsed: 80, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, + IsComplete: true); + }); + + var colsExecutor = CreateMockExecutor("list_board_columns", "{\"columns\":[]}"); + var cardExecutor = new Mock(); + cardExecutor.SetupGet(e => e.ToolName).Returns("get_card_details"); + cardExecutor.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ThrowsAsync(new ArgumentException("Invalid card ID format")); + + var registry = new ToolExecutorRegistry(new[] { colsExecutor.Object, cardExecutor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show card and columns"), _boardId); + + result.IsDegraded.Should().BeFalse(); // LLM recovered with a final response + result.ToolCallLog.Should().HaveCount(2); + result.ToolCallLog[0].IsError.Should().BeFalse(); // columns succeeded + result.ToolCallLog[1].IsError.Should().BeTrue(); // card details failed + } + + [Fact] + public async Task ExecuteAsync_ToolNotFound_ProvidesAvailableToolSuggestion() + { + var args = JsonDocument.Parse("{}").RootElement; + var mock = new Mock(); + var callSequence = 0; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + return new LlmToolCompletionResult( + Content: null, + TokensUsed: 50, + Provider: "Test", + Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest("call-1", "invented_tool", args) }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Let me try another approach.", + TokensUsed: 80, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, + IsComplete: true); + }); + + var executor = CreateMockExecutor("list_board_columns", "{\"columns\":[]}"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Do something"), _boardId); + + result.ToolCallLog.Should().HaveCount(1); + result.ToolCallLog[0].IsError.Should().BeTrue(); + // The error response should mention available tools + result.ToolCallLog[0].ResultSummary.Should().Contain("list_board_columns"); + } + + [Fact] + public async Task ExecuteAsync_GenericProviderException_ReturnsDegraded() + { + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ThrowsAsync(new HttpRequestException("HTTP 500 from provider")); + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Hello"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Contain("internal error"); + } + + [Fact] + public async Task ExecuteAsync_CancellationTokenRespected_ThrowsOperationCanceled() + { + // When the external cancellation token is already cancelled before the call, + // the orchestrator should throw immediately at ct.ThrowIfCancellationRequested(). + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(new LlmToolCompletionResult( + Content: "done", TokensUsed: 0, + Provider: "Test", Model: "test-v1", + ToolCalls: null, IsComplete: true)); + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + using var cts = new CancellationTokenSource(); + cts.Cancel(); // Already cancelled + + var act = async () => await orchestrator.ExecuteAsync( + MakeRequest("Hello"), _boardId, _userId, cts.Token); + + await act.Should().ThrowAsync(); + } + + [Fact] + public async Task ExecuteAsync_UserIdPassedToExecutor_ViaToolExecutionContext() + { + var args = JsonDocument.Parse("{}").RootElement; + var mock = new Mock(); + var callSequence = 0; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + return new LlmToolCompletionResult( + Content: null, TokensUsed: 50, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest("call-1", "list_board_columns", args) }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Done", TokensUsed: 80, + Provider: "Test", Model: "test-v1", + ToolCalls: null, IsComplete: true); + }); + + ToolExecutionContext? capturedContext = null; + var executor = new Mock(); + executor.SetupGet(e => e.ToolName).Returns("list_board_columns"); + executor.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((ctx, _, _) => capturedContext = ctx) + .ReturnsAsync("{\"columns\":[]}"); + + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + await orchestrator.ExecuteAsync(MakeRequest("Show columns"), _boardId, _userId); + + capturedContext.Should().NotBeNull(); + capturedContext!.BoardId.Should().Be(_boardId); + capturedContext.UserId.Should().Be(_userId); + } + + [Fact] + public async Task ExecuteAsync_ToolReturnsLargeResult_TruncatedInLog() + { + var args = JsonDocument.Parse("{}").RootElement; + var mock = new Mock(); + var callSequence = 0; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + return new LlmToolCompletionResult( + Content: null, TokensUsed: 50, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest("call-1", "list_cards_in_column", args) }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Here are many cards.", TokensUsed: 80, + Provider: "Test", Model: "test-v1", + ToolCalls: null, IsComplete: true); + }); + + // Return a large result (simulating 1000 cards) + var largeResult = new string('X', 5000); + var executor = CreateMockExecutor("list_cards_in_column", largeResult); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show all cards"), _boardId); + + result.IsDegraded.Should().BeFalse(); + result.ToolCallLog.Should().HaveCount(1); + // The log should truncate large results (200 chars per TruncateForLog) + result.ToolCallLog[0].ResultSummary.Length.Should().BeLessThan(5000); + result.ToolCallLog[0].ResultSummary.Should().Contain("truncated"); + } + + [Fact] + public void BuildToolCallMetadataJson_EmptyLog_ReturnsNull() + { + var result = ToolCallingChatOrchestrator.BuildToolCallMetadataJson( + new List(), totalRounds: 0, totalTokens: 0); + + result.Should().BeNull(); + } + + [Fact] + public void BuildToolCallMetadataJson_WithEntries_ReturnsValidJson() + { + var args = JsonDocument.Parse("{\"column_name\":\"Backlog\"}").RootElement; + var log = new List + { + new(1, "list_cards_in_column", args, "{\"cards\":[]}", false), + new(2, "get_card_details", args, "error", true) + }; + + var result = ToolCallingChatOrchestrator.BuildToolCallMetadataJson(log, totalRounds: 2, totalTokens: 150); + + result.Should().NotBeNullOrEmpty(); + var doc = JsonDocument.Parse(result!); + doc.RootElement.GetProperty("rounds").GetInt32().Should().Be(2); + doc.RootElement.GetProperty("total_tokens").GetInt32().Should().Be(150); + doc.RootElement.GetProperty("tool_calls").GetArrayLength().Should().Be(2); + } + + [Fact] + public void ComputeToolCallFingerprint_EmptyList_DoesNotThrow() + { + var calls = new List(); + + var act = () => ToolCallingChatOrchestrator.ComputeToolCallFingerprint(calls); + + act.Should().NotThrow(); + var result = act(); + result.Should().NotBeNullOrEmpty(); + } + + [Fact] + public void ComputeToolCallFingerprint_SingleCall_ProducesDeterministicResult() + { + var args = JsonDocument.Parse("{\"q\":\"test\"}").RootElement; + var calls = new List { new("call-1", "search_cards", args) }; + + var fp1 = ToolCallingChatOrchestrator.ComputeToolCallFingerprint(calls); + var fp2 = ToolCallingChatOrchestrator.ComputeToolCallFingerprint(calls); + + fp1.Should().Be(fp2); + } + + [Fact] + public async Task ExecuteAsync_StatusNotifier_InvokedForEachToolInRound() + { + var mock = new Mock(); + var callSequence = 0; + var args = JsonDocument.Parse("{}").RootElement; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence == 1) + { + return new LlmToolCompletionResult( + Content: null, TokensUsed: 50, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] + { + new ToolCallRequest("call-1", "list_board_columns", args), + new ToolCallRequest("call-2", "get_board_labels", args) + }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Done.", TokensUsed: 80, + Provider: "Test", Model: "test-v1", + ToolCalls: null, IsComplete: true); + }); + + var colsExecutor = CreateMockExecutor("list_board_columns", "{\"columns\":[]}"); + var labelsExecutor = CreateMockExecutor("get_board_labels", "{\"labels\":[]}"); + var notifier = new Mock(); + var registry = new ToolExecutorRegistry(new[] { colsExecutor.Object, labelsExecutor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object, + notifier.Object); + + await orchestrator.ExecuteAsync(MakeRequest("Board overview"), _boardId); + + // Should be called once for each tool in the round (2 tools) + notifier.Verify(n => n.NotifyToolStatusAsync( + _boardId, It.IsAny(), It.IsAny(), + 1, ToolCallingChatOrchestrator.MaxRounds, + It.IsAny()), Times.Exactly(2)); + } + + [Fact] + public async Task ExecuteAsync_TokensAccumulated_AcrossAllRounds() + { + var mock = new Mock(); + var callSequence = 0; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence <= 3) + { + var colName = callSequence switch + { + 1 => "Backlog", + 2 => "InProgress", + _ => "Done" + }; + var args = JsonDocument.Parse($"{{\"column_name\":\"{colName}\"}}").RootElement; + return new LlmToolCompletionResult( + Content: null, + TokensUsed: 25, + Provider: "Test", + Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest($"call-{callSequence}", "list_cards_in_column", args) }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Found the cards.", + TokensUsed: 50, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, + IsComplete: true); + }); + + var executor = CreateMockExecutor("list_cards_in_column", "{\"cards\":[],\"total\":0}"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show everything"), _boardId); + + // 3 rounds * 25 tokens + 1 final round * 50 tokens = 125 + result.TokensUsed.Should().Be(125); + result.Rounds.Should().Be(4); + } + + [Fact] + public async Task ExecuteAsync_ProviderReturnsNullContent_OnComplete_UsesEmptyString() + { + var mock = new Mock(); + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(new LlmToolCompletionResult( + Content: null, // null content on complete + TokensUsed: 10, + Provider: "Test", + Model: "test-v1", + ToolCalls: null, + IsComplete: true)); + + var registry = new ToolExecutorRegistry(Array.Empty()); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Hi"), _boardId); + + result.IsDegraded.Should().BeFalse(); + result.Content.Should().Be(""); // Empty string, not null + } + + [Fact] + public async Task ExecuteAsync_ExhaustedRounds_PartialSummaryIncludesSuccessfulTools() + { + var mock = new Mock(); + var callSequence = 0; + var columnNames = new[] { "A", "B", "C", "D", "E" }; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + var colName = columnNames[(callSequence - 1) % columnNames.Length]; + var args = JsonDocument.Parse($"{{\"column_name\":\"{colName}\"}}").RootElement; + return new LlmToolCompletionResult( + Content: null, TokensUsed: 20, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest($"call-{callSequence}", "list_cards_in_column", args) }, + IsComplete: false); + }); + + var executor = CreateMockExecutor("list_cards_in_column", "{\"cards\":[]}"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show everything"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.Content.Should().Contain("[list_cards_in_column]"); + } + + [Fact] + public async Task ExecuteAsync_LoopDetection_AbortsOnIdenticalConsecutiveToolCalls() + { + // When the LLM issues the exact same tool calls in consecutive rounds + // (and the previous round had no errors), the orchestrator should abort + // with a loop-detected degraded result. + var args = JsonDocument.Parse("{\"column_name\":\"Backlog\"}").RootElement; + var mock = new Mock(); + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(new LlmToolCompletionResult( + Content: null, TokensUsed: 25, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest("call-1", "list_cards_in_column", args) }, + IsComplete: false)); + + var executor = CreateMockExecutor("list_cards_in_column", "{\"cards\":[]}"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show backlog cards"), _boardId); + + result.IsDegraded.Should().BeTrue(); + result.DegradedReason.Should().Contain("loop"); + result.Rounds.Should().Be(2, "loop should be detected on the second round with identical calls"); + } + + [Fact] + public async Task ExecuteAsync_LoopDetection_RetriesAllowedAfterErrors() + { + // When the previous round had errors, the LLM may legitimately retry + // the same tool calls. Loop detection should NOT trigger in this case. + var args = JsonDocument.Parse("{\"column_name\":\"Backlog\"}").RootElement; + var mock = new Mock(); + var callSequence = 0; + + mock.Setup(p => p.CompleteWithToolsAsync( + It.IsAny(), + It.IsAny>(), + It.IsAny?>(), + It.IsAny())) + .ReturnsAsync(() => + { + callSequence++; + if (callSequence <= 2) + { + return new LlmToolCompletionResult( + Content: null, TokensUsed: 25, + Provider: "Test", Model: "test-v1", + ToolCalls: new[] { new ToolCallRequest("call-1", "list_cards_in_column", args) }, + IsComplete: false); + } + return new LlmToolCompletionResult( + Content: "Found the cards.", TokensUsed: 50, + Provider: "Test", Model: "test-v1", + ToolCalls: null, IsComplete: true); + }); + + // First call throws (error), second call succeeds. This means the retry + // of identical tool calls should be allowed because the previous round had errors. + var executorCallCount = 0; + var executor = new Mock(); + executor.SetupGet(e => e.ToolName).Returns("list_cards_in_column"); + executor.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync(() => + { + executorCallCount++; + if (executorCallCount == 1) + throw new InvalidOperationException("Transient failure"); + return "{\"cards\":[]}"; + }); + + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + var orchestrator = new ToolCallingChatOrchestrator( + mock.Object, registry, + new Mock>().Object); + + var result = await orchestrator.ExecuteAsync(MakeRequest("Show backlog cards"), _boardId); + + result.IsDegraded.Should().BeFalse("retry after error should not trigger loop detection"); + result.Rounds.Should().Be(3); + } +} diff --git a/backend/tests/Taskdeck.Application.Tests/Services/ToolExecutorRegistryEdgeCaseTests.cs b/backend/tests/Taskdeck.Application.Tests/Services/ToolExecutorRegistryEdgeCaseTests.cs new file mode 100644 index 000000000..0cc128c46 --- /dev/null +++ b/backend/tests/Taskdeck.Application.Tests/Services/ToolExecutorRegistryEdgeCaseTests.cs @@ -0,0 +1,158 @@ +using System.Text.Json; +using FluentAssertions; +using Moq; +using Xunit; +using Taskdeck.Application.Services; +using Taskdeck.Application.Services.Tools; + +namespace Taskdeck.Application.Tests.Services; + +/// +/// Edge case tests for ToolExecutorRegistry. +/// Covers: empty registry, case-insensitive lookup, GetRegisteredToolNames, +/// duplicate tool names, and null/empty tool name lookup. +/// +public class ToolExecutorRegistryEdgeCaseTests +{ + private static Mock MakeExecutor(string name) + { + var mock = new Mock(); + mock.SetupGet(e => e.ToolName).Returns(name); + mock.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync("{}"); + mock.Setup(e => e.ExecuteAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .ReturnsAsync("{}"); + return mock; + } + + [Fact] + public void GetExecutor_EmptyRegistry_ReturnsNull() + { + var registry = new ToolExecutorRegistry(Array.Empty()); + + var result = registry.GetExecutor("list_board_columns"); + + result.Should().BeNull(); + } + + [Fact] + public void GetExecutor_CaseInsensitive_FindsTool() + { + var executor = MakeExecutor("list_board_columns"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + + var result = registry.GetExecutor("LIST_BOARD_COLUMNS"); + + result.Should().NotBeNull(); + result!.ToolName.Should().Be("list_board_columns"); + } + + [Fact] + public void GetExecutor_MixedCase_FindsTool() + { + var executor = MakeExecutor("Get_Card_Details"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + + var result = registry.GetExecutor("get_card_details"); + + result.Should().NotBeNull(); + } + + [Fact] + public void GetRegisteredToolNames_EmptyRegistry_ReturnsEmpty() + { + var registry = new ToolExecutorRegistry(Array.Empty()); + + var names = registry.GetRegisteredToolNames(); + + names.Should().BeEmpty(); + } + + [Fact] + public void GetRegisteredToolNames_MultipleExecutors_ReturnsAll() + { + var e1 = MakeExecutor("list_board_columns"); + var e2 = MakeExecutor("get_card_details"); + var e3 = MakeExecutor("search_cards"); + var registry = new ToolExecutorRegistry(new[] { e1.Object, e2.Object, e3.Object }); + + var names = registry.GetRegisteredToolNames(); + + names.Should().HaveCount(3); + names.Should().Contain("list_board_columns"); + names.Should().Contain("get_card_details"); + names.Should().Contain("search_cards"); + } + + [Fact] + public void GetExecutor_NonExistentTool_ReturnsNull() + { + var executor = MakeExecutor("list_board_columns"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + + var result = registry.GetExecutor("nonexistent_tool"); + + result.Should().BeNull(); + } + + [Fact] + public void GetExecutor_EmptyString_ReturnsNull() + { + var executor = MakeExecutor("list_board_columns"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + + var result = registry.GetExecutor(""); + + result.Should().BeNull(); + } + + [Fact] + public void ToolExecutionContext_Properties_AreAccessible() + { + var boardId = Guid.NewGuid(); + var userId = Guid.NewGuid(); + + var context = new ToolExecutionContext(boardId, userId); + + context.BoardId.Should().Be(boardId); + context.UserId.Should().Be(userId); + } + + [Fact] + public void ToolExecutionContext_Equality_WorksByValue() + { + var boardId = Guid.NewGuid(); + var userId = Guid.NewGuid(); + + var ctx1 = new ToolExecutionContext(boardId, userId); + var ctx2 = new ToolExecutionContext(boardId, userId); + + ctx1.Should().Be(ctx2); // Record equality + } + + [Fact] + public void Constructor_DuplicateToolNames_ThrowsOnCreation() + { + // ToolExecutorRegistry uses ToDictionary internally which throws + // ArgumentException on duplicate keys. This verifies the crash behavior + // is surfaced rather than silently losing executors. + var e1 = MakeExecutor("list_board_columns"); + var e2 = MakeExecutor("list_board_columns"); // duplicate + + var act = () => new ToolExecutorRegistry(new[] { e1.Object, e2.Object }); + + act.Should().Throw("duplicate tool names should not be silently accepted"); + } + + [Fact] + public void GetExecutor_NullToolName_ThrowsArgumentNullException() + { + var executor = MakeExecutor("list_board_columns"); + var registry = new ToolExecutorRegistry(new[] { executor.Object }); + + // Dictionary.TryGetValue throws ArgumentNullException on null key + var act = () => registry.GetExecutor(null!); + + act.Should().Throw(); + } +}