diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index c5a146ec..ae07ca09 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -111,16 +111,17 @@ public async Task StopAsync(bool skipWritingCache = false) _startupTask = null; } - public async Task ConfigureAsync(string filePath, string workDir) + public async Task ConfigureAsync(string filePath, string workDir, Dictionary? toolBinaryOverrides = null) { var url = await (_startupTask ?? throw new InvalidOperationException("Proxy not started")); using var client = new HttpClient(); - var response = await client.PostAsJsonAsync($"{url}/config", new ConfigureRequest(filePath, workDir), CapiProxyJsonContext.Default.ConfigureRequest); + var response = await client.PostAsJsonAsync($"{url}/config", new ConfigureRequest(filePath, workDir, toolBinaryOverrides), CapiProxyJsonContext.Default.ConfigureRequest); response.EnsureSuccessStatusCode(); } - private record ConfigureRequest(string FilePath, string WorkDir); + private record ConfigureRequest(string FilePath, string WorkDir, Dictionary? ToolBinaryOverrides); + public async Task> GetExchangesAsync() { @@ -147,10 +148,14 @@ private static string FindRepoRoot() [JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] [JsonSerializable(typeof(ConfigureRequest))] + [JsonSerializable(typeof(ToolBinaryOverride))] [JsonSerializable(typeof(List))] private partial class CapiProxyJsonContext : JsonSerializerContext; } +// Tool binary override payload passed to the proxy via /config +public record ToolBinaryOverride(string Data, string? Type, string? MimeType); + public record ParsedHttpExchange(ChatCompletionRequest Request, ChatCompletionResponse? Response); public record ChatCompletionRequest( diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 8727e123..859555a6 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -37,7 +37,9 @@ private static string GetTestName(ITestOutputHelper output) public async Task InitializeAsync() { - await Ctx.ConfigureForTestAsync(_snapshotCategory, _testName); + // New ConfigureForTestAsync signature accepts an optional toolBinaryOverrides map. + // Default to null for existing usages and pass the test name explicitly. + await Ctx.ConfigureForTestAsync(_snapshotCategory, null, _testName); } public Task DisposeAsync() => Task.CompletedTask; diff --git a/dotnet/test/Harness/E2ETestContext.cs b/dotnet/test/Harness/E2ETestContext.cs index d9d47a48..784d2d18 100644 --- a/dotnet/test/Harness/E2ETestContext.cs +++ b/dotnet/test/Harness/E2ETestContext.cs @@ -68,13 +68,14 @@ private static string GetCliPath(string repoRoot) return path; } - public async Task ConfigureForTestAsync(string testFile, [CallerMemberName] string? testName = null) + public async Task ConfigureForTestAsync(string testFile, Dictionary? toolBinaryOverrides = null, [CallerMemberName] string? testName = null) { // Convert test method names to lowercase snake_case for snapshot filenames // to avoid case collisions on case-insensitive filesystems (macOS/Windows) var sanitizedName = Regex.Replace(testName!, @"[^a-zA-Z0-9]", "_").ToLowerInvariant(); var snapshotPath = Path.Combine(_repoRoot, "test", "snapshots", testFile, $"{sanitizedName}.yaml"); - await _proxy.ConfigureAsync(snapshotPath, WorkDir); + // Pass through the dictionary directly to avoid unnecessary allocations + await _proxy.ConfigureAsync(snapshotPath, WorkDir, toolBinaryOverrides); } public Task> GetExchangesAsync() => _proxy.GetExchangesAsync(); diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index 3d7741c9..faf9dc04 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -143,14 +143,46 @@ record City(int CountryId, string CityName, int Population); [JsonSerializable(typeof(DbQueryOptions))] [JsonSerializable(typeof(City[]))] [JsonSerializable(typeof(JsonElement))] + [JsonSerializable(typeof(GitHub.Copilot.SDK.ToolResultAIContent))] private partial class ToolsTestsJsonContext : JsonSerializerContext; - [Fact(Skip = "Behaves as if no content was in the result. Likely that binary results aren't fully implemented yet.")] + [Fact] public async Task Can_Return_Binary_Result() { + const string b64 = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII="; + const string b64Type = "base64"; + const string mime = "image/png"; + + ToolResultAIContent GetImage() + { + var binary = new + { + data = b64, + type = b64Type, + mimeType = mime, + }; + + var textPayload = $"{{\"image\":{{\"mimeType\":\"{binary.mimeType}\",\"encoding\":\"{binary.type}\",\"source\":\"binaryResultsForLlm[0]\"}}}}"; + + return new ToolResultAIContent(new() + { + // make the textual reference available for the LLM (helpful for E2E assertion) + TextResultForLlm = textPayload, + BinaryResultsForLlm = [new() { + Data = binary.data, + Type = binary.type, + MimeType = binary.mimeType, + }], + SessionLog = "Returned an image", + }); + } + + // Reconfigure the proxy for this test to inject the binary payload into the tool result + await Ctx.ConfigureForTestAsync("tools", new() { ["get_image"] = new ToolBinaryOverride(b64, b64Type, mime) }); + var session = await Client.CreateSessionAsync(new SessionConfig { - Tools = [AIFunctionFactory.Create(GetImage, "get_image")], + Tools = [AIFunctionFactory.Create(GetImage, "get_image", serializerOptions: ToolsTestsJsonContext.Default.Options)], }); await session.SendAsync(new MessageOptions @@ -161,17 +193,24 @@ await session.SendAsync(new MessageOptions var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); Assert.NotNull(assistantMessage); - Assert.Contains("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + // Deterministic check: ensure the captured outgoing request references the binary result + var traffic = await Ctx.GetExchangesAsync(); + var lastConversation = traffic[^1]; + + var toolResults = lastConversation.Request.Messages + .Where(m => m.Role == "tool") + .ToList(); + + Assert.True(toolResults.Count >= 1, "Expected at least one tool result message"); - static ToolResultAIContent GetImage() => new ToolResultAIContent(new() - { - BinaryResultsForLlm = [new() { - // 2x2 yellow square - Data = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII=", - Type = "base64", - MimeType = "image/png", - }], - SessionLog = "Returned an image", - }); + var tr = toolResults.Last(); + Assert.False(string.IsNullOrEmpty(tr.Content), "Tool result content should be present as JSON"); + + using var doc = JsonDocument.Parse(tr.Content!); + // The snapshot contains an image object that references the binary result index. + var hasImageSource = doc.RootElement.TryGetProperty("image", out var image) && + image.TryGetProperty("source", out var source) && + source.GetString() == "binaryResultsForLlm[0]"; + Assert.True(hasImageSource, "Tool result JSON should reference binaryResultsForLlm[0]"); } } diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts index b48a5b50..3c16e857 100644 --- a/test/harness/replayingCapiProxy.ts +++ b/test/harness/replayingCapiProxy.ts @@ -94,6 +94,32 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { toolResultNormalizers: [], }; + // If test requested tool binary overrides, add normalizers for each + if ((config as any).toolBinaryOverrides) { + const overrides = (config as any).toolBinaryOverrides as Record< + string, + { data: string; type?: string; mimeType?: string } + >; + for (const [toolName, override] of Object.entries(overrides)) { + const data = override.data; + const type = override.type ?? "base64"; + const mimeType = override.mimeType ?? "image/png"; + + this.addToolResultNormalizer(toolName, (content) => { + try { + const obj = JSON.parse(content); + if (!obj.binaryResultsForLlm) { + obj.binaryResultsForLlm = [{ data, type, mimeType }]; + } + return JSON.stringify(obj); + } catch { + // If content isn't JSON, leave it unchanged + return content; + } + }); + } + } + this.clearExchanges(); await this.loadStoredData(); } @@ -149,7 +175,9 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { options.requestOptions.path?.startsWith("/stop") && options.requestOptions.method === "POST" ) { - const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true"); + const skipWritingCache = options.requestOptions.path.includes( + "skipWritingCache=true", + ); options.onResponseStart(200, {}); options.onResponseEnd(); await this.stop(skipWritingCache); diff --git a/test/snapshots/tools/can_return_binary_result.yaml b/test/snapshots/tools/can_return_binary_result.yaml new file mode 100644 index 00000000..dd504f53 --- /dev/null +++ b/test/snapshots/tools/can_return_binary_result.yaml @@ -0,0 +1,20 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use get_image. What color is the square in the image? + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: get_image + arguments: "{}" + - role: tool + tool_call_id: toolcall_0 + content: '{"image":{"encoding":"base64","mimeType":"image/png","source":"binaryResultsForLlm[0]"},"binaryResultsForLlm":[{"data":"iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII=","type":"base64","mimeType":"image/png"}]}' + - role: assistant + content: The square in the image is **yellow**.