From 5ef6c4efb16b14508114f08c7a8e11423a0a8c36 Mon Sep 17 00:00:00 2001 From: vivganes Date: Wed, 28 Jan 2026 05:33:31 +0530 Subject: [PATCH 1/5] Fix skipped test in dotnet --- dotnet/test/ToolsTests.cs | 7 +-- .../tools/can_return_binary_result.yaml | 46 +++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 test/snapshots/tools/can_return_binary_result.yaml diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index 3d7741c9..3e777e45 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -143,14 +143,15 @@ record City(int CountryId, string CityName, int Population); [JsonSerializable(typeof(DbQueryOptions))] [JsonSerializable(typeof(City[]))] [JsonSerializable(typeof(JsonElement))] + [JsonSerializable(typeof(GitHub.Copilot.SDK.ToolResultAIContent))] private partial class ToolsTestsJsonContext : JsonSerializerContext; - [Fact(Skip = "Behaves as if no content was in the result. Likely that binary results aren't fully implemented yet.")] + [Fact] public async Task Can_Return_Binary_Result() { var session = await Client.CreateSessionAsync(new SessionConfig { - Tools = [AIFunctionFactory.Create(GetImage, "get_image")], + Tools = [AIFunctionFactory.Create(GetImage, "get_image", serializerOptions: ToolsTestsJsonContext.Default.Options)], }); await session.SendAsync(new MessageOptions @@ -161,7 +162,7 @@ await session.SendAsync(new MessageOptions var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); Assert.NotNull(assistantMessage); - Assert.Contains("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + Assert.Matches("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); static ToolResultAIContent GetImage() => new ToolResultAIContent(new() { diff --git a/test/snapshots/tools/can_return_binary_result.yaml b/test/snapshots/tools/can_return_binary_result.yaml new file mode 100644 index 00000000..d25c2200 --- /dev/null +++ b/test/snapshots/tools/can_return_binary_result.yaml @@ -0,0 +1,46 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use get_image. What color is the square in the image? + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Retrieving and analyzing image"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: get_image + arguments: "{}" + - messages: + - role: system + content: ${system} + - role: user + content: Use get_image. What color is the square in the image? + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Retrieving and analyzing image"}' + - id: toolcall_1 + type: function + function: + name: get_image + arguments: "{}" + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + - role: assistant + content: The square in the image is **yellow**. From 2d2d7df04ea63f21115c51da785da0628d2fa595 Mon Sep 17 00:00:00 2001 From: vivganes Date: Wed, 28 Jan 2026 05:43:15 +0530 Subject: [PATCH 2/5] fix review comments --- dotnet/test/ToolsTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index 3e777e45..9e91a19d 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -162,7 +162,7 @@ await session.SendAsync(new MessageOptions var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); Assert.NotNull(assistantMessage); - Assert.Matches("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + Assert.Contains("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); static ToolResultAIContent GetImage() => new ToolResultAIContent(new() { From 9ffdfc35050c88982c55e6b9346da940d9490add Mon Sep 17 00:00:00 2001 From: vivganes Date: Wed, 28 Jan 2026 18:16:19 +0530 Subject: [PATCH 3/5] Make test deterministic and meaningful --- dotnet/test/Harness/CapiProxy.cs | 11 +++- dotnet/test/Harness/E2ETestBase.cs | 4 +- dotnet/test/Harness/E2ETestContext.cs | 4 +- dotnet/test/ToolsTests.cs | 60 +++++++++++++++---- test/harness/replayingCapiProxy.ts | 30 +++++++++- .../tools/can_return_binary_result.yaml | 28 +-------- 6 files changed, 92 insertions(+), 45 deletions(-) diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index c5a146ec..ac98aac8 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -111,16 +111,18 @@ public async Task StopAsync(bool skipWritingCache = false) _startupTask = null; } - public async Task ConfigureAsync(string filePath, string workDir) + public async Task ConfigureAsync(string filePath, string workDir, Dictionary? toolBinaryOverrides = null) { var url = await (_startupTask ?? throw new InvalidOperationException("Proxy not started")); using var client = new HttpClient(); - var response = await client.PostAsJsonAsync($"{url}/config", new ConfigureRequest(filePath, workDir), CapiProxyJsonContext.Default.ConfigureRequest); + var response = await client.PostAsJsonAsync($"{url}/config", new ConfigureRequest(filePath, workDir, toolBinaryOverrides), CapiProxyJsonContext.Default.ConfigureRequest); response.EnsureSuccessStatusCode(); } - private record ConfigureRequest(string FilePath, string WorkDir); + private record ConfigureRequest(string FilePath, string WorkDir, Dictionary? ToolBinaryOverrides); + + public async Task> GetExchangesAsync() { @@ -151,6 +153,9 @@ private static string FindRepoRoot() private partial class CapiProxyJsonContext : JsonSerializerContext; } +// Tool binary override payload passed to the proxy via /config +public record ToolBinaryOverride(string Data, string? Type, string? MimeType); + public record ParsedHttpExchange(ChatCompletionRequest Request, ChatCompletionResponse? Response); public record ChatCompletionRequest( diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 8727e123..859555a6 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -37,7 +37,9 @@ private static string GetTestName(ITestOutputHelper output) public async Task InitializeAsync() { - await Ctx.ConfigureForTestAsync(_snapshotCategory, _testName); + // New ConfigureForTestAsync signature accepts an optional toolBinaryOverrides map. + // Default to null for existing usages and pass the test name explicitly. + await Ctx.ConfigureForTestAsync(_snapshotCategory, null, _testName); } public Task DisposeAsync() => Task.CompletedTask; diff --git a/dotnet/test/Harness/E2ETestContext.cs b/dotnet/test/Harness/E2ETestContext.cs index d9d47a48..b3bb7050 100644 --- a/dotnet/test/Harness/E2ETestContext.cs +++ b/dotnet/test/Harness/E2ETestContext.cs @@ -68,13 +68,13 @@ private static string GetCliPath(string repoRoot) return path; } - public async Task ConfigureForTestAsync(string testFile, [CallerMemberName] string? testName = null) + public async Task ConfigureForTestAsync(string testFile, Dictionary? toolBinaryOverrides = null, [CallerMemberName] string? testName = null) { // Convert test method names to lowercase snake_case for snapshot filenames // to avoid case collisions on case-insensitive filesystems (macOS/Windows) var sanitizedName = Regex.Replace(testName!, @"[^a-zA-Z0-9]", "_").ToLowerInvariant(); var snapshotPath = Path.Combine(_repoRoot, "test", "snapshots", testFile, $"{sanitizedName}.yaml"); - await _proxy.ConfigureAsync(snapshotPath, WorkDir); + await _proxy.ConfigureAsync(snapshotPath, WorkDir, toolBinaryOverrides?.ToDictionary(kv => kv.Key, kv => new ToolBinaryOverride(kv.Value.Data, kv.Value.Type, kv.Value.MimeType))); } public Task> GetExchangesAsync() => _proxy.GetExchangesAsync(); diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index 9e91a19d..f02c25cc 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -149,6 +149,37 @@ private partial class ToolsTestsJsonContext : JsonSerializerContext; [Fact] public async Task Can_Return_Binary_Result() { + const string b64 = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII="; + const string b64Type = "base64"; + const string mime = "image/png"; + + ToolResultAIContent GetImage() + { + var binary = new + { + data = b64, + type = b64Type, + mimeType = mime, + }; + + var textPayload = $"{{\"image\":{{\"mimeType\":\"{binary.mimeType}\",\"encoding\":\"{binary.type}\",\"source\":\"binaryResultsForLlm[0]\"}}}}"; + + return new ToolResultAIContent(new() + { + // make the textual reference available for the LLM (helpful for E2E assertion) + TextResultForLlm = textPayload, + BinaryResultsForLlm = [new() { + Data = binary.data, + Type = binary.type, + MimeType = binary.mimeType, + }], + SessionLog = "Returned an image", + }); + } + + // Reconfigure the proxy for this test to inject the binary payload into the tool result + await Ctx.ConfigureForTestAsync("tools", new() { ["get_image"] = new ToolBinaryOverride(b64, b64Type, mime) }); + var session = await Client.CreateSessionAsync(new SessionConfig { Tools = [AIFunctionFactory.Create(GetImage, "get_image", serializerOptions: ToolsTestsJsonContext.Default.Options)], @@ -162,17 +193,24 @@ await session.SendAsync(new MessageOptions var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); Assert.NotNull(assistantMessage); - Assert.Contains("yellow", assistantMessage!.Data.Content?.ToLowerInvariant() ?? string.Empty); + // Deterministic check: ensure the captured outgoing request references the binary result + var traffic = await Ctx.GetExchangesAsync(); + var lastConversation = traffic[^1]; - static ToolResultAIContent GetImage() => new ToolResultAIContent(new() - { - BinaryResultsForLlm = [new() { - // 2x2 yellow square - Data = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII=", - Type = "base64", - MimeType = "image/png", - }], - SessionLog = "Returned an image", - }); + var toolResults = lastConversation.Request.Messages + .Where(m => m.Role == "tool") + .ToList(); + + Assert.True(toolResults.Count >= 1, "Expected at least one tool result message"); + + var tr = toolResults.Last(); + Assert.False(string.IsNullOrEmpty(tr.Content), "Tool result content should be present as JSON"); + + using var doc = JsonDocument.Parse(tr.Content!); + // The snapshot contains an image object that references the binary result index. + var hasImageSource = doc.RootElement.TryGetProperty("image", out var image) && + image.TryGetProperty("source", out var source) && + source.GetString() == "binaryResultsForLlm[0]"; + Assert.True(hasImageSource, "Tool result JSON should reference binaryResultsForLlm[0]"); } } diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts index b48a5b50..3c16e857 100644 --- a/test/harness/replayingCapiProxy.ts +++ b/test/harness/replayingCapiProxy.ts @@ -94,6 +94,32 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { toolResultNormalizers: [], }; + // If test requested tool binary overrides, add normalizers for each + if ((config as any).toolBinaryOverrides) { + const overrides = (config as any).toolBinaryOverrides as Record< + string, + { data: string; type?: string; mimeType?: string } + >; + for (const [toolName, override] of Object.entries(overrides)) { + const data = override.data; + const type = override.type ?? "base64"; + const mimeType = override.mimeType ?? "image/png"; + + this.addToolResultNormalizer(toolName, (content) => { + try { + const obj = JSON.parse(content); + if (!obj.binaryResultsForLlm) { + obj.binaryResultsForLlm = [{ data, type, mimeType }]; + } + return JSON.stringify(obj); + } catch { + // If content isn't JSON, leave it unchanged + return content; + } + }); + } + } + this.clearExchanges(); await this.loadStoredData(); } @@ -149,7 +175,9 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { options.requestOptions.path?.startsWith("/stop") && options.requestOptions.method === "POST" ) { - const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true"); + const skipWritingCache = options.requestOptions.path.includes( + "skipWritingCache=true", + ); options.onResponseStart(200, {}); options.onResponseEnd(); await this.stop(skipWritingCache); diff --git a/test/snapshots/tools/can_return_binary_result.yaml b/test/snapshots/tools/can_return_binary_result.yaml index d25c2200..dd504f53 100644 --- a/test/snapshots/tools/can_return_binary_result.yaml +++ b/test/snapshots/tools/can_return_binary_result.yaml @@ -9,38 +9,12 @@ conversations: - role: assistant tool_calls: - id: toolcall_0 - type: function - function: - name: report_intent - arguments: '{"intent":"Retrieving and analyzing image"}' - - role: assistant - tool_calls: - - id: toolcall_1 - type: function - function: - name: get_image - arguments: "{}" - - messages: - - role: system - content: ${system} - - role: user - content: Use get_image. What color is the square in the image? - - role: assistant - tool_calls: - - id: toolcall_0 - type: function - function: - name: report_intent - arguments: '{"intent":"Retrieving and analyzing image"}' - - id: toolcall_1 type: function function: name: get_image arguments: "{}" - role: tool tool_call_id: toolcall_0 - content: Intent logged - - role: tool - tool_call_id: toolcall_1 + content: '{"image":{"encoding":"base64","mimeType":"image/png","source":"binaryResultsForLlm[0]"},"binaryResultsForLlm":[{"data":"iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVR4nGP4/5/h/38GABkAA/0k+7UAAAAASUVORK5CYII=","type":"base64","mimeType":"image/png"}]}' - role: assistant content: The square in the image is **yellow**. From 0a936ebcf1fccbc395d34be36d648d8f5a79620d Mon Sep 17 00:00:00 2001 From: vivganes Date: Wed, 28 Jan 2026 18:23:31 +0530 Subject: [PATCH 4/5] fix review comments --- dotnet/test/Harness/CapiProxy.cs | 1 + dotnet/test/Harness/E2ETestContext.cs | 3 ++- dotnet/test/ToolsTests.cs | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index ac98aac8..490d1658 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -149,6 +149,7 @@ private static string FindRepoRoot() [JsonSourceGenerationOptions(JsonSerializerDefaults.Web)] [JsonSerializable(typeof(ConfigureRequest))] + [JsonSerializable(typeof(ToolBinaryOverride))] [JsonSerializable(typeof(List))] private partial class CapiProxyJsonContext : JsonSerializerContext; } diff --git a/dotnet/test/Harness/E2ETestContext.cs b/dotnet/test/Harness/E2ETestContext.cs index b3bb7050..784d2d18 100644 --- a/dotnet/test/Harness/E2ETestContext.cs +++ b/dotnet/test/Harness/E2ETestContext.cs @@ -74,7 +74,8 @@ public async Task ConfigureForTestAsync(string testFile, Dictionary kv.Key, kv => new ToolBinaryOverride(kv.Value.Data, kv.Value.Type, kv.Value.MimeType))); + // Pass through the dictionary directly to avoid unnecessary allocations + await _proxy.ConfigureAsync(snapshotPath, WorkDir, toolBinaryOverrides); } public Task> GetExchangesAsync() => _proxy.GetExchangesAsync(); diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index f02c25cc..faf9dc04 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -169,10 +169,10 @@ ToolResultAIContent GetImage() // make the textual reference available for the LLM (helpful for E2E assertion) TextResultForLlm = textPayload, BinaryResultsForLlm = [new() { - Data = binary.data, - Type = binary.type, - MimeType = binary.mimeType, - }], + Data = binary.data, + Type = binary.type, + MimeType = binary.mimeType, + }], SessionLog = "Returned an image", }); } From bd5308ef62005a19b44781689c5401e7a456bfb1 Mon Sep 17 00:00:00 2001 From: vivganes Date: Wed, 28 Jan 2026 18:24:59 +0530 Subject: [PATCH 5/5] fix extra blank line --- dotnet/test/Harness/CapiProxy.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index 490d1658..ae07ca09 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -123,7 +123,6 @@ public async Task ConfigureAsync(string filePath, string workDir, Dictionary? ToolBinaryOverrides); - public async Task> GetExchangesAsync() { var url = await (_startupTask ?? throw new InvalidOperationException("Proxy not started"));