Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions samples/cs/GettingStarted/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
<PropertyGroup>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
<OnnxRuntimeGenAIVersion>0.11.2</OnnxRuntimeGenAIVersion>
<OnnxRuntimeVersion>1.23.2</OnnxRuntimeVersion>
<OnnxRuntimeVersion>1.23.2</OnnxRuntimeVersion>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Microsoft.AI.Foundry.Local" Version="0.8.2.1" />
<PackageVersion Include="Microsoft.AI.Foundry.Local.WinML" Version="0.8.2.1" />
<PackageVersion Include="Microsoft.ML.OnnxRuntime.Gpu" Version="$(OnnxRuntimeVersion)" />
<PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="$(OnnxRuntimeGenAIVersion)" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="9.0.10" />
<PackageVersion Include="OpenAI" Version="2.5.0" />
<PackageVersion Include="OpenAI" Version="2.8.0" />
</ItemGroup>
</Project>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using Microsoft.AI.Foundry.Local;
using OpenAI.Chat;
using System.ClientModel;

/// <summary>
/// A thin <see cref="ChatClient"/> subclass whose traffic is redirected through Foundry Local Model CoreInterop by
/// installing a custom <see cref="System.ClientModel.Primitives.PipelineTransport"/>.
/// </summary>
/// <remarks>
/// For externally-applied demonstration, this derived client is made public. In an integrated form, Foundry Local could instead
/// return the parent <see cref="ChatClient"/> type from the OpenAI library and use this type internally for the concrete instance.
/// </remarks>
public class FoundryLocalChatClient : ChatClient
{
    // The credential is never transmitted: the custom transport short-circuits all network traffic,
    // so a placeholder value is enough to satisfy the base-class constructor.
    public FoundryLocalChatClient(Model foundryLocalModel)
        : base(
            foundryLocalModel.Id,
            new ApiKeyCredential("placeholder"),
            new OpenAI.OpenAIClientOptions
            {
                Transport = new FoundryLocalPipelineTransport(foundryLocalModel)
            })
    {
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
using Microsoft.AI.Foundry.Local;
using Microsoft.AI.Foundry.Local.Detail;
using System.ClientModel;
using System.ClientModel.Primitives;
using System.Reflection;

/// <summary>
/// A custom <see cref="PipelineTransport"/> that conducts pipeline traffic via Foundry Local CoreInterop instead of HTTP
/// network traffic.
/// </summary>
/// <remarks>
/// As written, this relies on Reflection for access to non-public CoreInterop fields and methods. In an integrated form,
/// CoreInterop could instead be used directly via the <see cref="Model"/>'s CoreInterop instance (or another CoreInterop
/// instance).
/// </remarks>
internal class FoundryLocalPipelineTransport : PipelineTransport
{
    private readonly FoundryLocalInteropWrapper _interopWrapper;

    public FoundryLocalPipelineTransport(Model foundryLocalModel)
    {
        _interopWrapper = new(foundryLocalModel);
    }

    /// <summary>Creates the message type this transport knows how to process.</summary>
    protected override PipelineMessage CreateMessageCore()
        => new FoundryLocalPipelineMessage(new FoundryLocalPipelineRequest());

    /// <summary>Synchronously executes the request via CoreInterop and attaches the resulting response.</summary>
    protected override void ProcessCore(PipelineMessage message)
    {
        // Only messages created by CreateMessageCore above are supported.
        if (message is not FoundryLocalPipelineMessage foundryLocalMessage)
        {
            throw new NotImplementedException();
        }

        BinaryData interopResultBytes = _interopWrapper.ExecuteInteropChat(foundryLocalMessage);
        foundryLocalMessage.SetResponse(new FoundryLocalPipelineResponse(interopResultBytes));
    }

    /// <summary>Asynchronously executes the request via CoreInterop and attaches the resulting response.</summary>
    protected override async ValueTask ProcessCoreAsync(PipelineMessage message)
    {
        // Only messages created by CreateMessageCore above are supported.
        if (message is not FoundryLocalPipelineMessage foundryLocalMessage)
        {
            throw new NotImplementedException();
        }

        BinaryData interopResultBytes = await _interopWrapper.ExecuteInteropChatAsync(foundryLocalMessage).ConfigureAwait(false);
        foundryLocalMessage.SetResponse(new FoundryLocalPipelineResponse(interopResultBytes));
    }

    /// <summary>
    /// Minimal request headers implementation. Header writes made by the OpenAI client (auth, content-type, etc.)
    /// are irrelevant to the in-process CoreInterop path, so <see cref="Set"/> intentionally discards them.
    /// </summary>
    private class FoundryLocalPipelineRequestHeaders : PipelineRequestHeaders
    {
        public override void Add(string name, string value) => throw new NotImplementedException();
        public override IEnumerator<KeyValuePair<string, string>> GetEnumerator() => throw new NotImplementedException();
        public override bool Remove(string name) => throw new NotImplementedException();

        // Headers are never transmitted anywhere; silently accept and drop them.
        public override void Set(string name, string value)
        {
        }

        // Nothing is stored, so every header reads as absent.
        public override bool TryGetValue(string name, out string? value)
        {
            value = null;
            return false;
        }

        public override bool TryGetValues(string name, out IEnumerable<string>? values) => throw new NotImplementedException();
    }

    /// <summary>Minimal request implementation; only the serialized content is consumed by the transport.</summary>
    private class FoundryLocalPipelineRequest : PipelineRequest
    {
        protected override string MethodCore { get; set; } = "POST";
        protected override Uri? UriCore { get; set; }
        protected override PipelineRequestHeaders HeadersCore { get; } = new FoundryLocalPipelineRequestHeaders();
        protected override BinaryContent? ContentCore { get; set; }

        // Nothing unmanaged to release.
        public override void Dispose()
        {
        }
    }

    /// <summary>Response wrapper around the raw bytes returned from CoreInterop; always reports HTTP 200/OK.</summary>
    private class FoundryLocalPipelineResponse : PipelineResponse
    {
        public FoundryLocalPipelineResponse(BinaryData interopResultBytes)
        {
            ContentStream = interopResultBytes.ToStream();
        }

        // CoreInterop has no HTTP status concept; a fixed success status keeps the OpenAI client on its happy path.
        // ReasonPhrase returns a constant (instead of throwing) so that diagnostic/logging code reading it cannot crash.
        public override int Status => 200;
        public override string ReasonPhrase => "OK";
        public override Stream? ContentStream { get; set; }

        public override BinaryData Content
        {
            get
            {
                if (_content is null && ContentStream is not null)
                {
                    // Buffer once, then rewind so the stream remains readable by callers.
                    _content = BinaryData.FromStream(ContentStream);
                    ContentStream.Position = 0;
                }
                return _content ??= BinaryData.Empty;
            }
        }
        private BinaryData? _content;

        protected override PipelineResponseHeaders HeadersCore => throw new NotImplementedException();
        public override BinaryData BufferContent(CancellationToken cancellationToken = default) => Content;
        public override ValueTask<BinaryData> BufferContentAsync(CancellationToken cancellationToken = default) => ValueTask.FromResult(Content);

        // ContentStream wraps already-buffered bytes; there is nothing that requires explicit disposal.
        public override void Dispose()
        {
        }
    }

    /// <summary>Message type that lets the transport attach the response it produced.</summary>
    private class FoundryLocalPipelineMessage : PipelineMessage
    {
        public FoundryLocalPipelineMessage(PipelineRequest request)
            : base(request)
        {
        }

        public void SetResponse(FoundryLocalPipelineResponse response)
        {
            Response = response;
        }
    }

    /// <summary>
    /// Reflection-based bridge to the non-public CoreInterop instance held by the model's selected variant.
    /// </summary>
    private class FoundryLocalInteropWrapper
    {
        private readonly object _coreInterop;
        private readonly MethodInfo _executeCommandInfo;
        private readonly MethodInfo _executeCommandAsyncInfo;

        public FoundryLocalInteropWrapper(Model foundryLocalModel)
        {
            // Reflect the CoreInterop instance and its command methods once, up front, so per-request
            // processing only pays for MethodInfo.Invoke.
            _coreInterop = typeof(ModelVariant)
                .GetField("_coreInterop", BindingFlags.Instance | BindingFlags.NonPublic)
                ?.GetValue(foundryLocalModel.SelectedVariant)
                ?? throw new InvalidOperationException("Could not reflect ModelVariant._coreInterop.");
            Type coreInteropType = _coreInterop.GetType();
            _executeCommandInfo = coreInteropType
                .GetMethod("ExecuteCommand", BindingFlags.Instance | BindingFlags.Public, [typeof(string), typeof(CoreInteropRequest)])
                ?? throw new InvalidOperationException("Could not reflect CoreInterop.ExecuteCommand.");
            _executeCommandAsyncInfo = coreInteropType
                .GetMethod("ExecuteCommandAsync", BindingFlags.Instance | BindingFlags.Public, [typeof(string), typeof(CoreInteropRequest), typeof(CancellationToken?)])
                ?? throw new InvalidOperationException("Could not reflect CoreInterop.ExecuteCommandAsync.");
        }

        /// <summary>Synchronously runs a chat_completions command and returns the raw result bytes.</summary>
        public BinaryData ExecuteInteropChat(PipelineMessage? message)
        {
            if (message is not FoundryLocalPipelineMessage
                || message.Request?.Content is not BinaryContent requestContent)
            {
                throw new NotImplementedException();
            }

            CoreInteropRequest interopRequest = CreateInteropRequest(requestContent);
            object reflectedInteropResult = _executeCommandInfo.Invoke(_coreInterop, ["chat_completions", interopRequest])
                ?? throw new InvalidOperationException("CoreInterop.ExecuteCommand returned null.");
            return GetInteropResultBytes(reflectedInteropResult);
        }

        /// <summary>Asynchronously runs a chat_completions command and returns the raw result bytes.</summary>
        public async Task<BinaryData> ExecuteInteropChatAsync(PipelineMessage? message)
        {
            if (message is not FoundryLocalPipelineMessage
                || message.Request?.Content is not BinaryContent requestContent)
            {
                throw new NotImplementedException();
            }

            CoreInteropRequest interopRequest = CreateInteropRequest(requestContent);
            object invocationResult = _executeCommandAsyncInfo.Invoke(_coreInterop, ["chat_completions", interopRequest, message.CancellationToken])
                ?? throw new InvalidOperationException("CoreInterop.ExecuteCommandAsync returned null.");

            // The reflected method returns a Task<TResult> whose TResult type is non-public: await it as a
            // plain Task, then read its Result property via reflection.
            Task interopResultTask = (Task)invocationResult;
            await interopResultTask.ConfigureAwait(false);
            object reflectedInteropResult = interopResultTask.GetType()
                .GetProperty("Result")
                ?.GetValue(interopResultTask)
                ?? throw new InvalidOperationException("Could not read the CoreInterop task result.");
            return GetInteropResultBytes(reflectedInteropResult);
        }

        /// <summary>Wraps the serialized OpenAI request body in a CoreInterop request.</summary>
        private static CoreInteropRequest CreateInteropRequest(BinaryContent content)
        {
            using MemoryStream contentStream = new();
            content.WriteTo(contentStream);
            contentStream.Flush();
            contentStream.Position = 0;
            using StreamReader contentReader = new(contentStream);
            string rawContent = contentReader.ReadToEnd();

            return new CoreInteropRequest()
            {
                Params = new()
                {
                    // CoreInterop consumes the unmodified OpenAI "create chat completion" JSON payload.
                    ["OpenAICreateRequest"] = rawContent
                }
            };
        }

        /// <summary>Extracts the raw payload from the reflected, non-public CoreInterop result object.</summary>
        private static BinaryData GetInteropResultBytes(object reflectedInteropResult)
        {
            // The result type privately carries the payload in its "Data" field; anything unexpected maps to empty.
            object? reflectedData = reflectedInteropResult
                .GetType()
                .GetField("Data", BindingFlags.Instance | BindingFlags.NonPublic)
                ?.GetValue(reflectedInteropResult);
            return reflectedData is string rawReflectedData
                ? BinaryData.FromString(rawReflectedData)
                : BinaryData.Empty;
        }
    }
}
39 changes: 39 additions & 0 deletions samples/cs/GettingStarted/src/OfficialLibrarySdkDemo/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using Microsoft.AI.Foundry.Local;
using OpenAI.Chat;

// Initialize the Foundry Local runtime for this sample application.
Configuration appConfiguration = new()
{
    AppName = "foundry_local_samples",
    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
};
await FoundryLocalManager.CreateAsync(appConfiguration, Utils.GetAppLogger());

// Ensure that any Execution Provider (EP) downloads run and are completed.
// EP packages include dependencies and may be large.
// Download is only required again if a new version of the EP is released.
// For cross platform builds there is no dynamic EP download and this will return immediately.
await Utils.RunWithSpinner("Registering execution providers", FoundryLocalManager.Instance.EnsureEpsDownloadedAsync());

// Use the model catalog to find a model via alias and download it, if not already cached.
// Throw a specific exception type (not the reserved base Exception) if the alias is unknown.
ICatalog modelCatalog = await FoundryLocalManager.Instance.GetCatalogAsync();
Model model = await modelCatalog.GetModelAsync("qwen2.5-0.5b")
    ?? throw new InvalidOperationException("Model not found");
await model.DownloadAsync(progress =>
{
    // Rewrite the same console line as progress advances; finish the line at 100%.
    Console.Write($"\rDownloading model: {progress:F2}%");
    if (progress >= 100f)
    {
        Console.WriteLine();
    }
});

Console.Write($"Loading model {model.Id}...");
await model.LoadAsync();
Console.WriteLine("done.");

// Use chat completions with the official OpenAI library, via a Foundry Local derived client
// whose custom transport routes traffic through CoreInterop rather than HTTP.
ChatClient localChatClient = new FoundryLocalChatClient(model);
ChatCompletion localCompletion = localChatClient.CompleteChat("Say hello!");
Console.WriteLine(localCompletion.Content[0].Text);

// Tidy up - unload the model so its resources are released.
await model.UnloadAsync();
10 changes: 10 additions & 0 deletions samples/cs/GettingStarted/windows/FoundrySamplesWinML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{8EC462FD
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelManagementExample", "ModelManagementExample\ModelManagementExample.csproj", "{6BBA4217-6798-4629-AF27-6526FCC5FA5B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OfficialLibrarySdkDemo", "OfficialLibrarySdkDemo\OfficialLibrarySdkDemo.csproj", "{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|ARM64 = Debug|ARM64
Expand Down Expand Up @@ -58,6 +60,14 @@ Global
{6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|ARM64.Build.0 = Release|Any CPU
{6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|x64.ActiveCfg = Release|x64
{6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|x64.Build.0 = Release|x64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Debug|ARM64.ActiveCfg = Debug|ARM64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Debug|ARM64.Build.0 = Debug|ARM64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Debug|x64.ActiveCfg = Debug|x64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Debug|x64.Build.0 = Debug|x64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Release|ARM64.ActiveCfg = Release|ARM64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Release|ARM64.Build.0 = Release|ARM64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Release|x64.ActiveCfg = Release|x64
{C5ACDF0F-B380-0967-3DAD-7D15A8A8674F}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console sample demonstrating the official OpenAI library routed through Foundry Local -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- For Windows use the following -->
<TargetFramework>net10.0-windows10.0.26100</TargetFramework>
<WindowsAppSDKSelfContained>false</WindowsAppSDKSelfContained>
<Platforms>ARM64;x64</Platforms>
<WindowsPackageType>None</WindowsPackageType>
<EnableCoreMrtTooling>false</EnableCoreMrtTooling>
</PropertyGroup>

<!-- Sources live in the shared cross-platform tree; compile them into this Windows-specific project -->
<ItemGroup>
<Compile Include="../../src/OfficialLibrarySdkDemo/Program.cs" />
<Compile Include="../../src/OfficialLibrarySdkDemo/FoundryLocalChatClient.cs" />
<Compile Include="../../src/OfficialLibrarySdkDemo/FoundryLocalPipelineTransport.cs" />
<Compile Include="../../src/Shared/*.cs" />
</ItemGroup>


<!-- Use WinML package for local Foundry SDK on Windows -->
<!-- Package versions are resolved centrally (ManagePackageVersionsCentrally), so no Version attributes here -->
<ItemGroup>
<PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
<PackageReference Include="OpenAI" />
</ItemGroup>

<!-- On Publish: Exclude superfluous ORT and IHV libs -->
<Import Project="../../ExcludeExtraLibs.props" />

</Project>