Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion InferenceEngine/ModelBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public enum BackendType
Cpu,
GgmlCpu,
GgmlMetal,
GgmlCuda,
}

public class ModelConfig
Expand Down Expand Up @@ -124,6 +125,10 @@ protected ModelBase(string ggufPath, BackendType backend)
_ggmlContext = new GgmlContext(new[] { 0 }, GgmlBackendType.Metal);
_allocator = new GgmlAllocator(_ggmlContext, 0);
break;
case BackendType.GgmlCuda:
_ggmlContext = new GgmlContext(new[] { 0 }, GgmlBackendType.Cuda);
_allocator = new GgmlAllocator(_ggmlContext, 0);
break;
case BackendType.Cpu:
_allocator = new CpuAllocator(BlasEnum.DotNet);
break;
Expand All @@ -135,7 +140,7 @@ protected ModelBase(string ggufPath, BackendType backend)
_gguf = new GgufFile(ggufPath);
}

protected bool IsGgmlBackend => _backend == BackendType.GgmlCpu || _backend == BackendType.GgmlMetal;
protected bool IsGgmlBackend => _backend == BackendType.GgmlCpu || _backend == BackendType.GgmlMetal || _backend == BackendType.GgmlCuda;

protected void ParseBaseConfig()
{
Expand Down
19 changes: 14 additions & 5 deletions TensorSharp.GGML.Native/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.20)
project(GgmlOps LANGUAGES C CXX OBJC OBJCXX)
project(GgmlOps LANGUAGES C CXX)

set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
Expand All @@ -19,10 +19,11 @@ add_compile_definitions(GGML_VERSION=\"0.0.0\" GGML_COMMIT=\"unknown\")
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(GGML_BACKEND_DL OFF CACHE BOOL "" FORCE)
set(GGML_CPU ON CACHE BOOL "" FORCE)
set(GGML_METAL ON CACHE BOOL "" FORCE)
set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
set(GGML_CUDA OFF CACHE BOOL "" FORCE)
set(GGML_METAL OFF CACHE BOOL "" FORCE)
set(GGML_METAL_NDEBUG OFF CACHE BOOL "" FORCE)
set(GGML_METAL_EMBED_LIBRARY OFF CACHE BOOL "" FORCE)
option(TSG_ENABLE_CUDA "Build ggml with CUDA backend support" OFF)
set(GGML_CUDA ${TSG_ENABLE_CUDA} CACHE BOOL "" FORCE)
set(GGML_HIP OFF CACHE BOOL "" FORCE)
set(GGML_VULKAN OFF CACHE BOOL "" FORCE)
set(GGML_OPENCL OFF CACHE BOOL "" FORCE)
Expand All @@ -39,6 +40,14 @@ set(GGML_ALL_WARNINGS OFF CACHE BOOL "" FORCE)
set(GGML_FATAL_WARNINGS OFF CACHE BOOL "" FORCE)
set(GGML_CCACHE OFF CACHE BOOL "" FORCE)

if(APPLE)
enable_language(OBJC)
enable_language(OBJCXX)
set(GGML_METAL ON CACHE BOOL "" FORCE)
set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
endif()

add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../ExternalProjects/ggml/src" ggml-build)

add_library(GgmlOps SHARED ggml_ops.cpp)
Expand Down
81 changes: 81 additions & 0 deletions TensorSharp.GGML.Native/build-linux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env bash
# Configure-and-build driver for the GgmlOps native bridge on Linux.
# Fail fast: -e abort on error, -u error on unset vars, pipefail for pipelines.
set -euo pipefail

# Absolute directory containing this script (CMake source dir).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# All of these can be overridden via environment variables of the same name.
BUILD_TYPE="${BUILD_TYPE:-Release}"
# Platform label for the per-platform build dir; defaults to the host arch.
PLATFORM="${PLATFORM:-$(uname -m)}"
# ON / OFF / auto — "auto" probes the machine for a CUDA toolchain below.
ENABLE_CUDA="${TSG_ENABLE_CUDA:-auto}"
BUILD_DIR="${SCRIPT_DIR}/build/${PLATFORM}"

# Detect whether a CUDA *compiler* toolchain (nvcc) is available.
#
# Returns 0 when nvcc is usable, 1 otherwise. Deliberately does NOT treat
# nvidia-smi or libcudart as evidence: those only prove a driver/runtime is
# installed, and enabling GGML_CUDA without nvcc fails at CMake configure
# time (CMake's CUDA language support requires a CUDA compiler).
has_cuda_toolchain() {
    # nvcc already on PATH is the definitive signal.
    if command -v nvcc >/dev/null 2>&1; then
        return 0
    fi

    # Common install prefixes when nvcc is not on PATH:
    # CUDA_HOME / CUDA_PATH if the user exported them, then the default
    # /usr/local/cuda symlink maintained by NVIDIA's installers.
    local prefix
    for prefix in "${CUDA_HOME:-}" "${CUDA_PATH:-}" "/usr/local/cuda"; do
        if [[ -n "${prefix}" && -x "${prefix}/bin/nvcc" ]]; then
            return 0
        fi
    done

    return 1
}

# Map a user-supplied CUDA toggle to the canonical CMake value "ON"/"OFF".
#
# Accepts (case-insensitively): 1/on/true/yes -> ON, 0/off/false/no -> OFF,
# and "auto", which enables CUDA only when a toolchain is detected on this
# machine. Any other value prints a usage error to stderr and exits 1.
normalize_cuda_toggle() {
    local lowered
    lowered="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"

    if [[ "${lowered}" == "1" || "${lowered}" == "on" || "${lowered}" == "true" || "${lowered}" == "yes" ]]; then
        echo "ON"
        return 0
    fi

    if [[ "${lowered}" == "0" || "${lowered}" == "off" || "${lowered}" == "false" || "${lowered}" == "no" ]]; then
        echo "OFF"
        return 0
    fi

    if [[ "${lowered}" == "auto" ]]; then
        if has_cuda_toolchain; then
            echo "ON"
        else
            echo "OFF"
        fi
        return 0
    fi

    echo "Invalid CUDA toggle '${1}'. Use ON/OFF/auto." >&2
    exit 1
}

# Command-line flags override the environment defaults set above.
# Each flag consumes a value; ${2:?...} aborts with a message if it is missing.
while [[ $# -gt 0 ]]; do
case "$1" in
--platform)
PLATFORM="${2:?missing platform value}"
# Keep the build dir in sync with the (possibly new) platform label.
BUILD_DIR="${SCRIPT_DIR}/build/${PLATFORM}"
shift 2
;;
--cuda)
# Raw value; validated/normalized by normalize_cuda_toggle below.
ENABLE_CUDA="${2:?missing cuda value}"
shift 2
;;
--build-type)
BUILD_TYPE="${2:?missing build type value}"
shift 2
;;
*)
echo "Unknown argument: $1" >&2
exit 1
;;
esac
done

# Resolve ON/OFF/auto into a concrete CMake boolean (exits on bad input).
ENABLE_CUDA="$(normalize_cuda_toggle "${ENABLE_CUDA}")"
echo "Building GgmlOps for platform='${PLATFORM}', buildType='${BUILD_TYPE}', cuda='${ENABLE_CUDA}'."

mkdir -p "${BUILD_DIR}"
# Configure: the source dir is this script's directory (contains CMakeLists.txt).
cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-DTSG_ENABLE_CUDA="${ENABLE_CUDA}"
# --config is a no-op for single-config Makefile/Ninja generators but harmless;
# kept for parity should a multi-config generator ever be used.
cmake --build "${BUILD_DIR}" --config "${BUILD_TYPE}" --target GgmlOps
# Keep a stable path for managed probing/copy steps.
cp -f "${BUILD_DIR}/libGgmlOps.so" "${SCRIPT_DIR}/build/libGgmlOps.so"
25 changes: 23 additions & 2 deletions TensorSharp.GGML.Native/ggml_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
#include "ggml-metal.h"
#include "ggml-cpu.h"
#include "ggml-quants.h"
#if defined(__has_include)
#if __has_include("ggml-cuda.h")
#include "ggml-cuda.h"
#define TSG_GGML_HAS_CUDA_BACKEND 1
#endif
#endif

// GGML context memory pool: reuse mem_buffers to avoid per-op allocation overhead
namespace ggml_pool
Expand Down Expand Up @@ -265,6 +271,7 @@ namespace

constexpr int BACKEND_TYPE_METAL = 1;
constexpr int BACKEND_TYPE_CPU = 2;
constexpr int BACKEND_TYPE_CUDA = 3;

void initialize_backend()
{
Expand All @@ -288,6 +295,20 @@ namespace
return;
}
}
else if (g_backend_type == BACKEND_TYPE_CUDA)
{
#if defined(TSG_GGML_HAS_CUDA_BACKEND)
g_backend = ggml_backend_cuda_init(0);
if (g_backend == nullptr)
{
set_last_error("ggml-cuda backend initialization failed.");
return;
}
#else
set_last_error("ggml-cuda backend is not compiled into this native bridge.");
return;
#endif
}
else
{
set_last_error("Unknown GGML backend type requested.");
Expand All @@ -299,7 +320,7 @@ namespace

bool ensure_backend(int backend_type)
{
if (backend_type != BACKEND_TYPE_METAL && backend_type != BACKEND_TYPE_CPU)
if (backend_type != BACKEND_TYPE_METAL && backend_type != BACKEND_TYPE_CPU && backend_type != BACKEND_TYPE_CUDA)
{
set_last_error("Invalid GGML backend type.");
return false;
Expand All @@ -321,7 +342,7 @@ namespace

bool ensure_backend()
{
const int backend_type = (g_backend_type == 0) ? BACKEND_TYPE_METAL : g_backend_type;
const int backend_type = (g_backend_type == 0) ? BACKEND_TYPE_CPU : g_backend_type;
return ensure_backend(backend_type);
}

Expand Down
7 changes: 6 additions & 1 deletion TensorSharp.GGML/GgmlAllocator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ public GgmlAllocator(GgmlContext context, int deviceId)
this.deviceId = deviceId;
}

public BlasEnum BlasEnum => context.BackendType == GgmlBackendType.Metal ? BlasEnum.GGML_METAL : BlasEnum.GGML_CPU;
public BlasEnum BlasEnum => context.BackendType switch
{
GgmlBackendType.Metal => BlasEnum.GGML_METAL,
GgmlBackendType.Cuda => BlasEnum.CUDA,
_ => BlasEnum.GGML_CPU,
};

public int DeviceId => deviceId;

Expand Down
37 changes: 31 additions & 6 deletions TensorSharp.GGML/GgmlNative.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public enum GgmlBackendType
{
Metal = 1,
Cpu = 2,
Cuda = 3,
}

[StructLayout(LayoutKind.Sequential)]
Expand Down Expand Up @@ -532,7 +533,12 @@ public static void EnsureAvailable(GgmlBackendType backendType)
{
if (TSGgml_IsBackendAvailable((int)backendType) == 0)
{
string backendName = backendType == GgmlBackendType.Metal ? "ggml-metal" : "ggml-cpu";
string backendName = backendType switch
{
GgmlBackendType.Metal => "ggml-metal",
GgmlBackendType.Cuda => "ggml-cuda",
_ => "ggml-cpu",
};
throw new InvalidOperationException($"Failed to initialize {backendName}. {GetLastErrorMessage("Build the native GGML bridge and ensure the requested GGML backend is available.")}");
}
}
Expand Down Expand Up @@ -938,13 +944,32 @@ private static IntPtr ImportResolver(string libraryName, Assembly assembly, DllI
private static IEnumerable<string> GetCandidatePaths(Assembly assembly)
{
string baseDirectory = AppContext.BaseDirectory;
yield return Path.Combine(baseDirectory, "libGgmlOps.dylib");
yield return Path.Combine(Path.GetDirectoryName(assembly.Location) ?? baseDirectory, "libGgmlOps.dylib");
foreach (string nativeLibraryName in GetNativeLibraryFileNames())
{
yield return Path.Combine(baseDirectory, nativeLibraryName);
yield return Path.Combine(Path.GetDirectoryName(assembly.Location) ?? baseDirectory, nativeLibraryName);

foreach (string root in EnumerateRepoRoots(baseDirectory))
{
yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", nativeLibraryName);
yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "Release", nativeLibraryName);
}
}
}

foreach (string root in EnumerateRepoRoots(baseDirectory))
private static IEnumerable<string> GetNativeLibraryFileNames()
{
if (OperatingSystem.IsWindows())
{
yield return "GgmlOps.dll";
}
else if (OperatingSystem.IsMacOS())
{
yield return "libGgmlOps.dylib";
}
else
{
yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "libGgmlOps.dylib");
yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "Release", "libGgmlOps.dylib");
yield return "libGgmlOps.so";
}
}

Expand Down
22 changes: 18 additions & 4 deletions TensorSharp.GGML/TensorSharp.GGML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,26 @@
<ProjectReference Include="..\AdvUtils\AdvUtils.csproj" />
<ProjectReference Include="..\TensorSharp\TensorSharp.csproj" />
</ItemGroup>
<Target Name="BuildGgmlNative" BeforeTargets="BeforeBuild" Condition="$([MSBuild]::IsOSPlatform('OSX')) And !Exists('$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build/libGgmlOps.dylib')">
<Exec Command="bash &quot;$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build-macos.sh&quot;" />

<PropertyGroup>
<GgmlNativeProjectDir>$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native</GgmlNativeProjectDir>
<GgmlNativeBuildPlatform Condition="'$(Platform)' != '' and '$(Platform)' != 'AnyCPU'">$(Platform)</GgmlNativeBuildPlatform>
<GgmlNativeBuildPlatform Condition="'$(GgmlNativeBuildPlatform)' == ''">x64</GgmlNativeBuildPlatform>
<GgmlNativeLibraryName Condition="$([MSBuild]::IsOSPlatform('OSX'))">libGgmlOps.dylib</GgmlNativeLibraryName>
<GgmlNativeLibraryName Condition="$([MSBuild]::IsOSPlatform('Linux'))">libGgmlOps.so</GgmlNativeLibraryName>
</PropertyGroup>

<Target Name="BuildGgmlNativeMac" BeforeTargets="BeforeBuild" Condition="$([MSBuild]::IsOSPlatform('OSX')) And !Exists('$(GgmlNativeProjectDir)/build/$(GgmlNativeLibraryName)')">
<Exec Command="bash &quot;$(GgmlNativeProjectDir)/build-macos.sh&quot;" />
</Target>

<Target Name="BuildGgmlNativeLinux" BeforeTargets="BeforeBuild" Condition="$([MSBuild]::IsOSPlatform('Linux')) And !Exists('$(GgmlNativeProjectDir)/build/$(GgmlNativeLibraryName)')">
<Exec Command="bash &quot;$(GgmlNativeProjectDir)/build-linux.sh&quot; --platform &quot;$(GgmlNativeBuildPlatform)&quot;" />
</Target>
<Target Name="CopyGgmlNativeBinary" AfterTargets="Build" Condition="$([MSBuild]::IsOSPlatform('OSX'))">

<Target Name="CopyGgmlNativeBinary" AfterTargets="Build" Condition="'$(GgmlNativeLibraryName)' != '' And Exists('$(GgmlNativeProjectDir)/build/$(GgmlNativeLibraryName)')">
<ItemGroup>
<GgmlNativeBinary Include="$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build/libGgmlOps.dylib" />
<GgmlNativeBinary Include="$(GgmlNativeProjectDir)/build/$(GgmlNativeLibraryName)" />
</ItemGroup>
<Copy SourceFiles="@(GgmlNativeBinary)" DestinationFolder="$(OutputPath)" SkipUnchangedFiles="true" />
</Target>
Expand Down