diff --git a/InferenceEngine/ModelBase.cs b/InferenceEngine/ModelBase.cs
index 4e96d7e..20e23c0 100644
--- a/InferenceEngine/ModelBase.cs
+++ b/InferenceEngine/ModelBase.cs
@@ -24,6 +24,7 @@ public enum BackendType
     Cpu,
     GgmlCpu,
     GgmlMetal,
+    GgmlCuda,
 }
 
 public class ModelConfig
@@ -124,6 +125,10 @@ protected ModelBase(string ggufPath, BackendType backend)
             _ggmlContext = new GgmlContext(new[] { 0 }, GgmlBackendType.Metal);
             _allocator = new GgmlAllocator(_ggmlContext, 0);
             break;
+        case BackendType.GgmlCuda:
+            _ggmlContext = new GgmlContext(new[] { 0 }, GgmlBackendType.Cuda);
+            _allocator = new GgmlAllocator(_ggmlContext, 0);
+            break;
         case BackendType.Cpu:
             _allocator = new CpuAllocator(BlasEnum.DotNet);
             break;
@@ -135,7 +140,7 @@ protected ModelBase(string ggufPath, BackendType backend)
         _gguf = new GgufFile(ggufPath);
     }
 
-    protected bool IsGgmlBackend => _backend == BackendType.GgmlCpu || _backend == BackendType.GgmlMetal;
+    protected bool IsGgmlBackend => _backend == BackendType.GgmlCpu || _backend == BackendType.GgmlMetal || _backend == BackendType.GgmlCuda;
 
     protected void ParseBaseConfig()
     {
diff --git a/TensorSharp.GGML.Native/CMakeLists.txt b/TensorSharp.GGML.Native/CMakeLists.txt
index 04d51db..5461211 100644
--- a/TensorSharp.GGML.Native/CMakeLists.txt
+++ b/TensorSharp.GGML.Native/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.20)
-project(GgmlOps LANGUAGES C CXX OBJC OBJCXX)
+project(GgmlOps LANGUAGES C CXX)
 
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
@@ -19,10 +19,11 @@ add_compile_definitions(GGML_VERSION=\"0.0.0\" GGML_COMMIT=\"unknown\")
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
 set(GGML_BACKEND_DL OFF CACHE BOOL "" FORCE)
 set(GGML_CPU ON CACHE BOOL "" FORCE)
-set(GGML_METAL ON CACHE BOOL "" FORCE)
-set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
-set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
-set(GGML_CUDA OFF CACHE BOOL "" FORCE)
+set(GGML_METAL OFF CACHE BOOL "" FORCE)
+set(GGML_METAL_NDEBUG OFF CACHE BOOL "" FORCE)
+set(GGML_METAL_EMBED_LIBRARY OFF CACHE BOOL "" FORCE)
+option(TSG_ENABLE_CUDA "Build ggml with CUDA backend support" OFF)
+set(GGML_CUDA ${TSG_ENABLE_CUDA} CACHE BOOL "" FORCE)
 set(GGML_HIP OFF CACHE BOOL "" FORCE)
 set(GGML_VULKAN OFF CACHE BOOL "" FORCE)
 set(GGML_OPENCL OFF CACHE BOOL "" FORCE)
@@ -39,6 +40,14 @@ set(GGML_ALL_WARNINGS OFF CACHE BOOL "" FORCE)
 set(GGML_FATAL_WARNINGS OFF CACHE BOOL "" FORCE)
 set(GGML_CCACHE OFF CACHE BOOL "" FORCE)
 
+if(APPLE)
+  enable_language(OBJC)
+  enable_language(OBJCXX)
+  set(GGML_METAL ON CACHE BOOL "" FORCE)
+  set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
+  set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
+endif()
+
 add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../ExternalProjects/ggml/src" ggml-build)
 
 add_library(GgmlOps SHARED ggml_ops.cpp)
diff --git a/TensorSharp.GGML.Native/build-linux.sh b/TensorSharp.GGML.Native/build-linux.sh
new file mode 100755
index 0000000..177ca2f
--- /dev/null
+++ b/TensorSharp.GGML.Native/build-linux.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+BUILD_TYPE="${BUILD_TYPE:-Release}"
+PLATFORM="${PLATFORM:-$(uname -m)}"
+ENABLE_CUDA="${TSG_ENABLE_CUDA:-auto}"
+BUILD_DIR="${SCRIPT_DIR}/build/${PLATFORM}"
+
+has_cuda_toolchain() {
+  if command -v nvcc >/dev/null 2>&1; then
+    return 0
+  fi
+
+  if [[ -x "/usr/local/cuda/bin/nvcc" ]]; then
+    return 0
+  fi
+
+  if command -v nvidia-smi >/dev/null 2>&1; then
+    return 0
+  fi
+
+  if command -v ldconfig >/dev/null 2>&1 && ldconfig -p 2>/dev/null | grep -q "libcudart\\.so"; then
+    return 0
+  fi
+
+  return 1
+}
+
+normalize_cuda_toggle() {
+  local value="${1,,}"
+  case "${value}" in
+    1|on|true|yes) echo "ON" ;;
+    0|off|false|no) echo "OFF" ;;
+    auto)
+      if has_cuda_toolchain; then
+        echo "ON"
+      else
+        echo "OFF"
+      fi
+      ;;
+    *)
+      echo "Invalid CUDA toggle '${1}'. Use ON/OFF/auto." >&2
+      exit 1
+      ;;
+  esac
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --platform)
+      PLATFORM="${2:?missing platform value}"
+      BUILD_DIR="${SCRIPT_DIR}/build/${PLATFORM}"
+      shift 2
+      ;;
+    --cuda)
+      ENABLE_CUDA="${2:?missing cuda value}"
+      shift 2
+      ;;
+    --build-type)
+      BUILD_TYPE="${2:?missing build type value}"
+      shift 2
+      ;;
+    *)
+      echo "Unknown argument: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+ENABLE_CUDA="$(normalize_cuda_toggle "${ENABLE_CUDA}")"
+echo "Building GgmlOps for platform='${PLATFORM}', buildType='${BUILD_TYPE}', cuda='${ENABLE_CUDA}'."
+
+mkdir -p "${BUILD_DIR}"
+cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" \
+  -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
+  -DTSG_ENABLE_CUDA="${ENABLE_CUDA}"
+cmake --build "${BUILD_DIR}" --config "${BUILD_TYPE}" --target GgmlOps
+
+# Keep a stable path for managed probing/copy steps.
+cp -f "${BUILD_DIR}/libGgmlOps.so" "${SCRIPT_DIR}/build/libGgmlOps.so"
diff --git a/TensorSharp.GGML.Native/ggml_ops.cpp b/TensorSharp.GGML.Native/ggml_ops.cpp
index 13fb21a..5435b8d 100644
--- a/TensorSharp.GGML.Native/ggml_ops.cpp
+++ b/TensorSharp.GGML.Native/ggml_ops.cpp
@@ -24,6 +24,12 @@
 #include "ggml-metal.h"
 #include "ggml-cpu.h"
 #include "ggml-quants.h"
+#if defined(__has_include)
+#if __has_include("ggml-cuda.h")
+#include "ggml-cuda.h"
+#define TSG_GGML_HAS_CUDA_BACKEND 1
+#endif
+#endif
 
 // GGML context memory pool: reuse mem_buffers to avoid per-op allocation overhead
 namespace ggml_pool
@@ -265,6 +271,7 @@ namespace
 
     constexpr int BACKEND_TYPE_METAL = 1;
     constexpr int BACKEND_TYPE_CPU = 2;
+    constexpr int BACKEND_TYPE_CUDA = 3;
 
     void initialize_backend()
     {
@@ -288,6 +295,20 @@ namespace
                 return;
             }
         }
+        else if (g_backend_type == BACKEND_TYPE_CUDA)
+        {
+#if defined(TSG_GGML_HAS_CUDA_BACKEND)
+            g_backend = ggml_backend_cuda_init(0);
+            if (g_backend == nullptr)
+            {
+                set_last_error("ggml-cuda backend initialization failed.");
+                return;
+            }
+#else
+            set_last_error("ggml-cuda backend is not compiled into this native bridge.");
+            return;
+#endif
+        }
         else
         {
             set_last_error("Unknown GGML backend type requested.");
@@ -299,7 +320,7 @@ namespace
 
     bool ensure_backend(int backend_type)
     {
-        if (backend_type != BACKEND_TYPE_METAL && backend_type != BACKEND_TYPE_CPU)
+        if (backend_type != BACKEND_TYPE_METAL && backend_type != BACKEND_TYPE_CPU && backend_type != BACKEND_TYPE_CUDA)
         {
             set_last_error("Invalid GGML backend type.");
             return false;
@@ -321,7 +342,7 @@ namespace
 
     bool ensure_backend()
     {
-        const int backend_type = (g_backend_type == 0) ? BACKEND_TYPE_METAL : g_backend_type;
+        const int backend_type = (g_backend_type == 0) ? BACKEND_TYPE_CPU : g_backend_type;
         return ensure_backend(backend_type);
     }
diff --git a/TensorSharp.GGML/GgmlAllocator.cs b/TensorSharp.GGML/GgmlAllocator.cs
index 5169dd7..4bc340d 100644
--- a/TensorSharp.GGML/GgmlAllocator.cs
+++ b/TensorSharp.GGML/GgmlAllocator.cs
@@ -23,7 +23,12 @@ public GgmlAllocator(GgmlContext context, int deviceId)
         this.deviceId = deviceId;
     }
 
-    public BlasEnum BlasEnum => context.BackendType == GgmlBackendType.Metal ? BlasEnum.GGML_METAL : BlasEnum.GGML_CPU;
+    public BlasEnum BlasEnum => context.BackendType switch
+    {
+        GgmlBackendType.Metal => BlasEnum.GGML_METAL,
+        GgmlBackendType.Cuda => BlasEnum.CUDA,
+        _ => BlasEnum.GGML_CPU,
+    };
 
     public int DeviceId => deviceId;
 
diff --git a/TensorSharp.GGML/GgmlNative.cs b/TensorSharp.GGML/GgmlNative.cs
index c53b76c..98f8f36 100644
--- a/TensorSharp.GGML/GgmlNative.cs
+++ b/TensorSharp.GGML/GgmlNative.cs
@@ -20,6 +20,7 @@ public enum GgmlBackendType
 {
     Metal = 1,
     Cpu = 2,
+    Cuda = 3,
 }
 
 [StructLayout(LayoutKind.Sequential)]
@@ -532,7 +533,12 @@ public static void EnsureAvailable(GgmlBackendType backendType)
 {
     if (TSGgml_IsBackendAvailable((int)backendType) == 0)
     {
-        string backendName = backendType == GgmlBackendType.Metal ? "ggml-metal" : "ggml-cpu";
+        string backendName = backendType switch
+        {
+            GgmlBackendType.Metal => "ggml-metal",
+            GgmlBackendType.Cuda => "ggml-cuda",
+            _ => "ggml-cpu",
+        };
         throw new InvalidOperationException($"Failed to initialize {backendName}. {GetLastErrorMessage("Build the native GGML bridge and ensure the requested GGML backend is available.")}");
     }
 }
@@ -938,13 +944,32 @@ private static IntPtr ImportResolver(string libraryName, Assembly assembly, DllImportSearchPath? searchPath)
 
     private static IEnumerable<string> GetCandidatePaths(Assembly assembly)
     {
         string baseDirectory = AppContext.BaseDirectory;
-        yield return Path.Combine(baseDirectory, "libGgmlOps.dylib");
-        yield return Path.Combine(Path.GetDirectoryName(assembly.Location) ?? baseDirectory, "libGgmlOps.dylib");
+        foreach (string nativeLibraryName in GetNativeLibraryFileNames())
+        {
+            yield return Path.Combine(baseDirectory, nativeLibraryName);
+            yield return Path.Combine(Path.GetDirectoryName(assembly.Location) ?? baseDirectory, nativeLibraryName);
+
+            foreach (string root in EnumerateRepoRoots(baseDirectory))
+            {
+                yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", nativeLibraryName);
+                yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "Release", nativeLibraryName);
+            }
+        }
+    }
 
-        foreach (string root in EnumerateRepoRoots(baseDirectory))
+    private static IEnumerable<string> GetNativeLibraryFileNames()
+    {
+        if (OperatingSystem.IsWindows())
+        {
+            yield return "GgmlOps.dll";
+        }
+        else if (OperatingSystem.IsMacOS())
+        {
+            yield return "libGgmlOps.dylib";
+        }
+        else
         {
-            yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "libGgmlOps.dylib");
-            yield return Path.Combine(root, "TensorSharp.GGML.Native", "build", "Release", "libGgmlOps.dylib");
+            yield return "libGgmlOps.so";
         }
     }
diff --git a/TensorSharp.GGML/TensorSharp.GGML.csproj b/TensorSharp.GGML/TensorSharp.GGML.csproj
index 20dfe3e..a6097e6 100644
--- a/TensorSharp.GGML/TensorSharp.GGML.csproj
+++ b/TensorSharp.GGML/TensorSharp.GGML.csproj
@@ -25,12 +25,26 @@
-  <ItemGroup>
-    <None Include="$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build/libGgmlOps.dylib" CopyToOutputDirectory="PreserveNewest" />
+  <PropertyGroup>
+    <GgmlNativeRoot>$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native</GgmlNativeRoot>
+    <GgmlNativePlatform>$(Platform)</GgmlNativePlatform>
+    <GgmlNativePlatform Condition="'$(GgmlNativePlatform)' == '' or '$(GgmlNativePlatform)' == 'AnyCPU'">x64</GgmlNativePlatform>
+    <GgmlNativeLibraryName Condition="$([MSBuild]::IsOSPlatform('OSX'))">libGgmlOps.dylib</GgmlNativeLibraryName>
+    <GgmlNativeLibraryName Condition="'$(GgmlNativeLibraryName)' == ''">libGgmlOps.so</GgmlNativeLibraryName>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <None Include="$(GgmlNativeRoot)/build/$(GgmlNativeLibraryName)"
+          Condition="Exists('$(GgmlNativeRoot)/build/$(GgmlNativeLibraryName)')"
+          Link="$(GgmlNativeLibraryName)"
+          CopyToOutputDirectory="PreserveNewest"
+          Visible="false" />
   </ItemGroup>
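
Usage notes. The new Linux script auto-detects a CUDA toolchain by default (nvcc on PATH, /usr/local/cuda/bin/nvcc, nvidia-smi, or a libcudart entry in the ldconfig cache), and the toggle can be forced either way. The invocations below follow directly from the flags the script parses; the manual cmake lines are the equivalent configure/build on platforms without the script, with the build directory name chosen freely:

    # Force a CUDA build (configure fails if the CUDA toolchain is absent):
    ./TensorSharp.GGML.Native/build-linux.sh --cuda on --build-type Release

    # CPU-only build, e.g. for CI runners without GPUs:
    TSG_ENABLE_CUDA=off ./TensorSharp.GGML.Native/build-linux.sh

    # Equivalent manual configure/build using the new CMake option:
    cmake -S TensorSharp.GGML.Native -B TensorSharp.GGML.Native/build/manual -DTSG_ENABLE_CUDA=ON
    cmake --build TensorSharp.GGML.Native/build/manual --target GgmlOps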
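On the managed side, selecting the new backend mirrors the existing Metal path. A minimal sketch, assuming EnsureAvailable is the public helper exposed by the GgmlNative class in GgmlNative.cs, and using a hypothetical ModelBase subclass MyModel; the try/catch fallback is illustrative, not part of this change:

    // Prefer CUDA when the native bridge was built with TSG_ENABLE_CUDA=ON;
    // otherwise fall back to the ggml CPU backend (now also the native-side
    // default when no backend type has been requested).
    BackendType backend = BackendType.GgmlCuda;
    try
    {
        GgmlNative.EnsureAvailable(GgmlBackendType.Cuda);
    }
    catch (InvalidOperationException)
    {
        // Thrown when TSGgml_IsBackendAvailable reports the backend missing,
        // e.g. "ggml-cuda backend is not compiled into this native bridge."
        backend = BackendType.GgmlCpu;
    }

    var model = new MyModel("model.gguf", backend); // MyModel: hypothetical concrete model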