From 02e39c5f230f3e08aeeab845df9114a7ab6d9114 Mon Sep 17 00:00:00 2001 From: Zhongkai Fu Date: Sun, 5 Apr 2026 12:41:09 -0700 Subject: [PATCH 1/4] Add Linux build script for TensorSharp.GGML.Native --- README.md | 10 ++++++++++ TensorSharp.GGML.Native/CMakeLists.txt | 16 ++++++++++++---- TensorSharp.GGML.Native/build-linux.sh | 8 ++++++++ readme_cn.md | 10 ++++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100755 TensorSharp.GGML.Native/build-linux.sh diff --git a/README.md b/README.md index b560812..0ee4dc7 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,16 @@ bash build-macos.sh This compiles `libGgmlOps.dylib` with Metal GPU support. The build output is automatically copied to the application's output directory. +### Build the native GGML library (Linux) + +The Linux script builds a CPU-only `libGgmlOps.so`: + +```bash +cd TensorSharp.GGML.Native +bash build-linux.sh +``` + + ## Usage ### Console Application diff --git a/TensorSharp.GGML.Native/CMakeLists.txt b/TensorSharp.GGML.Native/CMakeLists.txt index 04d51db..8bbe330 100644 --- a/TensorSharp.GGML.Native/CMakeLists.txt +++ b/TensorSharp.GGML.Native/CMakeLists.txt @@ -1,5 +1,9 @@ cmake_minimum_required(VERSION 3.20) -project(GgmlOps LANGUAGES C CXX OBJC OBJCXX) +if(APPLE) + project(GgmlOps LANGUAGES C CXX OBJC OBJCXX) +else() + project(GgmlOps LANGUAGES C CXX) +endif() set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -19,9 +23,13 @@ add_compile_definitions(GGML_VERSION=\"0.0.0\" GGML_COMMIT=\"unknown\") set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) set(GGML_BACKEND_DL OFF CACHE BOOL "" FORCE) set(GGML_CPU ON CACHE BOOL "" FORCE) -set(GGML_METAL ON CACHE BOOL "" FORCE) -set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE) -set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE) +if(APPLE) + set(GGML_METAL ON CACHE BOOL "" FORCE) + set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE) + set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE) +else() + set(GGML_METAL OFF CACHE BOOL "" FORCE) +endif() set(GGML_CUDA OFF CACHE BOOL "" FORCE) set(GGML_HIP OFF CACHE BOOL "" FORCE) set(GGML_VULKAN OFF CACHE BOOL "" FORCE) diff --git a/TensorSharp.GGML.Native/build-linux.sh b/TensorSharp.GGML.Native/build-linux.sh new file mode 100755 index 0000000..0b1990d --- /dev/null +++ b/TensorSharp.GGML.Native/build-linux.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +BUILD_DIR="${SCRIPT_DIR}/build-linux" + +cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" -DCMAKE_BUILD_TYPE=Release +cmake --build "${BUILD_DIR}" --config Release --target GgmlOps diff --git a/readme_cn.md b/readme_cn.md index b4715a9..964214f 100644 --- a/readme_cn.md +++ b/readme_cn.md @@ -105,6 +105,16 @@ bash build-macos.sh 该过程会编译带 Metal GPU 支持的 `libGgmlOps.dylib`。构建产物会自动复制到应用输出目录。 +### 构建原生 GGML 库(Linux) + +Linux 脚本会编译 CPU-only 的 `libGgmlOps.so`: + +```bash +cd TensorSharp.GGML.Native +bash build-linux.sh +``` + + ## 使用方法 ### 控制台应用 From b29ddba74a7e3a8edf0bd11f2f2e0ff54304d0d5 Mon Sep 17 00:00:00 2001 From: Zhongkai Fu Date: Sun, 5 Apr 2026 19:56:05 -0700 Subject: [PATCH 2/4] Build and copy GGML native library on Linux --- TensorSharp.GGML/TensorSharp.GGML.csproj | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/TensorSharp.GGML/TensorSharp.GGML.csproj b/TensorSharp.GGML/TensorSharp.GGML.csproj index 20dfe3e..f398a4f 100644 --- a/TensorSharp.GGML/TensorSharp.GGML.csproj +++ b/TensorSharp.GGML/TensorSharp.GGML.csproj @@ -28,10 +28,19 @@ + + + + + + + + + From 4cf4a38d77d0ecb75ff5670f8753147945753383 Mon Sep 17 00:00:00 2001 From: Zhongkai Fu Date: Sun, 5 Apr 2026 20:18:33 -0700 Subject: [PATCH 3/4] Guard host-pointer GGML buffers with alignment checks --- TensorSharp.GGML.Native/ggml_ops.cpp | 96 ++++++++++++++++------------ 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/TensorSharp.GGML.Native/ggml_ops.cpp b/TensorSharp.GGML.Native/ggml_ops.cpp index 13fb21a..35d255a 100644 --- a/TensorSharp.GGML.Native/ggml_ops.cpp +++ b/TensorSharp.GGML.Native/ggml_ops.cpp @@ -875,6 +875,36 @@ namespace ggml_backend_tensor_set(binding.storage, data, 0, size); } + bool is_pointer_aligned_for_backend(ggml_backend_t backend, const void* ptr) + { + if (backend == nullptr || ptr == nullptr) + return false; + std::size_t alignment = ggml_backend_get_alignment(backend); + if (alignment == 0) + alignment = GGML_MEM_ALIGN; + return (reinterpret_cast(ptr) % alignment) == 0; + } + + bool try_create_host_ptr_buffer( + ggml_backend_t backend, + ggml_backend_dev_t dev, + void* data, + std::size_t raw_bytes, + ggml_backend_buffer_t& out_buffer) + { + out_buffer = nullptr; + if (backend == nullptr || dev == nullptr || data == nullptr || raw_bytes == 0) + return false; + ggml_backend_dev_props props; + ggml_backend_dev_get_props(dev, &props); + if (!props.caps.buffer_from_host_ptr) + return false; + if (!is_pointer_aligned_for_backend(backend, data)) + return false; + out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, data, raw_bytes, raw_bytes); + return out_buffer != nullptr; + } + // Create a binding that uses host ptr directly as Metal shared memory (zero host-device copies on Apple Silicon). // Returns empty binding on failure. Caller must keep buffer_handle alive until compute completes. bool create_binding_from_host_ptr_2d( @@ -886,13 +916,9 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.raw_bytes); - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast(sizeof(float))); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -917,13 +943,9 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.raw_bytes); - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast(sizeof(float))); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -947,13 +969,9 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.raw_bytes); - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast(sizeof(float))); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -979,13 +997,9 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.raw_bytes); - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast(sizeof(float))); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -1011,13 +1025,9 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.raw_bytes); - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast(sizeof(float))); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -1044,15 +1054,11 @@ namespace { ggml_backend_dev_t dev = ggml_backend_get_device(backend); if (dev == nullptr) return false; - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - if (!props.caps.buffer_from_host_ptr) return false; std::size_t raw_bytes = static_cast(desc.element_count) * sizeof(float); if (raw_bytes == 0) return false; - out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes); - if (out_buffer == nullptr) return false; + if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false; ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, desc.element_count); if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; } @@ -1399,9 +1405,12 @@ namespace } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, m2_quant.data, - static_cast(m2_quant.raw_bytes), - static_cast(m2_quant.raw_bytes)); + (void)try_create_host_ptr_buffer( + g_backend, + dev, + m2_quant.data, + static_cast(m2_quant.raw_bytes), + buf); if (buf != nullptr) g_host_buffer_cache[m2_quant.data] = {buf, static_cast(m2_quant.raw_bytes)}; } @@ -1558,9 +1567,12 @@ namespace } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, src_quant.data, - static_cast(src_quant.raw_bytes), - static_cast(src_quant.raw_bytes)); + (void)try_create_host_ptr_buffer( + g_backend, + dev, + src_quant.data, + static_cast(src_quant.raw_bytes), + buf); if (buf != nullptr) g_host_buffer_cache[src_quant.data] = {buf, static_cast(src_quant.raw_bytes)}; } @@ -7018,14 +7030,14 @@ namespace } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) g_host_buffer_cache[data] = {buf, bytes}; } } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) ephemeral_bufs.emplace_back(buf); } @@ -7393,14 +7405,14 @@ TSG_EXPORT int TSGgml_TransformerModelDecode( buf = it->second.buffer; else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) g_host_buffer_cache[data] = {buf, bytes}; } } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) ephemeral_bufs.emplace_back(buf); } @@ -7987,14 +7999,14 @@ TSG_EXPORT int TSGgml_Gemma4ModelDecode( buf = it->second.buffer; else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) g_host_buffer_cache[data] = {buf, bytes}; } } else { - buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes); + (void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf); if (buf != nullptr) ephemeral_bufs.emplace_back(buf); } From 51c58623862f51f85c99ad7b02b4f483477e88dd Mon Sep 17 00:00:00 2001 From: Zhongkai Fu Date: Sun, 5 Apr 2026 20:27:23 -0700 Subject: [PATCH 4/4] Fix addmm_quant fallback when zero-copy m1 binding fails --- TensorSharp.GGML.Native/ggml_ops.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/TensorSharp.GGML.Native/ggml_ops.cpp b/TensorSharp.GGML.Native/ggml_ops.cpp index 35d255a..5f75cdf 100644 --- a/TensorSharp.GGML.Native/ggml_ops.cpp +++ b/TensorSharp.GGML.Native/ggml_ops.cpp @@ -1365,7 +1365,16 @@ namespace { ggml_backend_buffer_t buf = nullptr; if (!create_binding_from_host_ptr_2d(context.value, g_backend, m1_desc, m1_binding, buf)) + { + // Zero-copy requires both result and m1 bindings to succeed. + // If m1 cannot be host-mapped (e.g., alignment constraints), fall back both tensors + // to regular backend-managed buffers to keep upload/download logic consistent. use_zero_copy = false; + result_binding = create_standard_binding(context.value, result_desc); + m1_binding = can_map_standard_view(m1_desc) + ? create_standard_binding(context.value, m1_desc) + : create_packed_standard_binding(context.value, m1_desc, packed_m1); + } else host_ptr_buffers.emplace_back(buf); }