Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ bash build-macos.sh

This compiles `libGgmlOps.dylib` with Metal GPU support. The build output is automatically copied to the application's output directory.

### Build the native GGML library (Linux)

The Linux script builds a CPU-only `libGgmlOps.so`:

```bash
cd TensorSharp.GGML.Native
bash build-linux.sh
```


## Usage

### Console Application
Expand Down
16 changes: 12 additions & 4 deletions TensorSharp.GGML.Native/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
cmake_minimum_required(VERSION 3.20)
project(GgmlOps LANGUAGES C CXX OBJC OBJCXX)
if(APPLE)
project(GgmlOps LANGUAGES C CXX OBJC OBJCXX)
else()
project(GgmlOps LANGUAGES C CXX)
endif()

set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
Expand All @@ -19,9 +23,13 @@ add_compile_definitions(GGML_VERSION=\"0.0.0\" GGML_COMMIT=\"unknown\")
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(GGML_BACKEND_DL OFF CACHE BOOL "" FORCE)
set(GGML_CPU ON CACHE BOOL "" FORCE)
set(GGML_METAL ON CACHE BOOL "" FORCE)
set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
if(APPLE)
set(GGML_METAL ON CACHE BOOL "" FORCE)
set(GGML_METAL_NDEBUG ON CACHE BOOL "" FORCE)
set(GGML_METAL_EMBED_LIBRARY ON CACHE BOOL "" FORCE)
else()
set(GGML_METAL OFF CACHE BOOL "" FORCE)
endif()
set(GGML_CUDA OFF CACHE BOOL "" FORCE)
set(GGML_HIP OFF CACHE BOOL "" FORCE)
set(GGML_VULKAN OFF CACHE BOOL "" FORCE)
Expand Down
8 changes: 8 additions & 0 deletions TensorSharp.GGML.Native/build-linux.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Build the CPU-only libGgmlOps.so for Linux.
# Output goes to <repo>/TensorSharp.GGML.Native/build-linux/.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BUILD_DIR="${SCRIPT_DIR}/build-linux"

# Parallel build jobs; override with JOBS=n. Falls back to 2 if nproc is unavailable.
JOBS="${JOBS:-$(nproc 2>/dev/null || echo 2)}"

cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" -DCMAKE_BUILD_TYPE=Release
cmake --build "${BUILD_DIR}" --config Release --target GgmlOps --parallel "${JOBS}"
105 changes: 63 additions & 42 deletions TensorSharp.GGML.Native/ggml_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,36 @@ namespace
ggml_backend_tensor_set(binding.storage, data, 0, size);
}

// Returns true when `ptr` meets the alignment requirement reported by `backend`.
// Both arguments must be non-null; a null backend or pointer is treated as unaligned.
bool is_pointer_aligned_for_backend(ggml_backend_t backend, const void* ptr)
{
    if (backend == nullptr || ptr == nullptr)
        return false;
    // The backend may report 0 (unknown); fall back to ggml's generic minimum.
    std::size_t required = ggml_backend_get_alignment(backend);
    if (required == 0)
        required = GGML_MEM_ALIGN;
    const auto address = reinterpret_cast<std::uintptr_t>(ptr);
    return address % required == 0;
}

// Attempts to wrap `data` (a host allocation of `raw_bytes` bytes) in a backend
// buffer without copying, via ggml_backend_dev_buffer_from_host_ptr.
// Succeeds only when the device advertises buffer_from_host_ptr support and the
// pointer satisfies the backend's alignment. On failure `out_buffer` is null
// and false is returned; on success the caller owns `out_buffer`.
bool try_create_host_ptr_buffer(
    ggml_backend_t backend,
    ggml_backend_dev_t dev,
    void* data,
    std::size_t raw_bytes,
    ggml_backend_buffer_t& out_buffer)
{
    out_buffer = nullptr;

    const bool args_valid =
        backend != nullptr && dev != nullptr && data != nullptr && raw_bytes != 0;
    if (!args_valid)
        return false;

    ggml_backend_dev_props props;
    ggml_backend_dev_get_props(dev, &props);
    // Short-circuit keeps the original check order: capability first, then alignment.
    if (!props.caps.buffer_from_host_ptr || !is_pointer_aligned_for_backend(backend, data))
        return false;

    out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, data, raw_bytes, raw_bytes);
    return out_buffer != nullptr;
}

// Create a binding that uses host ptr directly as Metal shared memory (zero host-device copies on Apple Silicon).
// Returns empty binding on failure. Caller must keep buffer_handle alive until compute completes.
bool create_binding_from_host_ptr_2d(
Expand All @@ -886,13 +916,9 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.raw_bytes);
out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast<std::int64_t>(sizeof(float)));
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand All @@ -917,13 +943,9 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.raw_bytes);
out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast<std::int64_t>(sizeof(float)));
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand All @@ -947,13 +969,9 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.raw_bytes);
out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast<std::int64_t>(sizeof(float)));
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand All @@ -979,13 +997,9 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.raw_bytes);
out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast<std::int64_t>(sizeof(float)));
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand All @@ -1011,13 +1025,9 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.raw_bytes);
out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, raw_bytes / static_cast<std::int64_t>(sizeof(float)));
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand All @@ -1044,15 +1054,11 @@ namespace
{
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
if (dev == nullptr) return false;
ggml_backend_dev_props props;
ggml_backend_dev_get_props(dev, &props);
if (!props.caps.buffer_from_host_ptr) return false;

std::size_t raw_bytes = static_cast<std::size_t>(desc.element_count) * sizeof(float);
if (raw_bytes == 0) return false;

out_buffer = ggml_backend_dev_buffer_from_host_ptr(dev, desc.data, raw_bytes, raw_bytes);
if (out_buffer == nullptr) return false;
if (!try_create_host_ptr_buffer(backend, dev, desc.data, raw_bytes, out_buffer)) return false;

ggml_tensor* base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, desc.element_count);
if (base == nullptr) { ggml_backend_buffer_free(out_buffer); out_buffer = nullptr; return false; }
Expand Down Expand Up @@ -1359,7 +1365,16 @@ namespace
{
ggml_backend_buffer_t buf = nullptr;
if (!create_binding_from_host_ptr_2d(context.value, g_backend, m1_desc, m1_binding, buf))
{
// Zero-copy requires both result and m1 bindings to succeed.
// If m1 cannot be host-mapped (e.g., alignment constraints), fall back both tensors
// to regular backend-managed buffers to keep upload/download logic consistent.
use_zero_copy = false;
result_binding = create_standard_binding(context.value, result_desc);
m1_binding = can_map_standard_view(m1_desc)
? create_standard_binding(context.value, m1_desc)
: create_packed_standard_binding(context.value, m1_desc, packed_m1);
}
else
host_ptr_buffers.emplace_back(buf);
}
Expand Down Expand Up @@ -1399,9 +1414,12 @@ namespace
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, m2_quant.data,
static_cast<std::size_t>(m2_quant.raw_bytes),
static_cast<std::size_t>(m2_quant.raw_bytes));
(void)try_create_host_ptr_buffer(
g_backend,
dev,
m2_quant.data,
static_cast<std::size_t>(m2_quant.raw_bytes),
buf);
if (buf != nullptr)
g_host_buffer_cache[m2_quant.data] = {buf, static_cast<std::size_t>(m2_quant.raw_bytes)};
}
Expand Down Expand Up @@ -1558,9 +1576,12 @@ namespace
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, src_quant.data,
static_cast<std::size_t>(src_quant.raw_bytes),
static_cast<std::size_t>(src_quant.raw_bytes));
(void)try_create_host_ptr_buffer(
g_backend,
dev,
src_quant.data,
static_cast<std::size_t>(src_quant.raw_bytes),
buf);
if (buf != nullptr)
g_host_buffer_cache[src_quant.data] = {buf, static_cast<std::size_t>(src_quant.raw_bytes)};
}
Expand Down Expand Up @@ -7018,14 +7039,14 @@ namespace
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
g_host_buffer_cache[data] = {buf, bytes};
}
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
ephemeral_bufs.emplace_back(buf);
}
Expand Down Expand Up @@ -7393,14 +7414,14 @@ TSG_EXPORT int TSGgml_TransformerModelDecode(
buf = it->second.buffer;
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
g_host_buffer_cache[data] = {buf, bytes};
}
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
ephemeral_bufs.emplace_back(buf);
}
Expand Down Expand Up @@ -7987,14 +8008,14 @@ TSG_EXPORT int TSGgml_Gemma4ModelDecode(
buf = it->second.buffer;
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
g_host_buffer_cache[data] = {buf, bytes};
}
}
else
{
buf = ggml_backend_dev_buffer_from_host_ptr(dev, data, bytes, bytes);
(void)try_create_host_ptr_buffer(g_backend, dev, data, bytes, buf);
if (buf != nullptr)
ephemeral_bufs.emplace_back(buf);
}
Expand Down
9 changes: 9 additions & 0 deletions TensorSharp.GGML/TensorSharp.GGML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,19 @@
<Target Name="BuildGgmlNative" BeforeTargets="BeforeBuild" Condition="$([MSBuild]::IsOSPlatform('OSX')) And !Exists('$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build/libGgmlOps.dylib')">
<Exec Command="bash &quot;$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build-macos.sh&quot;" />
</Target>
<Target Name="BuildGgmlNativeLinux" BeforeTargets="BeforeBuild" Condition="$([MSBuild]::IsOSPlatform('Linux')) And !Exists('$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build-linux/libGgmlOps.so')">
<Exec Command="bash &quot;$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build-linux.sh&quot;" />
</Target>
<Target Name="CopyGgmlNativeBinary" AfterTargets="Build" Condition="$([MSBuild]::IsOSPlatform('OSX'))">
<ItemGroup>
<GgmlNativeBinary Include="$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build/libGgmlOps.dylib" />
</ItemGroup>
<Copy SourceFiles="@(GgmlNativeBinary)" DestinationFolder="$(OutputPath)" SkipUnchangedFiles="true" />
</Target>
<Target Name="CopyGgmlNativeBinaryLinux" AfterTargets="Build" Condition="$([MSBuild]::IsOSPlatform('Linux'))">
<ItemGroup>
<GgmlNativeBinaryLinux Include="$(MSBuildProjectDirectory)/../TensorSharp.GGML.Native/build-linux/libGgmlOps.so" />
</ItemGroup>
<Copy SourceFiles="@(GgmlNativeBinaryLinux)" DestinationFolder="$(OutputPath)" SkipUnchangedFiles="true" />
</Target>
</Project>
10 changes: 10 additions & 0 deletions readme_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ bash build-macos.sh

该过程会编译带 Metal GPU 支持的 `libGgmlOps.dylib`。构建产物会自动复制到应用输出目录。

### 构建原生 GGML 库(Linux)

Linux 脚本会编译 CPU-only 的 `libGgmlOps.so`:

```bash
cd TensorSharp.GGML.Native
bash build-linux.sh
```


## 使用方法

### 控制台应用
Expand Down