From 827824b669d3eb59b4639e834dba34ac1409a2b8 Mon Sep 17 00:00:00 2001 From: hsinhoyeh Date: Sat, 7 Mar 2026 20:43:16 +0800 Subject: [PATCH] fix: fix flags used in llama.cpp and whisper.cpp --- README.md | 145 ++++++++++++++++++++++++---------- ggml/llamacpp/llamacpp.go | 3 +- ggml/whispercpp/whispercpp.go | 3 +- 3 files changed, 108 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 23e2068..1f809a3 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,30 @@ Go bindings for C++ inference frameworks via CGO, with prebuilt static libraries for zero-dependency builds. +## Why go-nativeml? + +| Approach | Build complexity | Runtime dependency | `go mod vendor` | GPU support | +|----------|------------------|--------------------|-----------------|-------------| +| **go-nativeml (this project)** | `go build -tags llamacpp` — just works | None — static linking | Works via `go:embed` | Metal, CPU | +| HTTP/subprocess wrapper (e.g. ollama server) | Separate process to manage | Running server required | N/A | Depends on server | +| Dynamic linking (shared `.so`/`.dylib`) | Must install libs on every machine | Shared libs must exist at runtime | Cannot vendor native libs | Depends on build | +| Build from source at `go get` | Requires C++ toolchain + cmake on every machine | None | Fragile — source download at build | Depends on build | +| Pure Go reimplementation | Simple | None | Works | Limited/none | + +**Key advantages:** + +- **Zero build-time setup** — prebuilt `.a` files ship with the Go module. No cmake, no C++ toolchain, no downloads. +- **Vendoring works** — `embed.go` files use `//go:embed` to ensure `go mod vendor` captures headers and static libraries. Standard Go tooling just works. +- **No runtime dependencies** — everything is statically linked. No shared libraries to install, no server to run. +- **Stub fallback** — without build tags, all packages compile to stubs returning errors. 
CI, linters, and `go build ./...` work everywhere without CGO. +- **Type-safe Go API** — idiomatic option pattern, proper error handling, streaming callbacks. No shell-outs or HTTP round-trips. + ## Supported Frameworks | Framework | Version | Package | Build Tag | Capabilities | Status | |-----------|---------|---------|-----------|--------------|--------| | [llama.cpp](https://github.com/ggerganov/llama.cpp) | `b8220` | `ggml/llamacpp` | `llamacpp` | Text generation, embeddings, tokenization | Available | -| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | `v1.8.3` | `ggml/whispercpp` | `whispercpp` | Speech-to-text | Planned | +| [whisper.cpp](https://github.com/ggerganov/whisper.cpp) | `v1.8.3` | `ggml/whispercpp` | `whispercpp` | Speech-to-text transcription | Available | ## Quick Start @@ -15,6 +33,8 @@ Go bindings for C++ inference frameworks via CGO, with prebuilt static libraries go get github.com/footprintai/go-nativeml ``` +### llama.cpp — Text Generation + ```go import "github.com/footprintai/go-nativeml/ggml/llamacpp" @@ -34,12 +54,31 @@ ctx.GenerateStream("Hello, world", func(token string) bool { }, llamacpp.WithMaxTokens(256), llamacpp.WithTemperature(0.8)) ``` +### whisper.cpp — Speech-to-Text + +```go +import "github.com/footprintai/go-nativeml/ggml/whispercpp" + +model, _ := whispercpp.LoadModel("ggml-base.bin", whispercpp.WithGPU(true)) +defer model.Close() + +// pcmData: 16kHz mono float32 samples +segments, _ := model.Transcribe(pcmData, + whispercpp.WithLanguage("en"), + whispercpp.WithThreads(4), +) +for _, seg := range segments { + fmt.Printf("[%s -> %s] %s\n", seg.Start, seg.End, seg.Text) +} +``` + ## Build Tags | Tag | Behavior | |-----|----------| | _(none)_ | Stub implementations that return errors. Allows `go build` without CGO. | | `llamacpp` | Enables CGO bindings to prebuilt llama.cpp static libraries. | +| `whispercpp` | Enables CGO bindings to prebuilt whisper.cpp static libraries. 
| ```bash # Stub build (no CGO required) @@ -47,42 +86,37 @@ go build ./... # CGO build with llama.cpp CGO_ENABLED=1 go build -tags llamacpp ./... + +# CGO build with whisper.cpp +CGO_ENABLED=1 go build -tags whispercpp ./... + +# Both +CGO_ENABLED=1 go build -tags "llamacpp whispercpp" ./... ``` ## API -### Lifecycle +### llamacpp ```go -llamacpp.Init() // initialize backend -llamacpp.Shutdown() // cleanup -``` - -### Model +// Lifecycle +llamacpp.Init() +llamacpp.Shutdown() -```go -model, err := llamacpp.LoadModel(path, - llamacpp.WithGPULayers(n), // layers to offload to GPU -) +// Model +model, err := llamacpp.LoadModel(path, llamacpp.WithGPULayers(n)) model.Close() -model.EmbeddingSize() // returns embedding dimension -``` - -### Context +model.EmbeddingSize() -```go +// Context ctx, err := model.NewContext( llamacpp.WithContextSize(2048), llamacpp.WithThreads(4), - llamacpp.WithEmbeddings(), // enable embedding mode + llamacpp.WithEmbeddings(), ) ctx.Close() -``` -### Generation - -```go -// Blocking +// Generation (blocking) text, err := ctx.Generate(prompt, llamacpp.WithMaxTokens(256), llamacpp.WithTemperature(0.8), @@ -93,24 +127,45 @@ text, err := ctx.Generate(prompt, llamacpp.WithSeed(42), ) -// Streaming +// Generation (streaming) err := ctx.GenerateStream(prompt, func(token string) bool { fmt.Print(token) return true // return false to cancel }, llamacpp.WithMaxTokens(256)) -``` -### Embeddings +// Embeddings +embeddings, err := ctx.GetEmbeddings("some text") // []float32 -```go -ctx, _ := model.NewContext(llamacpp.WithContextSize(512), llamacpp.WithEmbeddings()) -embeddings, err := ctx.GetEmbeddings("some text") // []float32 +// Tokenization +tokens, err := ctx.Tokenize("some text") // []int ``` -### Tokenization +### whispercpp ```go -tokens, err := ctx.Tokenize("some text") // []int +// Model +model, err := whispercpp.LoadModel(path, + whispercpp.WithGPU(true), + whispercpp.WithFlashAttention(true), +) +model.Close() +model.IsMultilingual() + +// 
Transcription (pcmData: 16kHz mono float32) +segments, err := model.Transcribe(pcmData, + whispercpp.WithThreads(4), + whispercpp.WithLanguage("en"), + whispercpp.WithTranslate(false), + whispercpp.WithTimestamps(true), + whispercpp.WithTokenTimestamps(false), + whispercpp.WithSingleSegment(false), + whispercpp.WithTemperature(0.0), + whispercpp.WithMaxTokens(0), + whispercpp.WithPrompt(""), +) + +// Utilities +id := whispercpp.LangID("en") // language string -> ID ``` ## Examples @@ -151,21 +206,29 @@ make clean # Remove temp build dirs ## Adding New Platforms -1. Build llama.cpp static libraries for the target platform -2. Place `.a` files in `third_party/llama.cpp/prebuilt/<os>-<arch>/` -3. Add a `#cgo <os>,<arch> LDFLAGS` directive in `ggml/llamacpp/llamacpp.go` +1. Build static libraries for the target platform +2. Place `.a` files in `ggml/<framework>/third_party/prebuilt/<os>-<arch>/` +3. Add a `#cgo <os>,<arch> LDFLAGS` directive in the corresponding `.go` file ## Project Structure ``` -ggml/llamacpp/ Go bindings for llama.cpp - llamacpp.go CGO implementation (build tag: llamacpp) - llamacpp_stub.go Stub implementation (default) - options.go Option builders for model, context, generation - wrapper.h/.cpp C++ bridge to llama.cpp APIs - bridge.c CGO callback adapter -third_party/llama.cpp/ Upstream headers + prebuilt static libraries -examples/ Usage examples (generate, embeddings) +ggml/ + llamacpp/ Go bindings for llama.cpp + llamacpp.go CGO implementation (build tag: llamacpp) + llamacpp_stub.go Stub implementation (default) + options.go Option builders + wrapper.h/.cpp C++ bridge to llama.cpp APIs + bridge.c CGO callback adapter + embed.go go:embed for vendoring support + third_party/ Upstream headers + prebuilt .a files + whispercpp/ Go bindings for whisper.cpp + whispercpp.go CGO implementation (build tag: whispercpp) + whispercpp_stub.go Stub implementation (default) + options.go Option builders + embed.go go:embed for vendoring support + third_party/ Upstream headers + prebuilt .a files +examples/ 
Usage examples (generate, embeddings) ``` ## License diff --git a/ggml/llamacpp/llamacpp.go b/ggml/llamacpp/llamacpp.go index 854e5f8..8e40dcb 100644 --- a/ggml/llamacpp/llamacpp.go +++ b/ggml/llamacpp/llamacpp.go @@ -14,7 +14,8 @@ package llamacpp #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-amd64 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64 #cgo LDFLAGS: -lcommon -lllama -lggml-cpu -lggml-base -lggml -lstdc++ -lm -#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation +#cgo darwin LDFLAGS: -lggml-blas -lggml-metal -L/usr/local/opt/libomp/lib -L/opt/homebrew/opt/libomp/lib -lomp -framework Accelerate -framework Metal -framework Foundation +#cgo linux LDFLAGS: -lpthread -ldl -lrt -lgomp #include <stdlib.h> #include <stdbool.h> #include "wrapper.h" diff --git a/ggml/whispercpp/whispercpp.go b/ggml/whispercpp/whispercpp.go index 783bbcb..477c143 100644 --- a/ggml/whispercpp/whispercpp.go +++ b/ggml/whispercpp/whispercpp.go @@ -14,7 +14,8 @@ package whispercpp #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-amd64 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64 #cgo LDFLAGS: -lwhisper -lcommon -lggml-cpu -lggml-base -lggml -lstdc++ -lm -#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation +#cgo darwin LDFLAGS: -lggml-blas -lggml-metal -L/usr/local/opt/libomp/lib -L/opt/homebrew/opt/libomp/lib -lomp -framework Accelerate -framework Metal -framework Foundation +#cgo linux LDFLAGS: -lpthread -ldl -lrt -lgomp #include <stdlib.h> #include "whisper.h" */