Merged
Commits
82 commits
2f4691d
feat(backend): add embeddings support with llama-go
juligasa Feb 5, 2026
bb1f5d9
fix(daemon): make cmake visible for please
juligasa Feb 3, 2026
2b77a32
download cmake if not present
juligasa Feb 3, 2026
1a3954b
let mise handle all cmake's shit
juligasa Feb 3, 2026
5dbca8e
explicit c++17
juligasa Feb 4, 2026
ef56247
fix(daemon): macos linker
juligasa Feb 4, 2026
1916c53
fix(daemon): not using omp in macos
juligasa Feb 4, 2026
a437b26
fix(daemon): accelerated framework macos
juligasa Feb 4, 2026
5212781
fix(daemon): not build Blas accelerator on CPU
juligasa Feb 4, 2026
13ec52b
fix(dev): use pnpm and add GPU/llama-go support to dev script
juligasa Feb 5, 2026
630f630
fix(build): use pnpm build rules instead of yarn
juligasa Feb 5, 2026
0aab001
fix(mise): use pnpm instead of yarn in mise.toml
juligasa Feb 5, 2026
7ae3c0e
feat(daemon): filters in semantic search
juligasa Feb 5, 2026
01271f1
fix(daemon): remove semantic dedup as its slow
juligasa Feb 5, 2026
df8ede0
fix(daemon): add search pagination
juligasa Feb 5, 2026
bf22fa1
feat(ci): add llama.cpp build support to CI workflows and fix Docker …
juligasa Feb 5, 2026
38bd2f1
ci: add temporary build test workflow
juligasa Feb 5, 2026
da89e7b
fix(daemon): idempotent migration
juligasa Feb 5, 2026
84be3df
fix(daemon): embeddings task feedback
juligasa Feb 5, 2026
4d4a1b2
wip(frontend): activate embeddings
juligasa Feb 5, 2026
e8a3e07
wip(daemon): check linux/mac compilation
juligasa Feb 5, 2026
5b32774
fix(daemon): real feedback on embeddings
juligasa Feb 5, 2026
3b74b72
fix(ci): compile metal in macos
juligasa Feb 5, 2026
5b73d5d
fix(ci): compile newer macos and windows
juligasa Feb 6, 2026
9ca2061
wip(ci): attempt to fix macos/win
juligasa Feb 6, 2026
1f5ade7
fix(ci): macos build
juligasa Feb 6, 2026
8454f50
fix(ci): windows gpu support
juligasa Feb 6, 2026
13ea324
fix(ci): debug windows
juligasa Feb 6, 2026
69551c2
fix(ci): attempt to fix windows
juligasa Feb 6, 2026
d316aed
attempt 2 to fix windows
juligasa Feb 6, 2026
036301c
attempt 3 to fix windows
juligasa Feb 6, 2026
d63267d
attempt 4 to fix windows
juligasa Feb 6, 2026
ea4ddee
attempt 5 to fix windows
juligasa Feb 6, 2026
20d351b
attempt 6 to fix windows
juligasa Feb 6, 2026
db45713
attempt 7 to fix windows
juligasa Feb 6, 2026
4c7abee
attempt 8 to fix windows
juligasa Feb 6, 2026
e09aec7
fix(daemon): Return correct versions in search
juligasa Feb 6, 2026
a829ad0
fix(daemon): deterministic hybrid order
juligasa Feb 6, 2026
4a8be47
fix(ci): make all workflows compile with gpu
juligasa Feb 6, 2026
07b9bbe
fix(daemon): search bug
juligasa Feb 6, 2026
3229855
fix(daemon): authority score 🥚
juligasa Feb 6, 2026
4660d68
wip(daemon): make gpu by default
juligasa Feb 9, 2026
73d4e47
fix(daemon): granite model bc of weird single words old model
juligasa Feb 9, 2026
73b7a16
wip(daemon): compile gpu everywhere
juligasa Feb 9, 2026
f2b82d4
fix(daemon): remove the reranking downloading
juligasa Feb 9, 2026
3b343fd
fix(ci): get back the build test
juligasa Feb 9, 2026
d3a491d
fix(ci): prepare image for production
juligasa Feb 10, 2026
40dbaf8
fix(daemon): semantic filtering
juligasa Feb 10, 2026
61512c0
wip(daemon): add deprecation notices
juligasa Feb 10, 2026
62f5b1b
fix(ci): build test binaries
juligasa Feb 10, 2026
fc61480
fix(ci): windows package
juligasa Feb 10, 2026
3a9832d
fix(ci): win semver test
juligasa Feb 10, 2026
2b5fc3c
fix(ci): MinGW static linking Windows
juligasa Feb 10, 2026
4d0d3cf
wip(daemon): not using windows dlls
juligasa Feb 10, 2026
eb31dc1
fix(ci): removing windows dlls
juligasa Feb 10, 2026
3c1d570
fix(ci): static OpenMP
juligasa Feb 10, 2026
4435c61
fix(ci): no lgomp
juligasa Feb 10, 2026
d80af20
fix(ci): Windows dance
juligasa Feb 10, 2026
f864cf9
wip(ci): manually add missing deps
juligasa Feb 11, 2026
42cc0cd
fix(ci): not using MinGW to build windows
juligasa Feb 11, 2026
0cca5b4
fix(ci): not using MinGW to build windows v2
juligasa Feb 11, 2026
cb5c44e
fix(ci): not using MinGW to build windows v3
juligasa Feb 11, 2026
ba6e8be
wip(ci): compile sqlite differently in windows
juligasa Feb 11, 2026
35685a8
wip(ci): compile sqlite differently in windows v2
juligasa Feb 11, 2026
6f28d13
mingw is back
juligasa Feb 11, 2026
158cdd5
include dlls in win bundle
juligasa Feb 11, 2026
740191d
bundle test build as in prod
juligasa Feb 11, 2026
758769c
fix(daemon): includeBody searches comments
juligasa Feb 11, 2026
aabebef
fix(ci): test like in prod
juligasa Feb 11, 2026
a781f12
fix(ci): mingw win compilation everywhere
juligasa Feb 11, 2026
7335706
refactor(backend): migrate llama-go from vendored copy to git submodule
juligasa Feb 13, 2026
df2c14d
fix(backend): improve dev setup for fresh clones
juligasa Feb 13, 2026
209a31d
fix(backend): build llama-cpp in-place instead of copying to sandbox
juligasa Feb 13, 2026
54e48fb
refactor(backend): remove CPU/GPU toggle, use platform-native GPU per OS
juligasa Feb 16, 2026
82039ce
fix(backend): exclude llama.cpp submodule from seed-daemon glob
juligasa Feb 16, 2026
90fc53a
fix(dev): use go build directly instead of plz for seed-daemon
juligasa Feb 16, 2026
66f7b51
Revert "fix(dev): use go build directly instead of plz for seed-daemon"
juligasa Feb 16, 2026
c8781e9
fix(backend): add missing libggml-blas.a to llama-cpp genrule and rem…
juligasa Feb 16, 2026
1f882ae
chore: update llama-go submodule (gitignore build artifacts)
juligasa Feb 16, 2026
5aea289
fix(ci): add llama.cpp build and GGUF model to integration-tests job
juligasa Feb 17, 2026
8e6ba0e
fix(backend): fix race condition in embedding indexing test
juligasa Feb 17, 2026
72621a5
fix(ci): move GGUF model and DLL verification steps to real workflows…
juligasa Feb 19, 2026
13 changes: 13 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -43,9 +43,22 @@ grep -qxF "$PATTERN" "$EXCLUDE_FILE" || echo "$PATTERN" >> "$EXCLUDE_FILE"
# Needed for the Go extension in VS Code to find the right toolchain.
export GOROOT="$(go env GOROOT)"

# Ensure git submodules are initialized, embedding model is downloaded,
# and llama.cpp libraries are built. These are idempotent (skip if already done).
mise run ensure-llama-libs
mise run ensure-model

# CGO flags for llama.cpp - use source directory where mise builds the libraries.
export LIBRARY_PATH="$WORKSPACE/backend/util/llama-go"
export C_INCLUDE_PATH="$WORKSPACE/backend/util/llama-go"

# These variables are defined in a separate file to avoid having to invoke direnv allow
# every time we change them. The file doesn't allow any scripting for security, only variables.
dotenv .env.vars

# Optional loading of local env vars.
dotenv_if_exists .env.local

# GPU acceleration is platform-dependent:
# - macOS: Metal (always enabled, via backend/util/llama-go/zgpu_darwin.go)
# - Linux: CPU-only for local dev (Vulkan used in CI/production only)
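The two exports above are what let a plain `go build` find the llama.cpp headers and static libraries without per-package cgo flags, since the C toolchain cgo invokes honors both variables. A minimal sketch of the mechanism (the `WORKSPACE` value here is a placeholder, not the real direnv-provided path):

```shell
# Sketch only: WORKSPACE is a placeholder; direnv supplies the real value.
WORKSPACE="/tmp/example-workspace"

# cgo's compiler consults C_INCLUDE_PATH when resolving #include "llama.h",
# and LIBRARY_PATH when resolving -lllama and friends at link time.
export LIBRARY_PATH="$WORKSPACE/backend/util/llama-go"
export C_INCLUDE_PATH="$WORKSPACE/backend/util/llama-go"

# The actual build (commented out here; it needs the built libraries):
# CGO_ENABLED=1 go build ./backend/cmd/seed-daemon

echo "headers from: $C_INCLUDE_PATH"
echo "libs from:    $LIBRARY_PATH"
```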
68 changes: 67 additions & 1 deletion .github/actions/ci-setup/action.yml
@@ -33,13 +33,79 @@ runs:
sudo apt-get install -y gcc-12 g++-12
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100
- sudo apt-get install -y libgtk-3-dev webkit2gtk-4.0 libayatana-appindicator3-dev librsvg2-dev patchelf rpm libc6 python3 build-essential sqlite3 libsqlite3-dev flatpak flatpak-builder elfutils libnss3 libnspr4 libasound2t64 libnotify4 libpcre3 libpulse0 libxss1 libxtst6 squashfs-tools
+ sudo apt-get install -y libgtk-3-dev webkit2gtk-4.0 libayatana-appindicator3-dev librsvg2-dev patchelf rpm libc6 python3 build-essential sqlite3 libsqlite3-dev flatpak flatpak-builder elfutils libnss3 libnspr4 libasound2t64 libnotify4 libpcre3 libpulse0 libxss1 libxtst6 squashfs-tools cmake libgomp1
# Snap-related packages temporarily disabled - focusing on flatpak
# sudo apt-get install -y snapd
# sudo snap install snapcraft --classic --channel=7.x/stable
# sudo snap install multipass
shell: bash

- name: "Install Vulkan dev packages (Linux)"
if: inputs.matrix-os == 'ubuntu-latest'
run: |
sudo apt-get install -y libvulkan-dev glslc
shell: bash

- name: "Install Vulkan SDK (Windows)"
if: inputs.matrix-os == 'windows-2025'
shell: powershell
run: |
$vulkanVersion = "1.4.313.2"
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/$vulkanVersion/windows/vulkansdk-windows-X64-$vulkanVersion.exe"
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\$vulkanVersion"
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\$vulkanVersion\Bin"

- name: "Build llama.cpp (Linux)"
if: inputs.matrix-os == 'ubuntu-latest'
run: |
cd backend/util/llama-go
BUILD_TYPE=vulkan CMAKE_ARGS="-DBUILD_SHARED_LIBS=OFF" make libbinding.a
shell: bash

- name: "Build llama.cpp (macOS)"
if: startsWith(inputs.matrix-os, 'macos')
run: |
cd backend/util/llama-go
BUILD_TYPE=metal CMAKE_ARGS="-DBUILD_SHARED_LIBS=OFF" make libbinding.a
shell: bash

- name: "Build llama.cpp (Windows)"
if: inputs.matrix-os == 'windows-2025'
shell: bash
run: |
set -euo pipefail
cd backend/util/llama-go

cmake -G "MinGW Makefiles" -B build -S llama.cpp \
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DBUILD_SHARED_LIBS=OFF \
-DLLAMA_CURL=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=gcc \
-DCMAKE_CXX_COMPILER=g++

cmake --build build --config Release -j "$(nproc)"

cp build/src/libllama.a ./libllama.a
cp build/ggml/src/ggml.a ./libggml.a
cp build/ggml/src/ggml-base.a ./libggml-base.a
cp build/ggml/src/ggml-cpu.a ./libggml-cpu.a
cp build/ggml/src/ggml-vulkan/ggml-vulkan.a ./libggml-vulkan.a
cp build/common/libcommon.a ./libcommon.a
cp "$VULKAN_SDK/Lib/vulkan-1.lib" ./libvulkan-1.a

for lib in libllama.a libggml.a libggml-base.a libggml-cpu.a libggml-vulkan.a libcommon.a libvulkan-1.a; do
[ -f "$lib" ] || { echo "ERROR: Missing $lib"; exit 1; }
done

echo "All llama.cpp libraries built successfully"
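The per-library existence loop at the end of the Windows step generalizes into a small helper. The sketch below runs it against a throwaway directory with empty stand-in files rather than a real llama.cpp build tree:

```shell
# check_libs DIR LIB...  -> reports and returns the number of missing libraries.
check_libs() {
  dir="$1"; shift
  missing=0
  for lib in "$@"; do
    if [ ! -f "$dir/$lib" ]; then
      echo "ERROR: Missing $lib"
      missing=$((missing + 1))
    fi
  done
  return "$missing"
}

# Illustrative use with empty stand-in files, not real static libraries:
tmp="$(mktemp -d)"
touch "$tmp/libllama.a" "$tmp/libggml.a"
check_libs "$tmp" libllama.a libggml.a && echo "all present"
```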

# Additional packages for Flatpak building

- name: "Setup Flatpak"
5 changes: 5 additions & 0 deletions .github/workflows/desktop-performance.yml
@@ -72,6 +72,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- uses: ./.github/actions/ci-setup
with:
@@ -80,10 +82,13 @@
- name: Build Backend (Unix)
run: |
mkdir -p plz-out/bin/backend
# GPU is enabled by default (no -tags needed)
go build -o plz-out/bin/backend/seed-daemon-x86_64-unknown-linux-gnu ./backend/cmd/seed-daemon
env:
GOARCH: amd64
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go

- name: Set temporal version in package.json
run: |
8 changes: 8 additions & 0 deletions .github/workflows/desktop-smoke-test.yml
@@ -33,6 +33,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- uses: ./.github/actions/ci-setup
with:
@@ -44,20 +46,26 @@
if: matrix.config.os != 'windows-2025'
run: |
mkdir -p plz-out/bin/backend
# GPU is enabled by default (no -tags needed)
go build -o plz-out/bin/backend/seed-daemon-${{ matrix.config.daemon_name }} ./backend/cmd/seed-daemon
env:
GOARCH: ${{ matrix.config.goarch }}
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go

- name: Build Backend (Windows)
if: matrix.config.os == 'windows-2025'
run: |
mkdir -p plz-out/bin/backend
# GPU is enabled by default (no -tags needed)
go build -o plz-out/bin/backend/seed-daemon-${{ matrix.config.daemon_name }}.exe ./backend/cmd/seed-daemon
env:
GOOS: "windows"
GOARCH: ${{ matrix.config.goarch }}
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go

- name: Set temporal version in package.json
run: |
83 changes: 82 additions & 1 deletion .github/workflows/dev-desktop.yml
@@ -71,10 +71,38 @@ jobs:
- os: windows-2025
arch: x64
goarch: amd64
- daemon_name: x86_64-pc-windows-msvc
+ daemon_name: x86_64-pc-windows-gnu
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Cache GGUF model
uses: actions/cache@v4
with:
path: backend/llm/backends/llamacpp/models/*.gguf
key: gguf-model-granite-v2
enableCrossOsArchive: true

- name: Download GGUF model (Unix)
if: matrix.config.os != 'windows-2025'
run: |
if [ ! -f backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf ]; then
mkdir -p backend/llm/backends/llamacpp/models
curl -fSL -o backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf \
"https://huggingface.co/keisuke-miyako/granite-embedding-107m-multilingual-gguf-q8_0/resolve/main/granite-embedding-107m-multilingual-Q8_0.gguf?download=true"
fi

- name: Download GGUF model (Windows)
if: startsWith(matrix.config.os, 'windows')
shell: pwsh
run: |
$modelPath = "backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf"
if (-not (Test-Path $modelPath)) {
New-Item -ItemType Directory -Force -Path "backend/llm/backends/llamacpp/models"
Invoke-WebRequest -Uri "https://huggingface.co/keisuke-miyako/granite-embedding-107m-multilingual-gguf-q8_0/resolve/main/granite-embedding-107m-multilingual-Q8_0.gguf?download=true" -OutFile $modelPath
}

- uses: ./.github/actions/ci-setup
with:
@@ -86,20 +114,73 @@
if: matrix.config.os != 'windows-2025'
run: |
mkdir -p plz-out/bin/backend
# GPU is enabled by default (no -tags needed)
go build -o plz-out/bin/backend/seed-daemon-${{ matrix.config.daemon_name }} ./backend/cmd/seed-daemon
env:
GOARCH: ${{ matrix.config.goarch }}
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go

- name: Build Backend (Windows)
if: matrix.config.os == 'windows-2025'
shell: bash
run: |
mkdir -p plz-out/bin/backend
# GPU is enabled by default (no -tags needed)
go build -o plz-out/bin/backend/seed-daemon-${{ matrix.config.daemon_name }}.exe ./backend/cmd/seed-daemon
env:
GOOS: "windows"
GOARCH: ${{ matrix.config.goarch }}
CGO_ENABLED: 1
CGO_LDFLAGS: -static-libgcc -static-libstdc++
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go

- name: Stage Windows runtime DLL
if: matrix.config.os == 'windows-2025'
shell: bash
run: |
set -euo pipefail
DLL_PATH="$(gcc -print-file-name=libwinpthread-1.dll)"

if [ ! -f "$DLL_PATH" ]; then
echo "ERROR: libwinpthread-1.dll not found in gcc toolchain"
exit 1
fi

cp "$DLL_PATH" plz-out/bin/backend/libwinpthread-1.dll
ls -la plz-out/bin/backend/libwinpthread-1.dll
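`gcc -print-file-name` does the heavy lifting in the staging step: it prints the full path when the file is on the toolchain's search path, and echoes the bare name back unchanged when it is not — which is why the step follows up with an explicit `-f` check. A sketch of that behavior (requires a `gcc` on PATH; the DLL is only present on MinGW toolchains):

```shell
# If gcc cannot locate the file, it prints the argument back verbatim,
# so "not found" must be detected with a file-existence test.
locate_dll() {
  p="$(gcc -print-file-name="$1")"
  if [ -f "$p" ]; then
    echo "found: $p"
  else
    echo "not found: $1"
  fi
}

locate_dll "libwinpthread-1.dll"
locate_dll "no-such-library.dll"   # prints "not found: no-such-library.dll"
```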

- name: Verify Windows daemon runtime deps
if: matrix.config.os == 'windows-2025'
shell: bash
run: |
set -euo pipefail
BIN="plz-out/bin/backend/seed-daemon-${{ matrix.config.daemon_name }}.exe"

if ! command -v objdump >/dev/null 2>&1; then
echo "objdump not available on runner; skipping dependency check"
exit 0
fi

DLLS="$(objdump -p "$BIN" | awk '/DLL Name:/ {print $3}')"
echo "Windows DLL imports:"
echo "$DLLS"

if echo "$DLLS" | grep -Eiq '^(libstdc\+\+-6\.dll|libgcc_s_seh-1\.dll|libgomp-1\.dll)$'; then
echo "ERROR: MinGW runtime DLL dependency is still present"
exit 1
fi

if echo "$DLLS" | grep -Eiq '^libwinpthread-1\.dll$'; then
if [ ! -f "plz-out/bin/backend/libwinpthread-1.dll" ]; then
echo "ERROR: daemon imports libwinpthread-1.dll but runtime DLL is not staged"
exit 1
fi

echo "libwinpthread-1.dll import detected and staged correctly"
fi

- name: Set MacOS signing certs
if: startsWith(matrix.config.os, 'macos')
48 changes: 46 additions & 2 deletions .github/workflows/dev-docker-images.yml
@@ -28,14 +28,56 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- name: Cache GGUF model
uses: actions/cache@v4
with:
path: backend/llm/backends/llamacpp/models/*.gguf
key: gguf-model-granite-v2
enableCrossOsArchive: true

- name: Download GGUF model
run: |
if [ ! -f backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf ]; then
mkdir -p backend/llm/backends/llamacpp/models
curl -fSL -o backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf \
"https://huggingface.co/keisuke-miyako/granite-embedding-107m-multilingual-gguf-q8_0/resolve/main/granite-embedding-107m-multilingual-Q8_0.gguf?download=true"
fi

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: "1.25.4"
- run: go test --count 1 ./backend/...

- name: Install build dependencies
run: |
sudo apt-get update
sudo apt-get install -y cmake g++ libvulkan-dev glslc

- name: Build llama.cpp (with Vulkan GPU support)
run: |
cd backend/util/llama-go
BUILD_TYPE=vulkan CMAKE_ARGS="-DBUILD_SHARED_LIBS=OFF" make libbinding.a

- name: Run tests
run: go test --count 1 ./backend/...
env:
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go
LLAMA_LOG: error

# Run tests again with the race-detector.
# Using the same job to reuse the build cache.
- run: go test --count 1 -race ./backend/...
- name: Run tests with race detector
run: go test --count 1 -race ./backend/...
env:
CGO_ENABLED: 1
LIBRARY_PATH: ${{ github.workspace }}/backend/util/llama-go
C_INCLUDE_PATH: ${{ github.workspace }}/backend/util/llama-go
LLAMA_LOG: error
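The plain and `-race` test steps above export an identical environment; pulled out, it is just four variables (here `GITHUB_WORKSPACE` stands in for `${{ github.workspace }}`):

```shell
# Environment shared by both go-test runs in the backend-tests job.
GITHUB_WORKSPACE="${GITHUB_WORKSPACE:-$PWD}"
export CGO_ENABLED=1
export LIBRARY_PATH="$GITHUB_WORKSPACE/backend/util/llama-go"
export C_INCLUDE_PATH="$GITHUB_WORKSPACE/backend/util/llama-go"
export LLAMA_LOG=error   # silence llama.cpp logging during tests

# The two invocations (need the built libraries to actually run):
# go test --count 1 ./backend/...
# go test --count 1 -race ./backend/...

echo "CGO_ENABLED=$CGO_ENABLED LLAMA_LOG=$LLAMA_LOG"
```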
generate-docker-images:
runs-on: ubuntu-latest
needs: [frontend-tests, backend-tests]
@@ -51,6 +93,8 @@

- name: Checkout code
uses: actions/checkout@v4
with:
submodules: recursive

- name: Get commit date for the triggering commit
run: |
16 changes: 16 additions & 0 deletions .github/workflows/lint-go.yml
Original file line number Diff line number Diff line change
@@ -25,6 +25,22 @@ jobs:
with:
go-version: "1.25.4"
- uses: actions/checkout@v4

- name: Cache GGUF model
uses: actions/cache@v4
with:
path: backend/llm/backends/llamacpp/models/*.gguf
key: gguf-model-granite-v2
enableCrossOsArchive: true

- name: Download GGUF model
run: |
if [ ! -f backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf ]; then
mkdir -p backend/llm/backends/llamacpp/models
curl -fSL -o backend/llm/backends/llamacpp/models/granite-embedding-107m-multilingual-Q8_0.gguf \
"https://huggingface.co/keisuke-miyako/granite-embedding-107m-multilingual-gguf-q8_0/resolve/main/granite-embedding-107m-multilingual-Q8_0.gguf?download=true"
fi

- uses: golangci/golangci-lint-action@v8
with:
version: latest