CortexReach · NW15D · Mar 26, 2026
diff --git a/README.md b/README.md
@@ -499,9 +499,32 @@ Cross-encoder reranking supports multiple providers via `rerankProvider`:
 | **SiliconFlow** (free tier available) | `siliconflow` | `BAAI/bge-reranker-v2-m3` |
 | **Voyage AI** | `voyage` | `rerank-2.5` |
 | **Pinecone** | `pinecone` | `bge-reranker-v2-m3` |
+| **llama.cpp** (local) | `llamacpp` | `bge-reranker-v2-m3` |
 
 Any Jina-compatible rerank endpoint also works — set `rerankProvider: "jina"` and point `rerankEndpoint` to your service (e.g., Hugging Face TEI, DashScope `qwen3-rerank`).
 
+**Local reranking with llama.cpp:**
+```bash
+# Start llama.cpp server with a reranker model
+./llama-server -m bge-reranker-v2-m3.gguf --rerank --port 8080
+
+# Or with API key protection
+./llama-server -m bge-reranker-v2-m3.gguf --rerank --api-key your-key --port 8080
+```
+
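+Configuration (`rerankEndpoint` must be set explicitly, since it otherwise defaults to the Jina cloud API; `rerankApiKey` is optional and can be omitted when the server runs without `--api-key`):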
+```json
+{
+  "retrieval": {
+    "rerank": "cross-encoder",
+    "rerankProvider": "llamacpp",
+    "rerankEndpoint": "http://localhost:8080/v1/rerank",
+    "rerankModel": "bge-reranker-v2-m3",
+    "rerankApiKey": "optional-api-key"
+  }
+}
+```
+
 </details>
 
 <details>

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -38,7 +38,7 @@
     ]
   },
   "scripts": {
-    "test": "node test/embedder-error-hints.test.mjs && node test/cjk-recursion-regression.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node --test test/strip-envelope-metadata.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs && node --test test/clawteam-scope.test.mjs && node --test test/cross-process-lock.test.mjs && node --test test/preference-slots.test.mjs",
+    "test": "node test/embedder-error-hints.test.mjs && node test/cjk-recursion-regression.test.mjs && node test/migrate-legacy-schema.test.mjs && node --test test/config-session-strategy-migration.test.mjs && node --test test/scope-access-undefined.test.mjs && node --test test/reflection-bypass-hook.test.mjs && node --test test/smart-extractor-scope-filter.test.mjs && node --test test/store-empty-scope-filter.test.mjs && node --test test/recall-text-cleanup.test.mjs && node test/update-consistency-lancedb.test.mjs && node --test test/strip-envelope-metadata.test.mjs && node test/cli-smoke.mjs && node test/functional-e2e.mjs && node test/retriever-rerank-regression.mjs && node --test test/retriever-llamacpp-rerank.test.mjs && node test/smart-memory-lifecycle.mjs && node test/smart-extractor-branches.mjs && node test/plugin-manifest-regression.mjs && node --test test/sync-plugin-version.test.mjs && node test/smart-metadata-v2.mjs && node test/vector-search-cosine.test.mjs && node test/context-support-e2e.mjs && node test/temporal-facts.test.mjs && node test/memory-update-supersede.test.mjs && node test/memory-upgrader-diagnostics.test.mjs && node --test test/llm-api-key-client.test.mjs && node --test test/llm-oauth-client.test.mjs && node --test test/cli-oauth-login.test.mjs && node --test test/workflow-fork-guards.test.mjs && node --test test/clawteam-scope.test.mjs && node --test test/cross-process-lock.test.mjs && node --test test/preference-slots.test.mjs",
     "test:openclaw-host": "node test/openclaw-host-functional.mjs",
     "version": "node scripts/sync-plugin-version.mjs openclaw.plugin.json package.json && git add openclaw.plugin.json"
   },

diff --git a/src/retriever.ts b/src/retriever.ts
@@ -57,7 +57,8 @@ export interface RetrievalConfig {
     | "voyage"
     | "pinecone"
     | "dashscope"
-    | "tei";
+    | "tei"
+    | "llamacpp";
   /**
    * Length normalization: penalize long entries that dominate via sheer keyword
    * density. Formula: score *= 1 / (1 + log2(charLen / anchor)).
@@ -164,7 +165,8 @@ type RerankProvider =
   | "voyage"
   | "pinecone"
   | "dashscope"
-  | "tei";
+  | "tei"
+  | "llamacpp";
 
 interface RerankItem {
   index: number;
@@ -237,6 +239,25 @@ function buildRerankRequest(
           top_k: topN,
         },
       };
+    case "llamacpp": {
+      // llama.cpp's rerank endpoint takes a Jina-style body: model, query, documents, top_n
+      // API key is optional - only add Authorization if provided
+      const headers: Record<string, string> = {
+        "Content-Type": "application/json",
+      };
+      if (apiKey) {
+        headers["Authorization"] = `Bearer ${apiKey}`;
+      }
+      return {
+        headers,
+        body: {
+          model,
+          query,
+          documents: candidates,
+          top_n: topN,
+        },
+      };
+    }
     case "siliconflow":
     case "jina":
     default:
@@ -321,6 +342,14 @@ function parseRerankResponse(
         parseItems(objectData?.results, ["relevance_score", "score"])
       );
     }
+    case "llamacpp": {
+      // llama.cpp returns: { results: [{ index, relevance_score }] }
+      // Same shape as Jina/SiliconFlow; falls back to a top-level `data` array if `results` is absent
+      return (
+        parseItems(objectData?.results, ["relevance_score", "score"]) ??
+        parseItems(objectData?.data, ["relevance_score", "score"])
+      );
+    }
     case "siliconflow":
     case "jina":
     default: {
@@ -840,9 +869,12 @@ export class MemoryRetriever {
     }
 
     // Try cross-encoder rerank via configured provider API
-    if (this.config.rerank === "cross-encoder" && this.config.rerankApiKey) {
+    // For llama.cpp, API key is optional (local deployment)
+    const provider = this.config.rerankProvider || "jina";
+    const isApiKeyOptional = provider === "llamacpp";
+
+    if (this.config.rerank === "cross-encoder" && (this.config.rerankApiKey || isApiKeyOptional)) {
       try {
-        const provider = this.config.rerankProvider || "jina";
         const model = this.config.rerankModel || "jina-reranker-v3";
         const endpoint =
           this.config.rerankEndpoint || "https://api.jina.ai/v1/rerank";
@@ -851,7 +883,7 @@ export class MemoryRetriever {
         // Build provider-specific request
         const { headers, body } = buildRerankRequest(
           provider,
-          this.config.rerankApiKey,
+          this.config.rerankApiKey ?? "",
           model,
           query,
           documents,