From 39ba7038cdcff90cb14c315bdbfd5d008b298e09 Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Wed, 18 Mar 2026 19:01:30 -0700
Subject: [PATCH 1/6] fix: add NVIDIA NIM provider profile for input_type
 embedding field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NVIDIA NIM rejects the `task` field and requires `input_type` instead.
This adds a proper "nvidia" provider profile following the architecture
introduced in #216, rather than hardcoding URL checks in buildPayload.

Detection: matches *.nvidia.com base URLs, nvidia/* model prefixes,
and nv-embed* model names.

Capabilities: sends input_type instead of task, maps retrieval.query →
query and retrieval.passage → passage, supports encoding_format: float.

Includes 5 automated tests covering:
- NVIDIA sends input_type (not task)
- retrieval.passage → passage value mapping
- nvidia/ model prefix detection
- Jina still sends task field
- Generic providers send neither

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/embedder.ts                           |  21 +++
 test/nvidia-nim-provider-profile.test.mjs | 153 ++++++++++++++++++++++
 2 files changed, 174 insertions(+)
 create mode 100644 test/nvidia-nim-provider-profile.test.mjs

diff --git a/src/embedder.ts b/src/embedder.ts
index 74947fb5..d21097ef 100644
--- a/src/embedder.ts
+++ b/src/embedder.ts
@@ -107,6 +107,7 @@ type EmbeddingProviderProfile =
   | "azure-openai"
   | "jina"
   | "voyage-compatible"
+  | "nvidia"
   | "generic-openai-compatible";
 
 interface EmbeddingCapabilities {
@@ -207,6 +208,7 @@ function getProviderLabel(baseURL: string | undefined, model: string): string {
     if (profile === "voyage-compatible" && /api\.voyageai\.com/i.test(base)) return "Voyage";
     if (profile === "openai" && /api\.openai\.com/i.test(base)) return "OpenAI";
     if (profile === "azure-openai" || /\.openai\.azure\.com/i.test(base)) return "Azure OpenAI";
+    if (profile === "nvidia") return "NVIDIA NIM";
 
     try {
       return new URL(base).host;
@@ -223,6 +225,8 @@ function getProviderLabel(baseURL: string | undefined, model: string): string {
     case "openai":
     case "azure-openai":
       return "OpenAI";
+    case "nvidia":
+      return "NVIDIA NIM";
     default:
       return "embedding provider";
   }
@@ -241,6 +245,10 @@ function detectEmbeddingProviderProfile(
     return "voyage-compatible";
   }
 
+  if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) {
+    return "nvidia";
+  }
+
   return "generic-openai-compatible";
 }
 
@@ -273,6 +281,19 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC
         },
         dimensionsField: "output_dimension",
       };
+    case "nvidia":
+      return {
+        encoding_format: true,
+        normalized: false,
+        taskField: "input_type",
+        taskValueMap: {
+          "retrieval.query": "query",
+          "retrieval.passage": "passage",
+          "query": "query",
+          "passage": "passage",
+        },
+        dimensionsField: null,
+      };
     case "generic-openai-compatible":
     default:
       return {
diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs
new file mode 100644
index 00000000..39233357
--- /dev/null
+++ b/test/nvidia-nim-provider-profile.test.mjs
@@ -0,0 +1,153 @@
+import assert from "node:assert/strict";
+import http from "node:http";
+import { describe, it } from "node:test";
+
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { Embedder } = jiti("../src/embedder.ts");
+
+/**
+ * Create a capture server that records POST bodies and returns embeddings
+ * with configurable dimension count.
+ */
+async function withCaptureServer(dims, fn) {
+  let capturedBody = null;
+  const fakeVec = Array.from({ length: dims }, (_, i) => i * 0.01);
+  const server = http.createServer((req, res) => {
+    if (req.url === "/v1/embeddings" && req.method === "POST") {
+      const chunks = [];
+      req.on("data", (c) => chunks.push(c));
+      req.on("end", () => {
+        capturedBody = JSON.parse(Buffer.concat(chunks).toString());
+        res.writeHead(200, { "content-type": "application/json" });
+        res.end(
+          JSON.stringify({
+            object: "list",
+            data: [{ object: "embedding", index: 0, embedding: fakeVec }],
+            usage: { prompt_tokens: 5, total_tokens: 5 },
+          }),
+        );
+      });
+      return;
+    }
+    res.writeHead(404);
+    res.end("not found");
+  });
+
+  await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve));
+  const address = server.address();
+  const port = typeof address === "object" && address ? address.port : 0;
+  const baseURL = `http://127.0.0.1:${port}/v1`;
+
+  try {
+    await fn({ baseURL, port, getCaptured: () => capturedBody });
+  } finally {
+    await new Promise((resolve) => server.close(resolve));
+  }
+}
+
+describe("NVIDIA NIM provider profile", () => {
+  it("sends input_type=query for NVIDIA NIM (nv-embed model prefix)", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "nv-embedqa-e5-v5",
+        apiKey: "test-key",
+        dimensions: dims,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+
+      await embedder.embedQuery("test query");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.input_type, "query", "Should send input_type=query for NVIDIA");
+      assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA");
+    });
+  });
+
+  it("maps retrieval.passage → passage for NVIDIA NIM", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "nv-embedqa-e5-v5",
+        apiKey: "test-key",
+        dimensions: dims,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+
+      await embedder.embedPassage("test document");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.input_type, "passage", "Should map retrieval.passage → passage");
+      assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA");
+    });
+  });
+
+  it("detects NVIDIA from nvidia/ model prefix", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "nvidia/llama-3.2-nv-embedqa-1b-v2",
+        apiKey: "test-key",
+        dimensions: dims,
+        taskQuery: "query",
+        taskPassage: "passage",
+      });
+
+      await embedder.embedQuery("test");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.input_type, "query", "nvidia/ model prefix should trigger input_type");
+      assert.equal(body.task, undefined, "nvidia/ model prefix should NOT send task");
+    });
+  });
+
+  it("non-NVIDIA: Jina sends task field", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "jina-embeddings-v5-text-small",
+        apiKey: "test-key",
+        dimensions: dims,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+
+      await embedder.embedQuery("test query");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.task, "retrieval.query", "Jina should send task field");
+      assert.equal(body.input_type, undefined, "Jina should NOT send input_type");
+    });
+  });
+
+  it("non-NVIDIA: generic OpenAI-compatible sends neither task nor input_type", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "custom-embed-model",
+        apiKey: "test-key",
+        dimensions: dims,
+      });
+
+      await embedder.embedQuery("test query");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.task, undefined, "Generic provider should NOT send task");
+      assert.equal(body.input_type, undefined, "Generic provider should NOT send input_type");
+    });
+  });
+});

From 423f8c2d38cf3fee1cdff941e848f58a182ab52d Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Wed, 18 Mar 2026 23:23:56 -0700
Subject: [PATCH 2/6] fix: forward dimensions for NVIDIA dynamic embedding
 models

NVIDIA NIM's OpenAI-compatible API supports a `dimensions` parameter
for dynamic models like nvidia/llama-3.2-nv-embedqa-1b-v2. Setting
dimensionsField to null prevented buildPayload() from forwarding the
configured dimensions, causing dimension mismatch errors.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/embedder.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/embedder.ts b/src/embedder.ts
index d21097ef..22d150a9 100644
--- a/src/embedder.ts
+++ b/src/embedder.ts
@@ -292,7 +292,7 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC
           "query": "query",
           "passage": "passage",
         },
-        dimensionsField: null,
+        dimensionsField: "dimensions",
       };
     case "generic-openai-compatible":
     default:

From a9b4f668eeb0778361ae339766f4a1554f1d16aa Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Sat, 21 Mar 2026 20:26:31 -0700
Subject: [PATCH 3/6] Add NVIDIA detection test and update imports

Add a test that detects the NVIDIA profile from a .nvidia.com baseURL
(verified via the provider label in the formatted error message), and
import formatEmbeddingProviderError in the test file to support it.
---
 test/nvidia-nim-provider-profile.test.mjs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs
index 39233357..cfd80ca5 100644
--- a/test/nvidia-nim-provider-profile.test.mjs
+++ b/test/nvidia-nim-provider-profile.test.mjs
@@ -5,7 +5,7 @@ import { describe, it } from "node:test";
 import jitiFactory from "jiti";
 
 const jiti = jitiFactory(import.meta.url, { interopDefault: true });
-const { Embedder } = jiti("../src/embedder.ts");
+const { Embedder, formatEmbeddingProviderError } = jiti("../src/embedder.ts");
 
 /**
  * Create a capture server that records POST bodies and returns embeddings
@@ -111,6 +111,16 @@ describe("NVIDIA NIM provider profile", () => {
     });
   });
 
+  it("detects NVIDIA from a .nvidia.com baseURL", () => {
+    const message = formatEmbeddingProviderError(new Error("boom"), {
+      baseURL: "https://build.nvidia.com/v1",
+      model: "custom-embed-model",
+      mode: "single",
+    });
+
+    assert.equal(message, "Failed to generate embedding from NVIDIA NIM: boom");
+  });
+
   it("non-NVIDIA: Jina sends task field", async () => {
     const dims = 128;
     await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {

From 3f26c60be26c404a50f67ad0fce037ff744ba3f4 Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Sat, 21 Mar 2026 20:28:34 -0700
Subject: [PATCH 4/6] Refactor NVIDIA compatibility check regex

Simplify the NVIDIA baseURL detection regex: the `\.nvidia\.com`
alternative already matches integrate.api.nvidia.com, so the explicit
`integrate\.api\.nvidia\.com` alternative was redundant. No behavior change.
---
 src/embedder.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/embedder.ts b/src/embedder.ts
index 22d150a9..a19993b4 100644
--- a/src/embedder.ts
+++ b/src/embedder.ts
@@ -245,7 +245,7 @@ function detectEmbeddingProviderProfile(
     return "voyage-compatible";
   }
 
-  if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) {
+  if (/\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) {
     return "nvidia";
   }
 

From e6e705de1b50a5bd6fea76f5e0f279e434b166ff Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Tue, 24 Mar 2026 21:09:25 -0700
Subject: [PATCH 5/6] fix: tighten NVIDIA detection precedence and add negative
 test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address PR review feedback from @AliceLJY:

1. Detection ordering: split host-based detection (runs first) from
   model-prefix fallback. .nvidia.com host now takes precedence over
   jina-/voyage model prefixes, preventing misclassification of models
   like jina-xxx hosted on NVIDIA endpoints.

2. Detection scope safety: the broad .nvidia.com match is safe because
   buildPayload() only injects input_type when the user explicitly
   configures taskQuery/taskPassage. Non-retriever models (NV-CLIP etc.)
   are unaffected. Added comment documenting this guard.

3. Negative tests:
   - .nvidia.com + jina- model prefix → NVIDIA wins
   - NVIDIA profile without taskQuery/taskPassage → no input_type

All 8 NVIDIA tests + existing embedder tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/embedder.ts                           | 23 +++++++----
 test/nvidia-nim-provider-profile.test.mjs | 48 +++++++++++++++++++++++
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/src/embedder.ts b/src/embedder.ts
index a19993b4..7e01bf82 100644
--- a/src/embedder.ts
+++ b/src/embedder.ts
@@ -238,16 +238,19 @@ function detectEmbeddingProviderProfile(
 ): EmbeddingProviderProfile {
   const base = baseURL || "";
 
+  // Host-based detection runs first — endpoint owner semantics take precedence
+  // over model-name heuristics to avoid misclassifying e.g. a jina-xxx model
+  // served from .nvidia.com as Jina instead of NVIDIA.
   if (/api\.openai\.com/i.test(base)) return "openai";
   if (/\.openai\.azure\.com/i.test(base)) return "azure-openai";
-  if (/api\.jina\.ai/i.test(base) || /^jina-/i.test(model)) return "jina";
-  if (/api\.voyageai\.com/i.test(base) || /^voyage\b/i.test(model)) {
-    return "voyage-compatible";
-  }
+  if (/api\.jina\.ai/i.test(base)) return "jina";
+  if (/api\.voyageai\.com/i.test(base)) return "voyage-compatible";
+  if (/\.nvidia\.com/i.test(base)) return "nvidia";
 
-  if (/\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) {
-    return "nvidia";
-  }
+  // Model-prefix fallback — only when baseURL didn't match a known host
+  if (/^jina-/i.test(model)) return "jina";
+  if (/^voyage\b/i.test(model)) return "voyage-compatible";
+  if (/^nvidia\//i.test(model) || /^nv-embed/i.test(model)) return "nvidia";
 
   return "generic-openai-compatible";
 }
@@ -654,7 +657,11 @@ export class Embedder {
       payload.normalized = this._normalized;
     }
 
-    // Task hint: field name and optional value translation are provider-defined.
+    // Task hint: only injected when BOTH the provider profile defines a taskField
+    // AND the caller passes a task value (from user-configured taskQuery/taskPassage).
+    // This means broad provider detection (e.g. any .nvidia.com host) is safe —
+    // non-retriever models that don't expect input_type are unaffected unless the
+    // user explicitly configures task hints.
     if (this._capabilities.taskField && task) {
       const cap = this._capabilities;
       const value = cap.taskValueMap?.[task] ?? task;
diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs
index cfd80ca5..c1568423 100644
--- a/test/nvidia-nim-provider-profile.test.mjs
+++ b/test/nvidia-nim-provider-profile.test.mjs
@@ -121,6 +121,54 @@ describe("NVIDIA NIM provider profile", () => {
     assert.equal(message, "Failed to generate embedding from NVIDIA NIM: boom");
   });
 
+  it(".nvidia.com baseURL with conflicting jina- model prefix → NVIDIA wins", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      // NOTE: nvidiaBaseURL is built here but never used — the Embedder below
+      // targets the local capture server, and no request is sent in this test.
+      const nvidiaBaseURL = baseURL.replace("127.0.0.1", "integrate.api.nvidia.com");
+      const embedder = new Embedder({
+        baseURL, // actual network target
+        model: "jina-embeddings-v3",
+        apiKey: "test-key",
+        dimensions: dims,
+        taskQuery: "retrieval.query",
+        taskPassage: "retrieval.passage",
+      });
+      // Host-over-model precedence is checked through the provider label in the
+      // formatted error message, using a real .nvidia.com baseURL for detection:
+      const message = formatEmbeddingProviderError(new Error("test"), {
+        baseURL: "https://integrate.api.nvidia.com/v1",
+        model: "jina-embeddings-v3",
+        mode: "single",
+      });
+      assert.equal(message, "Failed to generate embedding from NVIDIA NIM: test",
+        ".nvidia.com host should win over jina- model prefix");
+    });
+  });
+
+  it(".nvidia.com baseURL without taskQuery/taskPassage → no input_type injected", async () => {
+    const dims = 128;
+    await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {
+      const embedder = new Embedder({
+        baseURL,
+        model: "nvidia/nv-clip-v1",
+        apiKey: "test-key",
+        dimensions: dims,
+        // Deliberately omit taskQuery and taskPassage
+      });
+
+      await embedder.embedQuery("test query");
+      const body = getCaptured();
+
+      assert.ok(body, "Request body should be captured");
+      assert.equal(body.input_type, undefined,
+        "NVIDIA profile without taskQuery/taskPassage should NOT inject input_type");
+      assert.equal(body.task, undefined,
+        "NVIDIA profile without taskQuery/taskPassage should NOT inject task");
+    });
+  });
+
   it("non-NVIDIA: Jina sends task field", async () => {
     const dims = 128;
     await withCaptureServer(dims, async ({ baseURL, getCaptured }) => {

From 255475d44baa7eb9beb3672a2edbe0e5f6e97af3 Mon Sep 17 00:00:00 2001
From: VicJay <98076606+vicjayjay@users.noreply.github.com>
Date: Tue, 24 Mar 2026 21:16:27 -0700
Subject: [PATCH 6/6] fix: parse hostname for provider detection instead of raw
 URL matching

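Address Codex review: testing the regex /\.nvidia\.com/ against the full
baseURL string can misclassify proxy URLs that contain ".nvidia.com"
in their path or query. Parse the hostname via new URL() and match it
with endsWith() for all host-based provider checks. If the baseURL cannot
be parsed, the host checks are skipped and detection falls back to the
model-prefix heuristics.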

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/embedder.ts | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/embedder.ts b/src/embedder.ts
index 7e01bf82..74ab48de 100644
--- a/src/embedder.ts
+++ b/src/embedder.ts
@@ -237,15 +237,19 @@ function detectEmbeddingProviderProfile(
   model: string,
 ): EmbeddingProviderProfile {
   const base = baseURL || "";
+  let host = "";
+  try { host = new URL(base).hostname.toLowerCase(); } catch { /* invalid URL — skip host checks */ }
 
   // Host-based detection runs first — endpoint owner semantics take precedence
   // over model-name heuristics to avoid misclassifying e.g. a jina-xxx model
   // served from .nvidia.com as Jina instead of NVIDIA.
-  if (/api\.openai\.com/i.test(base)) return "openai";
-  if (/\.openai\.azure\.com/i.test(base)) return "azure-openai";
-  if (/api\.jina\.ai/i.test(base)) return "jina";
-  if (/api\.voyageai\.com/i.test(base)) return "voyage-compatible";
-  if (/\.nvidia\.com/i.test(base)) return "nvidia";
+  // Match on parsed hostname to avoid false positives from proxy URLs that
+  // contain provider domains in their path or query string.
+  if (host.endsWith("api.openai.com")) return "openai";
+  if (host.endsWith(".openai.azure.com")) return "azure-openai";
+  if (host.endsWith("api.jina.ai")) return "jina";
+  if (host.endsWith("api.voyageai.com")) return "voyage-compatible";
+  if (host.endsWith(".nvidia.com") || host === "nvidia.com") return "nvidia";
 
   // Model-prefix fallback — only when baseURL didn't match a known host
   if (/^jina-/i.test(model)) return "jina";