From 39ba7038cdcff90cb14c315bdbfd5d008b298e09 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Wed, 18 Mar 2026 19:01:30 -0700 Subject: [PATCH 1/6] fix: add NVIDIA NIM provider profile for input_type embedding field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA NIM rejects the `task` field and requires `input_type` instead. This adds a proper "nvidia" provider profile following the architecture introduced in #216, rather than hardcoding URL checks in buildPayload. Detection: matches *.nvidia.com base URLs, nvidia/* model prefixes, and nv-embed* model names. Capabilities: sends input_type instead of task, maps retrieval.query → query and retrieval.passage → passage, supports encoding_format: float. Includes 5 automated tests covering: - NVIDIA sends input_type (not task) - retrieval.passage → passage value mapping - nvidia/ model prefix detection - Jina still sends task field - Generic providers send neither Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 21 +++ test/nvidia-nim-provider-profile.test.mjs | 153 ++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 test/nvidia-nim-provider-profile.test.mjs diff --git a/src/embedder.ts b/src/embedder.ts index 74947fb5..d21097ef 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -107,6 +107,7 @@ type EmbeddingProviderProfile = | "azure-openai" | "jina" | "voyage-compatible" + | "nvidia" | "generic-openai-compatible"; interface EmbeddingCapabilities { @@ -207,6 +208,7 @@ function getProviderLabel(baseURL: string | undefined, model: string): string { if (profile === "voyage-compatible" && /api\.voyageai\.com/i.test(base)) return "Voyage"; if (profile === "openai" && /api\.openai\.com/i.test(base)) return "OpenAI"; if (profile === "azure-openai" || /\.openai\.azure\.com/i.test(base)) return "Azure OpenAI"; + if (profile === "nvidia") return "NVIDIA NIM"; try { return new URL(base).host; @@ -223,6 +225,8 @@ function getProviderLabel(baseURL: string | undefined, model: string): string { case "openai": case "azure-openai": return "OpenAI"; + case "nvidia": + return "NVIDIA NIM"; default: return "embedding provider"; } @@ -241,6 +245,10 @@ function detectEmbeddingProviderProfile( return "voyage-compatible"; } + if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { + return "nvidia"; + } + return "generic-openai-compatible"; } @@ -273,6 +281,19 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC }, dimensionsField: "output_dimension", }; + case "nvidia": + return { + encoding_format: true, + normalized: false, + taskField: "input_type", + taskValueMap: { + "retrieval.query": "query", + "retrieval.passage": "passage", + "query": "query", + "passage": "passage", + }, + dimensionsField: null, + }; case "generic-openai-compatible": default: return { diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs new file mode 100644 index 00000000..39233357 --- /dev/null +++ b/test/nvidia-nim-provider-profile.test.mjs @@ -0,0 +1,153 @@ +import assert from "node:assert/strict"; +import http from "node:http"; +import { describe, it } from "node:test"; + +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { Embedder } = jiti("../src/embedder.ts"); + +/** + * Create a capture server that records POST bodies and returns embeddings + * with configurable dimension count. + */ +async function withCaptureServer(dims, fn) { + let capturedBody = null; + const fakeVec = Array.from({ length: dims }, (_, i) => i * 0.01); + const server = http.createServer((req, res) => { + if (req.url === "/v1/embeddings" && req.method === "POST") { + const chunks = []; + req.on("data", (c) => chunks.push(c)); + req.on("end", () => { + capturedBody = JSON.parse(Buffer.concat(chunks).toString()); + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + object: "list", + data: [{ object: "embedding", index: 0, embedding: fakeVec }], + usage: { prompt_tokens: 5, total_tokens: 5 }, + }), + ); + }); + return; + } + res.writeHead(404); + res.end("not found"); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address(); + const port = typeof address === "object" && address ? address.port : 0; + const baseURL = `http://127.0.0.1:${port}/v1`; + + try { + await fn({ baseURL, port, getCaptured: () => capturedBody }); + } finally { + await new Promise((resolve) => server.close(resolve)); + } +} + +describe("NVIDIA NIM provider profile", () => { + it("sends input_type=query for NVIDIA NIM (nv-embed model prefix)", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nv-embedqa-e5-v5", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "query", "Should send input_type=query for NVIDIA"); + assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA"); + }); + }); + + it("maps retrieval.passage → passage for NVIDIA NIM", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nv-embedqa-e5-v5", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedPassage("test document"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "passage", "Should map retrieval.passage → passage"); + assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA"); + }); + }); + + it("detects NVIDIA from nvidia/ model prefix", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + apiKey: "test-key", + dimensions: dims, + taskQuery: "query", + taskPassage: "passage", + }); + + await embedder.embedQuery("test"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "query", "nvidia/ model prefix should trigger input_type"); + assert.equal(body.task, undefined, "nvidia/ model prefix should NOT send task"); + }); + }); + + it("non-NVIDIA: Jina sends task field", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "jina-embeddings-v5-text-small", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.task, "retrieval.query", "Jina should send task field"); + assert.equal(body.input_type, undefined, "Jina should NOT send input_type"); + }); + }); + + it("non-NVIDIA: generic OpenAI-compatible sends neither task nor input_type", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "custom-embed-model", + apiKey: "test-key", + dimensions: dims, + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.task, undefined, "Generic provider should NOT send task"); + assert.equal(body.input_type, undefined, "Generic provider should NOT send input_type"); + }); + }); +}); From 423f8c2d38cf3fee1cdff941e848f58a182ab52d Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Wed, 18 Mar 2026 23:23:56 -0700 Subject: [PATCH 2/6] fix: forward dimensions for NVIDIA dynamic embedding models NVIDIA NIM's OpenAI-compatible API supports a `dimensions` parameter for dynamic models like nvidia/llama-3.2-nv-embedqa-1b-v2. Setting dimensionsField to null prevented buildPayload() from forwarding the configured dimensions, causing dimension mismatch errors. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedder.ts b/src/embedder.ts index d21097ef..22d150a9 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -292,7 +292,7 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC "query": "query", "passage": "passage", }, - dimensionsField: null, + dimensionsField: "dimensions", }; case "generic-openai-compatible": default: From a9b4f668eeb0778361ae339766f4a1554f1d16aa Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:26:31 -0700 Subject: [PATCH 3/6] Add NVIDIA detection test and update imports Added a test case to detect NVIDIA from a .nvidia.com baseURL and modified import to include formatEmbeddingProviderError. --- test/nvidia-nim-provider-profile.test.mjs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs index 39233357..cfd80ca5 100644 --- a/test/nvidia-nim-provider-profile.test.mjs +++ b/test/nvidia-nim-provider-profile.test.mjs @@ -5,7 +5,7 @@ import { describe, it } from "node:test"; import jitiFactory from "jiti"; const jiti = jitiFactory(import.meta.url, { interopDefault: true }); -const { Embedder } = jiti("../src/embedder.ts"); +const { Embedder, formatEmbeddingProviderError } = jiti("../src/embedder.ts"); /** * Create a capture server that records POST bodies and returns embeddings @@ -111,6 +111,16 @@ describe("NVIDIA NIM provider profile", () => { }); }); + it("detects NVIDIA from a .nvidia.com baseURL", () => { + const message = formatEmbeddingProviderError(new Error("boom"), { + baseURL: "https://build.nvidia.com/v1", + model: "custom-embed-model", + mode: "single", + }); + + assert.equal(message, "Failed to generate embedding from NVIDIA NIM: boom"); + }); + it("non-NVIDIA: Jina sends task field", async () => { const dims = 128; await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { From 3f26c60be26c404a50f67ad0fce037ff744ba3f4 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:28:34 -0700 Subject: [PATCH 4/6] Refactor NVIDIA compatibility check regex Simplifies the NVIDIA provider baseURL detection pattern now that .nvidia.com already covers integrate.api.nvidia.com. --- src/embedder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedder.ts b/src/embedder.ts index 22d150a9..a19993b4 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -245,7 +245,7 @@ function detectEmbeddingProviderProfile( return "voyage-compatible"; } - if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { + if (/\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { return "nvidia"; } From e6e705de1b50a5bd6fea76f5e0f279e434b166ff Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Tue, 24 Mar 2026 21:09:25 -0700 Subject: [PATCH 5/6] fix: tighten NVIDIA detection precedence and add negative test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR review feedback from @AliceLJY: 1. Detection ordering: split host-based detection (runs first) from model-prefix fallback. .nvidia.com host now takes precedence over jina-/voyage model prefixes, preventing misclassification of models like jina-xxx hosted on NVIDIA endpoints. 2. Detection scope safety: the broad .nvidia.com match is safe because buildPayload() only injects input_type when the user explicitly configures taskQuery/taskPassage. Non-retriever models (NV-CLIP etc.) are unaffected. Added comment documenting this guard. 3. Negative tests: - .nvidia.com + jina- model prefix → NVIDIA wins - NVIDIA profile without taskQuery/taskPassage → no input_type All 8 NVIDIA tests + existing embedder tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 23 +++++++---- test/nvidia-nim-provider-profile.test.mjs | 48 +++++++++++++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/src/embedder.ts b/src/embedder.ts index a19993b4..7e01bf82 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -238,16 +238,19 @@ function detectEmbeddingProviderProfile( ): EmbeddingProviderProfile { const base = baseURL || ""; + // Host-based detection runs first — endpoint owner semantics take precedence + // over model-name heuristics to avoid misclassifying e.g. a jina-xxx model + // served from .nvidia.com as Jina instead of NVIDIA. if (/api\.openai\.com/i.test(base)) return "openai"; if (/\.openai\.azure\.com/i.test(base)) return "azure-openai"; - if (/api\.jina\.ai/i.test(base) || /^jina-/i.test(model)) return "jina"; - if (/api\.voyageai\.com/i.test(base) || /^voyage\b/i.test(model)) { - return "voyage-compatible"; - } + if (/api\.jina\.ai/i.test(base)) return "jina"; + if (/api\.voyageai\.com/i.test(base)) return "voyage-compatible"; + if (/\.nvidia\.com/i.test(base)) return "nvidia"; - if (/\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { - return "nvidia"; - } + // Model-prefix fallback — only when baseURL didn't match a known host + if (/^jina-/i.test(model)) return "jina"; + if (/^voyage\b/i.test(model)) return "voyage-compatible"; + if (/^nvidia\//i.test(model) || /^nv-embed/i.test(model)) return "nvidia"; return "generic-openai-compatible"; } @@ -654,7 +657,11 @@ export class Embedder { payload.normalized = this._normalized; } - // Task hint: field name and optional value translation are provider-defined. + // Task hint: only injected when BOTH the provider profile defines a taskField + // AND the caller passes a task value (from user-configured taskQuery/taskPassage). + // This means broad provider detection (e.g. any .nvidia.com host) is safe — + // non-retriever models that don't expect input_type are unaffected unless the + // user explicitly configures task hints. if (this._capabilities.taskField && task) { const cap = this._capabilities; const value = cap.taskValueMap?.[task] ?? task; diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs index cfd80ca5..c1568423 100644 --- a/test/nvidia-nim-provider-profile.test.mjs +++ b/test/nvidia-nim-provider-profile.test.mjs @@ -121,6 +121,54 @@ describe("NVIDIA NIM provider profile", () => { assert.equal(message, "Failed to generate embedding from NVIDIA NIM: boom"); }); + it(".nvidia.com baseURL with conflicting jina- model prefix → NVIDIA wins", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + // Replace localhost URL with a .nvidia.com URL for detection, but route + // the actual HTTP request to the capture server. + const nvidiaBaseURL = baseURL.replace("127.0.0.1", "integrate.api.nvidia.com"); + const embedder = new Embedder({ + baseURL, // actual network target + model: "jina-embeddings-v3", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + // Override the detected profile by using a real .nvidia.com baseURL in detection + // We test detection separately via the error label path: + const message = formatEmbeddingProviderError(new Error("test"), { + baseURL: "https://integrate.api.nvidia.com/v1", + model: "jina-embeddings-v3", + mode: "single", + }); + assert.equal(message, "Failed to generate embedding from NVIDIA NIM: test", + ".nvidia.com host should win over jina- model prefix"); + }); + }); + + it(".nvidia.com baseURL without taskQuery/taskPassage → no input_type injected", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nvidia/nv-clip-v1", + apiKey: "test-key", + dimensions: dims, + // Deliberately omit taskQuery and taskPassage + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, undefined, + "NVIDIA profile without taskQuery/taskPassage should NOT inject input_type"); + assert.equal(body.task, undefined, + "NVIDIA profile without taskQuery/taskPassage should NOT inject task"); + }); + }); + it("non-NVIDIA: Jina sends task field", async () => { const dims = 128; await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { From 255475d44baa7eb9beb3672a2edbe0e5f6e97af3 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Tue, 24 Mar 2026 21:16:27 -0700 Subject: [PATCH 6/6] fix: parse hostname for provider detection instead of raw URL matching Address Codex review: regex matching /.nvidia.com/ against the full baseURL string can misclassify proxy URLs that contain .nvidia.com in their path or query. Parse the hostname via URL() and match with endsWith() for all host-based provider checks. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/embedder.ts b/src/embedder.ts index 7e01bf82..74ab48de 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -237,15 +237,19 @@ function detectEmbeddingProviderProfile( model: string, ): EmbeddingProviderProfile { const base = baseURL || ""; + let host = ""; + try { host = new URL(base).hostname.toLowerCase(); } catch { /* invalid URL — skip host checks */ } // Host-based detection runs first — endpoint owner semantics take precedence // over model-name heuristics to avoid misclassifying e.g. a jina-xxx model // served from .nvidia.com as Jina instead of NVIDIA. - if (/api\.openai\.com/i.test(base)) return "openai"; - if (/\.openai\.azure\.com/i.test(base)) return "azure-openai"; - if (/api\.jina\.ai/i.test(base)) return "jina"; - if (/api\.voyageai\.com/i.test(base)) return "voyage-compatible"; - if (/\.nvidia\.com/i.test(base)) return "nvidia"; + // Match on parsed hostname to avoid false positives from proxy URLs that + // contain provider domains in their path or query string. + if (host.endsWith("api.openai.com")) return "openai"; + if (host.endsWith(".openai.azure.com")) return "azure-openai"; + if (host.endsWith("api.jina.ai")) return "jina"; + if (host.endsWith("api.voyageai.com")) return "voyage-compatible"; + if (host.endsWith(".nvidia.com") || host === "nvidia.com") return "nvidia"; // Model-prefix fallback — only when baseURL didn't match a known host if (/^jina-/i.test(model)) return "jina";