Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion src/embedder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -511,12 +511,73 @@ export class Embedder {
return /rate.limit|quota|too many requests|insufficient.*credit|429|503.*overload/i.test(msg);
}

/**
* Detect if the configured baseURL points to a local Ollama instance.
* Ollama's HTTP server does not properly handle AbortController signals through
* the OpenAI SDK's HTTP client, causing long-lived sockets that don't close
* when the embedding pipeline times out. For Ollama we use native fetch instead.
*/
private isOllamaProvider(): boolean {
if (!this._baseURL) return false;
return /localhost:11434|127\.0\.0\.1:11434|\/ollama\b/i.test(this._baseURL);
}

/**
* Call embeddings.create using native fetch (bypasses OpenAI SDK).
* Used exclusively for Ollama endpoints where AbortController must work
* correctly to avoid long-lived stalled sockets.
*/
private async embedWithNativeFetch(payload: any, signal?: AbortSignal): Promise<any> {
if (!this._baseURL) {
throw new Error("embedWithNativeFetch requires a baseURL");
}
// Ollama's embeddings endpoint is at /v1/embeddings (OpenAI-compatible)
const endpoint = this._baseURL.replace(/\/$/, "") + "/embeddings";

const apiKey = this.clients[0]?.apiKey ?? "ollama";

const response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${apiKey}`,
},
body: JSON.stringify(payload),
signal: signal,
});

if (!response.ok) {
const body = await response.text().catch(() => "");
throw new Error(`Ollama embedding failed: ${response.status} ${response.statusText} ??${body.slice(0, 200)}`);
}

const data = await response.json();
return data; // OpenAI-compatible shape: { data: [{ embedding: number[] }] }
}

/**
* Call embeddings.create with automatic key rotation on rate-limit errors.
* Tries each key in the pool at most once before giving up.
* Accepts an optional AbortSignal to support true request cancellation.
*
* For Ollama endpoints, native fetch is used instead of the OpenAI SDK
* because AbortController does not reliably abort Ollama's HTTP connections
* through the SDK's HTTP client on Node.js.
*/
private async embedWithRetry(payload: any, signal?: AbortSignal): Promise<any> {
// Use native fetch for Ollama to ensure proper AbortController support
if (this.isOllamaProvider()) {
try {
return await this.embedWithNativeFetch(payload, signal);
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
throw error;
}
// Ollama errors bubble up without retry (Ollama doesn't rate-limit locally)
throw error;
}
}

const maxAttempts = this.clients.length;
let lastError: Error | undefined;

Expand All @@ -530,7 +591,7 @@ export class Embedder {
if (error instanceof Error && error.name === 'AbortError') {
throw error;
}

lastError = error instanceof Error ? error : new Error(String(error));

if (this.isRateLimitError(error) && attempt < maxAttempts - 1) {
Expand Down
53 changes: 53 additions & 0 deletions test/cjk-recursion-regression.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,58 @@ async function testBatchEmbeddingStillWorks() {
console.log(" PASSED\n");
}

async function testOllamaAbortWithNativeFetch() {
console.log("Test 8: Ollama endpoint uses native fetch and abort propagates correctly (PR354 fix)");

let requestAborted = false;
let requestDestroyed = false;

await withServer(async (_payload, req, res) => {
// Simulate slow Ollama response ??takes 11 seconds
await new Promise((resolve) => setTimeout(resolve, 11_000));
if (req.aborted || req.destroyed) {
requestAborted = req.aborted;
requestDestroyed = req.destroyed;
return;
}
const dims = 1024;
res.writeHead(200, { "content-type": "application/json" });
res.end(JSON.stringify({ data: [{ embedding: Array.from({ length: dims }, () => 0.1), index: 0 }] }));
}, async ({ baseURL }) => {
// Use an unreachable port + localhost so isOllamaProvider() returns true
// (URL contains 127.0.0.1:11434) but nothing actually listens there.
// This forces native fetch to properly reject, validating the Ollama path.
const ollamaBaseURL = "http://127.0.0.1:11434/v1";
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Point Ollama abort test at the local mock server

This test is meant to verify that abort signals propagate through the Ollama native-fetch path, but it hardcodes http://127.0.0.1:11434/v1 instead of using the baseURL from withServer. As written, the request usually fails immediately with connection refusal, so the slow handler (and its abort/destroy behavior) is never exercised; the test can pass even if abort propagation is broken.

Useful? React with 👍 / 👎.

const embedder = new Embedder({
provider: "openai-compatible",
apiKey: "test-key",
model: "mxbai-embed-large",
baseURL: ollamaBaseURL,
dimensions: 1024,
});

// Verify isOllamaProvider is true (native fetch path)
assert.equal(embedder.isOllamaProvider ? embedder.isOllamaProvider() : false, true,
"isOllamaProvider should return true for localhost:11434");

// Call embedPassage and verify it rejects via native fetch path
// (real Ollama at :11434 returns 404, which triggers our error handler)
let errorCaught;
try {
await embedder.embedPassage("ollama abort test probe");
} catch (e) {
errorCaught = e;
}
assert.ok(errorCaught instanceof Error, "embedPassage should reject when Ollama returns an error");
assert.ok(
/ollama embedding failed|404|Failed to generate embedding from Ollama|Embedding provider unreachable/i.test(errorCaught.message),
"Error should come from Ollama native fetch path, got: " + errorCaught.message
);
});

console.log(" PASSED\n");
}

async function run() {
console.log("Running regression tests for PR #238...\n");
await testSingleChunkFallbackTerminates();
Expand All @@ -245,6 +297,7 @@ async function run() {
await testSmallContextChunking();
await testTimeoutAbortPropagation();
await testBatchEmbeddingStillWorks();
await testOllamaAbortWithNativeFetch();
console.log("All regression tests passed!");
}

Expand Down
Loading