From f7312bf240498110c84afa7aeb4f97368d00b031 Mon Sep 17 00:00:00 2001 From: "Jansen@home" Date: Fri, 27 Mar 2026 19:48:36 +0800 Subject: [PATCH] feat: separate internal and request dimensions for embeddings --- index.ts | 8 ++++++ openclaw.plugin.json | 5 ++++ src/embedder.ts | 7 ++++- test/plugin-manifest-regression.mjs | 44 ++++++++++++++++++++++++++--- 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/index.ts b/index.ts index b9d9ee6..95235e6 100644 --- a/index.ts +++ b/index.ts @@ -84,7 +84,10 @@ interface PluginConfig { apiKey: string | string[]; model?: string; baseURL?: string; + /** Internal schema/validation dimension (LanceDB + local checks). */ dimensions?: number; + /** Optional provider request dimension (dimensions/output_dimension). */ + requestDimensions?: number; omitDimensions?: boolean; taskQuery?: string; taskPassage?: string; @@ -1639,7 +1642,10 @@ const memoryLanceDBProPlugin = { apiKey: config.embedding.apiKey, model: config.embedding.model || "text-embedding-3-small", baseURL: config.embedding.baseURL, + // Internal dimension for local schema/validation checks. dimensions: config.embedding.dimensions, + // Optional request hint sent to providers that support variable dimensions. + requestDimensions: config.embedding.requestDimensions, omitDimensions: config.embedding.omitDimensions, taskQuery: config.embedding.taskQuery, taskPassage: config.embedding.taskPassage, @@ -3776,6 +3782,8 @@ export function parsePluginConfig(value: unknown): PluginConfig { // Accept number, numeric string, or env-var string (e.g. "${EMBED_DIM}"). // Also accept legacy top-level `dimensions` for convenience. dimensions: parsePositiveInt(embedding.dimensions ?? cfg.dimensions), + // Request dimension is intentionally separate from internal schema sizing. + requestDimensions: parsePositiveInt(embedding.requestDimensions), omitDimensions: typeof embedding.omitDimensions === "boolean" ? embedding.omitDimensions diff --git a/openclaw.plugin.json b/openclaw.plugin.json index a2cfb1f..f3df07c 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -46,6 +46,11 @@ "type": "integer", "minimum": 1 }, + "requestDimensions": { + "type": "integer", + "minimum": 1, + "description": "Optional output dimension sent to embedding API requests only (for providers supporting variable dimensions)" + }, "omitDimensions": { "type": "boolean", "description": "When true, omit the dimensions parameter from embedding requests even if dimensions is configured" diff --git a/src/embedder.ts b/src/embedder.ts index bcbbaa7..9974517 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -90,7 +90,10 @@ export interface EmbeddingConfig { apiKey: string | string[]; model: string; baseURL?: string; + /** Internal vector dimension for schema/validation. This does NOT imply sending API dimensions. */ dimensions?: number; + /** Optional API request output dimension for providers that support variable dimensions. */ + requestDimensions?: number; /** Optional task type for query embeddings (e.g. "retrieval.query") */ taskQuery?: string; @@ -428,7 +431,8 @@ export class Embedder { this._taskQuery = config.taskQuery; this._taskPassage = config.taskPassage; this._normalized = config.normalized; - this._requestDimensions = config.dimensions; + // Request-side dimension hint is isolated from internal schema dimension. + this._requestDimensions = config.requestDimensions; this._omitDimensions = config.omitDimensions === true; // Enable auto-chunking by default for better handling of long documents this._autoChunk = config.chunking !== false; @@ -472,6 +476,7 @@ export class Embedder { console.log(`[memory-lancedb-pro] Initialized ${this.clients.length} API keys for round-robin rotation`); } + // Internal dimension remains the single source of truth for local validation. this.dimensions = getVectorDimensions(config.model, config.dimensions); this._cache = new EmbeddingCache(256, 30); // 256 entries, 30 min TTL } diff --git a/test/plugin-manifest-regression.mjs b/test/plugin-manifest-regression.mjs index 65e9ec2..461d2e7 100644 --- a/test/plugin-manifest-regression.mjs +++ b/test/plugin-manifest-regression.mjs @@ -109,6 +109,11 @@ assert.equal( "boolean", "embedding.omitDimensions should be declared in the plugin schema", ); +assert.equal( + manifest.configSchema.properties.embedding.properties.requestDimensions?.type, + "integer", + "embedding.requestDimensions should be declared in the plugin schema", +); assert.equal( manifest.configSchema.properties.sessionMemory.properties.enabled.default, false, @@ -325,14 +330,44 @@ try { }); const requestCountBeforeWithDimensions = embeddingRequests.length; await withDimensionsTool.execute("tool-3", { - text: "dimensions should be sent by default", + text: "dimensions should not be sent by default", scope: "global", }); const withDimensionsRequest = embeddingRequests.at(requestCountBeforeWithDimensions); assert.equal( - withDimensionsRequest?.dimensions, + Object.prototype.hasOwnProperty.call(withDimensionsRequest ?? {}, "dimensions"), + false, + "embedding.dimensions should be used for internal schema sizing, not forwarded by default", + ); + + const withRequestDimensionsApi = createMockApi({ + dbPath: path.join(workDir, "db-with-request-dimensions"), + autoCapture: false, + autoRecall: false, + embedding: { + provider: "openai-compatible", + apiKey: "dummy", + model: "text-embedding-3-small", + baseURL: embeddingBaseURL, + dimensions: 4, + requestDimensions: 4, + }, + }); + plugin.register(withRequestDimensionsApi); + const withRequestDimensionsTool = withRequestDimensionsApi.toolFactories.memory_store({ + agentId: "main", + sessionKey: "agent:main:test", + }); + const requestCountBeforeRequestDimensions = embeddingRequests.length; + await withRequestDimensionsTool.execute("tool-3b", { + text: "requestDimensions should be forwarded", + scope: "global", + }); + const withRequestDimensionsRequest = embeddingRequests.at(requestCountBeforeRequestDimensions); + assert.equal( + withRequestDimensionsRequest?.dimensions, 4, - "embedding.dimensions should be forwarded by default", + "embedding.requestDimensions should be forwarded to embedding requests", ); const omitDimensionsApi = createMockApi({ @@ -345,6 +380,7 @@ try { model: "text-embedding-3-small", baseURL: embeddingBaseURL, dimensions: 4, + requestDimensions: 4, omitDimensions: true, }, }); @@ -362,7 +398,7 @@ try { assert.equal( Object.prototype.hasOwnProperty.call(omitDimensionsRequest, "dimensions"), false, - "embedding.omitDimensions=true should omit dimensions from embedding requests", + "embedding.omitDimensions=true should omit dimensions from embedding requests even when requestDimensions is set", ); } finally { await new Promise((resolve) => embeddingServer.close(resolve));