Skip to content

Commit 1843188

Browse files
committed
Add configurable embedding batchSize and bump version to 1.0.3
Splits the input to embedBatch into mini-batches (default size 1) to control memory usage during ONNX inference. Adds a new `batchSize` field to the embedding config, configurable per graph and per project.
1 parent 508adde commit 1843188

6 files changed

Lines changed: 23 additions & 12 deletions

File tree

graph-memory.yaml.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ server:
3131
# dtype: "q8" # Quantization: fp32, fp16, q8, q4
3232
# queryPrefix: "" # Prefix prepended to search queries
3333
# documentPrefix: "" # Prefix prepended to documents during indexing
34+
# batchSize: 1 # Texts per ONNX forward pass (increase for faster indexing, more memory)
3435

3536
# ---------------------------------------------------------------------------
3637
# Projects

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "mcp-graph-memory",
3-
"version": "1.0.0",
3+
"version": "1.0.3",
44
"description": "MCP server for semantic graph memory from markdown files",
55
"main": "dist/cli/index.js",
66
"bin": {

src/lib/embedder.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,17 @@ export async function embedBatch(
8181
const texts = inputs.map(({ title, content }) =>
8282
`${config.documentPrefix}${title}\n${content}`.slice(0, _maxChars),
8383
);
84-
const tensor = await pipe._call(texts, { pooling: config.pooling, normalize: config.normalize });
85-
const dim = tensor.dims[1];
86-
const data = tensor.data as Float32Array;
84+
85+
const batchSize = config.batchSize;
8786
const result: number[][] = [];
88-
for (let i = 0; i < inputs.length; i++) {
89-
result.push(Array.from(data.slice(i * dim, (i + 1) * dim)));
87+
for (let start = 0; start < texts.length; start += batchSize) {
88+
const chunk = texts.slice(start, start + batchSize);
89+
const tensor = await pipe._call(chunk, { pooling: config.pooling, normalize: config.normalize });
90+
const dim = tensor.dims[1];
91+
const data = tensor.data as Float32Array;
92+
for (let i = 0; i < chunk.length; i++) {
93+
result.push(Array.from(data.slice(i * dim, (i + 1) * dim)));
94+
}
9095
}
9196
return result;
9297
}

src/lib/multi-config.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ const embeddingConfigSchema = z.object({
2222
dtype: z.string().optional(),
2323
queryPrefix: z.string().optional(),
2424
documentPrefix: z.string().optional(),
25+
batchSize: z.number().int().positive().optional(),
2526
});
2627

2728
const graphEmbeddingOverridesSchema = z.object({
@@ -98,6 +99,7 @@ export interface EmbeddingConfig {
9899
dtype?: string;
99100
queryPrefix: string;
100101
documentPrefix: string;
102+
batchSize: number;
101103
}
102104

103105
/**
@@ -175,6 +177,7 @@ const EMBEDDING_DEFAULTS: EmbeddingConfig = {
175177
normalize: true,
176178
queryPrefix: '',
177179
documentPrefix: '',
180+
batchSize: 1,
178181
};
179182

180183
const SERVER_DEFAULTS: Omit<ServerConfig, 'embedding'> & { embedding: EmbeddingConfig } = {
@@ -214,6 +217,7 @@ function mergeEmbeddingConfig(
214217
dtype: override.dtype ?? base.dtype,
215218
queryPrefix: override.queryPrefix ?? base.queryPrefix,
216219
documentPrefix: override.documentPrefix ?? base.documentPrefix,
220+
batchSize: override.batchSize ?? base.batchSize,
217221
};
218222
}
219223

@@ -233,6 +237,7 @@ function resolveEmbeddingConfig(
233237
dtype: raw.dtype ?? fallback.dtype,
234238
queryPrefix: raw.queryPrefix ?? fallback.queryPrefix,
235239
documentPrefix: raw.documentPrefix ?? fallback.documentPrefix,
240+
batchSize: raw.batchSize ?? fallback.batchSize,
236241
};
237242
}
238243

src/tests/rest-api.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ function createTestProject(): ProjectInstance {
4343
chunkDepth: 4,
4444
maxTokensDefault: 4000,
4545
embedMaxChars: 2000,
46-
embedding: { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '' },
46+
embedding: { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '', batchSize: 1 },
4747
graphEmbeddings: Object.fromEntries(
48-
['docs', 'code', 'knowledge', 'tasks', 'files', 'skills'].map(g => [g, { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '' }]),
48+
['docs', 'code', 'knowledge', 'tasks', 'files', 'skills'].map(g => [g, { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '', batchSize: 1 }]),
4949
) as any,
5050
author: { name: '', email: '' },
5151
},
@@ -482,9 +482,9 @@ describe('Attachment REST endpoints', () => {
482482
chunkDepth: 4,
483483
maxTokensDefault: 4000,
484484
embedMaxChars: 2000,
485-
embedding: { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '' },
485+
embedding: { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '', batchSize: 1 },
486486
graphEmbeddings: Object.fromEntries(
487-
['docs', 'code', 'knowledge', 'tasks', 'files', 'skills'].map(g => [g, { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '' }]),
487+
['docs', 'code', 'knowledge', 'tasks', 'files', 'skills'].map(g => [g, { model: 'test', pooling: 'mean' as const, normalize: true, queryPrefix: '', documentPrefix: '', batchSize: 1 }]),
488488
) as any,
489489
author: { name: '', email: '' },
490490
},

0 commit comments

Comments (0)