From fadfd8a80e6d33925fd071272a01d5132d7148cd Mon Sep 17 00:00:00 2001 From: Csaba Tuncsik Date: Sun, 15 Mar 2026 10:53:59 +0100 Subject: [PATCH 1/3] feat: add SOCRATICODE_PROJECT_ID env var for shared indexes across directories When working with git worktrees (or any setup where the same codebase lives in multiple directories), each path currently gets its own Qdrant collection. This means the same codebase is indexed multiple times. This change adds a SOCRATICODE_PROJECT_ID environment variable that, when set, overrides the path-based project ID generation. All directories sharing the same SOCRATICODE_PROJECT_ID will use the same Qdrant collections (codebase, codegraph, context), eliminating redundant indexing. The value must match [a-zA-Z0-9_-]+ to remain Qdrant-friendly. An error is thrown when the project ID is resolved (in projectIdFromPath) if the value contains invalid characters. --- README.md | 1 + src/config.ts | 14 +++++++++++++ tests/unit/config.test.ts | 43 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bf6986e..9469801 100644 --- a/README.md +++ b/README.md @@ -578,6 +578,7 @@ Artifacts are chunked and embedded into Qdrant using the same hybrid dense + BM2 | `MAX_FILE_SIZE_MB` | `5` | Maximum file size in MB. Files larger than this are skipped during indexing. Increase for repos with large generated or data files you want indexed. | | `SEARCH_DEFAULT_LIMIT` | `10` | Default number of results returned by `codebase_search` (1-50). Each result is a ranked code chunk with file path, line range, and content. Higher values give broader coverage but produce more output. Can still be overridden per-query via the `limit` tool parameter. | | `SEARCH_MIN_SCORE` | `0.10` | Minimum RRF (Reciprocal Rank Fusion) score threshold (0-1). Results below this score are filtered out. Helps remove low-relevance noise from search results. Set to `0` to disable filtering (returns all results up to `limit`). 
Can be overridden per-query via the `minScore` tool parameter. Works together with `limit`: results are first filtered by score, then capped at `limit`. | +| `SOCRATICODE_PROJECT_ID` | *(none)* | Override the auto-generated project ID. When set, all paths resolve to the same Qdrant collections, allowing multiple directories (e.g. git worktrees of the same repo) to share a single index. Must match `[a-zA-Z0-9_-]+`. | | `SOCRATICODE_LOG_LEVEL` | `info` | Log verbosity: `debug`, `info`, `warn`, `error` | | `SOCRATICODE_LOG_FILE` | *(none)* | Absolute path to a log file. When set, all log entries are appended to this file (a session separator is written on each server start). Useful for debugging when the MCP host doesn't surface log notifications. | diff --git a/src/config.ts b/src/config.ts index 1a71e18..7252438 100644 --- a/src/config.ts +++ b/src/config.ts @@ -6,8 +6,22 @@ import path from "node:path"; /** * Generate a stable project ID from an absolute folder path. * Uses a short SHA-256 prefix so collection names stay Qdrant-friendly. + * + * When `SOCRATICODE_PROJECT_ID` is set, that value is used directly instead + * of hashing the path. This lets multiple directory trees (e.g. git + * worktrees) share a single Qdrant index. The value must contain only + * characters valid in a Qdrant collection name (`[a-zA-Z0-9_-]`). 
*/ export function projectIdFromPath(folderPath: string): string { + const explicit = process.env.SOCRATICODE_PROJECT_ID?.trim(); + if (explicit) { + if (!/^[a-zA-Z0-9_-]+$/.test(explicit)) { + throw new Error( + `SOCRATICODE_PROJECT_ID must match [a-zA-Z0-9_-]+ but got: "${explicit}"`, + ); + } + return explicit; + } const normalized = path.resolve(folderPath); return createHash("sha256").update(normalized).digest("hex").slice(0, 12); } diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 9408a8e..c75fbfe 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -1,9 +1,19 @@ // SPDX-License-Identifier: AGPL-3.0-only // Copyright (C) 2026 Giancarlo Erra - Altaire Limited -import { describe, expect, it } from "vitest"; +import { afterEach, describe, expect, it } from "vitest"; import { collectionName, contextCollectionName, graphCollectionName, projectIdFromPath } from "../../src/config.js"; describe("config", () => { + // Clean up env override between tests + const originalEnv = process.env.SOCRATICODE_PROJECT_ID; + afterEach(() => { + if (originalEnv === undefined) { + delete process.env.SOCRATICODE_PROJECT_ID; + } else { + process.env.SOCRATICODE_PROJECT_ID = originalEnv; + } + }); + describe("projectIdFromPath", () => { it("returns a 12-character hex string", () => { const id = projectIdFromPath("/some/project/path"); @@ -42,6 +52,37 @@ describe("config", () => { // path.resolve normalizes trailing slash, so they should match expect(id1).toBe(id2); }); + + it("uses SOCRATICODE_PROJECT_ID when set", () => { + process.env.SOCRATICODE_PROJECT_ID = "my-shared-project"; + const id = projectIdFromPath("/some/project/path"); + expect(id).toBe("my-shared-project"); + }); + + it("ignores path differences when SOCRATICODE_PROJECT_ID is set", () => { + process.env.SOCRATICODE_PROJECT_ID = "shared"; + const id1 = projectIdFromPath("/worktree/a"); + const id2 = projectIdFromPath("/worktree/b"); + expect(id1).toBe(id2); + }); + + it("trims 
whitespace from SOCRATICODE_PROJECT_ID", () => { + process.env.SOCRATICODE_PROJECT_ID = " my-project "; + expect(projectIdFromPath("/any/path")).toBe("my-project"); + }); + + it("throws on invalid SOCRATICODE_PROJECT_ID characters", () => { + process.env.SOCRATICODE_PROJECT_ID = "invalid/name"; + expect(() => projectIdFromPath("/any/path")).toThrow( + /SOCRATICODE_PROJECT_ID must match/, + ); + }); + + it("falls back to hash when SOCRATICODE_PROJECT_ID is empty", () => { + process.env.SOCRATICODE_PROJECT_ID = " "; + const id = projectIdFromPath("/some/project/path"); + expect(id).toMatch(/^[0-9a-f]{12}$/); + }); }); describe("collectionName", () => { From 3cad30a6509837af2346fe6e83c7ec3aadc04900 Mon Sep 17 00:00:00 2001 From: Csaba Tuncsik Date: Sun, 15 Mar 2026 16:57:22 +0100 Subject: [PATCH 2/3] docs: add git worktrees section to README Documents how to use SOCRATICODE_PROJECT_ID with per-project .mcp.json to share a single index across git worktrees of the same repository. --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index 9469801..c9beefa 100644 --- a/README.md +++ b/README.md @@ -395,6 +395,36 @@ Use Google's Gemini embedding API. Requires an [API key](https://aistudio.google > Defaults: `EMBEDDING_MODEL=gemini-embedding-001`, `EMBEDDING_DIMENSIONS=3072`. +### Git Worktrees (shared index across directories) + +If you use [git worktrees](https://git-scm.com/docs/git-worktree) — or any workflow where the same repository lives in multiple directories — each path would normally get its own Qdrant index. This means redundant embedding and storage for what is essentially the same codebase. + +Set `SOCRATICODE_PROJECT_ID` to share a single index across all directories of the same project. 
Add a `.mcp.json` at the root of each worktree (and your main checkout): + +```json +{ + "mcpServers": { + "socraticode": { + "command": "npx", + "args": ["-y", "socraticode"], + "env": { + "SOCRATICODE_PROJECT_ID": "my-project" + } + } + } +} +``` + +With this config, agents running in `/repo/main`, `/repo/worktree-feat-a`, and `/repo/worktree-fix-b` all share the same `codebase_my-project`, `codegraph_my-project`, and `context_my-project` Qdrant collections. + +> **Why per-project `.mcp.json` instead of global config?** A global MCP config doesn't know which project directory to index. Per-project `.mcp.json` lets each repo specify its own `SOCRATICODE_PROJECT_ID` while worktrees of the same repo share the same value. Add `.mcp.json` to your `.gitignore` if you don't want it tracked. + +**How it works in practice:** + +- The semantic index reflects whichever worktree last triggered a file change — but since branches typically differ by only a handful of files, the index is 99%+ accurate for all worktrees +- Your AI agent reads actual file contents from its own worktree; the shared index is only used for discovery and navigation +- When changes merge back to main, the file watcher re-indexes the changed files and the index converges + ### Available tools Once connected, 21 tools are available to your AI assistant: From d7c32d1435021172762531860350f38f83173edf Mon Sep 17 00:00:00 2001 From: Csaba Tuncsik Date: Sun, 15 Mar 2026 20:58:23 +0100 Subject: [PATCH 3/3] docs: add Claude Code worktree auto-detection to git worktrees section Claude Code resolves git worktree links back to the main repo path for config lookup, so MCP config only needs to be set once on the main checkout. All worktrees inherit it automatically. Separate clones are unaffected. Documents both the auto-detection path and the manual .mcp.json fallback for other MCP hosts. 
--- README.md | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c9beefa..e9a786d 100644 --- a/README.md +++ b/README.md @@ -399,7 +399,26 @@ Use Google's Gemini embedding API. Requires an [API key](https://aistudio.google If you use [git worktrees](https://git-scm.com/docs/git-worktree) — or any workflow where the same repository lives in multiple directories — each path would normally get its own Qdrant index. This means redundant embedding and storage for what is essentially the same codebase. -Set `SOCRATICODE_PROJECT_ID` to share a single index across all directories of the same project. Add a `.mcp.json` at the root of each worktree (and your main checkout): +Set `SOCRATICODE_PROJECT_ID` to share a single index across all directories of the same project. + +#### MCP hosts with git worktree detection (e.g. Claude Code) + +Some MCP hosts (like [Claude Code](https://claude.ai/claude-code)) resolve the project root by following git worktree links. Since worktrees point back to the main repository's `.git` directory, the host automatically maps all worktrees to the same project config. This means you only need to configure the MCP server **once** for the main checkout — all worktrees inherit it automatically. + +For Claude Code, add the server with local scope from your main checkout: + +```bash +cd /path/to/main-checkout +claude mcp add -e SOCRATICODE_PROJECT_ID=my-project --scope local socraticode -- npx -y socraticode +``` + +All worktrees created from this repo will automatically connect to socraticode with the shared project ID. No per-worktree setup needed. + +> **Note:** This only works for git worktrees. Separate `git clone`s of the same repo have independent `.git` directories and won't share the config. 
+ +#### Other MCP hosts (per-project `.mcp.json`) + +For MCP hosts that don't resolve git worktree paths, add a `.mcp.json` at the root of each worktree (and your main checkout): ```json { @@ -415,9 +434,11 @@ Set `SOCRATICODE_PROJECT_ID` to share a single index across all directories of t } ``` -With this config, agents running in `/repo/main`, `/repo/worktree-feat-a`, and `/repo/worktree-fix-b` all share the same `codebase_my-project`, `codegraph_my-project`, and `context_my-project` Qdrant collections. +Add `.mcp.json` to your `.gitignore` if you don't want it tracked. + +#### How it works -> **Why per-project `.mcp.json` instead of global config?** A global MCP config doesn't know which project directory to index. Per-project `.mcp.json` lets each repo specify its own `SOCRATICODE_PROJECT_ID` while worktrees of the same repo share the same value. Add `.mcp.json` to your `.gitignore` if you don't want it tracked. +With this config, agents running in `/repo/main`, `/repo/worktree-feat-a`, and `/repo/worktree-fix-b` all share the same `codebase_my-project`, `codegraph_my-project`, and `context_my-project` Qdrant collections. **How it works in practice:**