tobi · igrigorik · Feb 16, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/src/formatter.ts b/src/formatter.ts
@@ -101,7 +101,7 @@ export function searchResultsToJson(
   const output = results.map(row => {
     const bodyStr = row.body || "";
     let body = opts.full ? bodyStr : undefined;
-    let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos).snippet : undefined;
+    let snippet = !opts.full ? extractSnippet(bodyStr, query, { maxLen: 300, chunkPos: row.chunkPos }).snippet : undefined;
 
     if (opts.lineNumbers) {
       if (body) body = addLineNumbers(body);
@@ -132,7 +132,7 @@ export function searchResultsToCsv(
   const header = "docid,score,file,title,context,line,snippet";
   const rows = results.map(row => {
     const bodyStr = row.body || "";
-    const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos);
+    const { line, snippet } = extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos });
     let content = opts.full ? bodyStr : snippet;
     if (opts.lineNumbers && content) {
       content = addLineNumbers(content);
@@ -175,7 +175,7 @@ export function searchResultsToMarkdown(
     if (opts.full) {
       content = bodyStr;
     } else {
-      content = extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+      content = extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos }).snippet;
     }
     if (opts.lineNumbers) {
       content = addLineNumbers(content);
@@ -196,7 +196,7 @@ export function searchResultsToXml(
   const items = results.map(row => {
     const titleAttr = row.title ? ` title="${escapeXml(row.title)}"` : "";
     const bodyStr = row.body || "";
-    let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+    let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos }).snippet;
     if (opts.lineNumbers) {
       content = addLineNumbers(content);
     }

diff --git a/src/llm.ts b/src/llm.ts
@@ -933,7 +933,7 @@ export class LlamaCpp implements LLM {
   // High-level abstractions
   // ==========================================================================
 
-  async expandQuery(query: string, options: { context?: string, includeLexical?: boolean } = {}): Promise<Queryable[]> {
+  async expandQuery(query: string, options: { intent?: string, context?: string, includeLexical?: boolean } = {}): Promise<Queryable[]> {
     // Ping activity at start to keep models alive during this operation
     this.touchActivity();
 
@@ -942,6 +942,7 @@ export class LlamaCpp implements LLM {
 
     const includeLexical = options.includeLexical ?? true;
     const context = options.context;
+    const intent = options.intent;
 
     const grammar = await llama.createGrammar({
       grammar: `
@@ -952,7 +953,11 @@ export class LlamaCpp implements LLM {
       `
     });
 
-    const prompt = `/no_think Expand this search query: ${query}`;
+    // When intent is provided, include it as background context so the LLM
+    // generates expansions that are better aligned with the caller's goal.
+    const prompt = intent
+      ? `/no_think Context: ${intent}\nExpand this search query: ${query}`
+      : `/no_think Expand this search query: ${query}`;
 
     // Create fresh context for each call
     const genContext = await this.generateModel!.createContext();

diff --git a/src/mcp.ts b/src/mcp.ts
@@ -21,6 +21,7 @@ import {
   addLineNumbers,
   hybridQuery,
   vectorSearchQuery,
+  normalizeQuery,
   DEFAULT_MULTI_GET_MAX_BYTES,
 } from "./store.js";
 import type { Store } from "./store.js";
@@ -101,7 +102,8 @@ function buildInstructions(store: Store): string {
   // --- What's searchable? ---
   if (status.collections.length > 0) {
     lines.push("");
-    lines.push("Collections (scope with `collection` parameter):");
+    lines.push("Collections — when the user's request maps to a specific collection, always");
+    lines.push("set the `collection` parameter to filter. This reduces noise and improves relevance.");
     for (const col of status.collections) {
       const collConfig = getCollection(col.name);
       const rootCtx = collConfig?.context?.[""] || collConfig?.context?.["/"];
@@ -125,7 +127,14 @@ function buildInstructions(store: Store): string {
   lines.push("Search:");
   lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
   lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
-  lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
+  lines.push("  - `deep_search` — hybrid search with reranking. You are the query expander —");
+  lines.push("    generate all three expansion fields to replace the built-in LLM expansion:");
+  lines.push("      query: `{ text, keywords, concepts, passage }`");
+  lines.push("      `keywords`: BM25 search terms and synonyms (e.g. [\"TTFB\", \"core web vitals\"])");
+  lines.push("      `concepts`: semantic phrases for embedding search (e.g. [\"frontend rendering optimization\"])");
+  lines.push("      `passage`: a paragraph written as if it were the ideal matching document");
+  lines.push("    Fallback: pass query as a plain string for automatic expansion (~10s slower).");
+  lines.push("  Always provide `intent` on every search call to disambiguate and improve snippets.");
 
   // --- Retrieval workflow ---
   lines.push("");
@@ -233,17 +242,18 @@ function createMcpServer(store: Store): McpServer {
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
         query: z.string().describe("Search query - keywords or phrases to find"),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
+    async ({ query, intent, limit, minScore, collection }) => {
       const results = store.searchFTS(query, limit || 10, collection);
       const filtered: SearchResultItem[] = results
         .filter(r => r.score >= (minScore || 0))
         .map(r => {
-          const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
+          const { line, snippet } = extractSnippet(r.body || "", query, { maxLen: 300, chunkPos: r.chunkPos, intent });
           return {
             docid: `#${r.docid}`,
             file: r.displayPath,
@@ -273,13 +283,14 @@ function createMcpServer(store: Store): McpServer {
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
         query: z.string().describe("Natural language query - describe what you're looking for"),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
-      const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
+    async ({ query, intent, limit, minScore, collection }) => {
+      const results = await vectorSearchQuery(store, query, { collection, limit, minScore, intent });
 
       if (results.length === 0) {
         // Distinguish "no embeddings" from "no matches" — check if vector table exists
@@ -293,7 +304,7 @@ function createMcpServer(store: Store): McpServer {
       }
 
       const filtered: SearchResultItem[] = results.map(r => {
-        const { line, snippet } = extractSnippet(r.body, query, 300);
+        const { line, snippet } = extractSnippet(r.body, query, { maxLen: 300, intent });
         return {
           docid: `#${r.docid}`,
           file: r.displayPath,
@@ -319,20 +330,33 @@ function createMcpServer(store: Store): McpServer {
     "deep_search",
     {
       title: "Deep Search",
-      description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
+      description: "Deep search with reranking. Prefer query as object with keywords/concepts/passage for best results. Fallback: pass as string for automatic expansion.",
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
-        query: z.string().describe("Natural language query - describe what you're looking for"),
+        query: z.union([
+          z.string(),
+          z.object({
+            text: z.string().describe("The search query"),
+            keywords: z.array(z.string()).optional()
+              .describe("BM25 keyword variants for exact term matching"),
+            concepts: z.array(z.string()).optional()
+              .describe("Semantic phrases matched by meaning, not exact words"),
+            passage: z.string().min(1).optional()
+              .describe("Hypothetical document passage or paragraph resembling a matching document"),
+          }),
+        ]).describe("Prefer object with text/keywords/concepts/passage for best results. Fallback: pass as string for automatic expansion."),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
-      const results = await hybridQuery(store, query, { collection, limit, minScore });
+    async ({ query, intent, limit, minScore, collection }) => {
+      const sq = normalizeQuery(query);
+      const results = await hybridQuery(store, sq, { collection, limit, minScore, intent });
 
       const filtered: SearchResultItem[] = results.map(r => {
-        const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
+        const { line, snippet } = extractSnippet(r.bestChunk, sq.text, { maxLen: 300, intent });
         return {
           docid: `#${r.docid}`,
           file: r.displayPath,
@@ -344,7 +368,7 @@ function createMcpServer(store: Store): McpServer {
       });
 
       return {
-        content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
+        content: [{ type: "text", text: formatSearchSummary(filtered, sq.text) }],
         structuredContent: { results: filtered },
       };
     }

diff --git a/src/qmd.ts b/src/qmd.ts
@@ -61,6 +61,7 @@ import {
   vectorSearchQuery,
   addLineNumbers,
   type ExpandedQuery,
+  type StructuredQuery,
   DEFAULT_EMBED_MODEL,
   DEFAULT_RERANK_MODEL,
   DEFAULT_GLOB,
@@ -1751,6 +1752,11 @@ type OutputOptions = {
   collection?: string | string[];  // Filter by collection name(s)
   lineNumbers?: boolean; // Add line numbers to output
   context?: string;      // Optional context for query expansion
+  intent?: string;       // Optional background context for disambiguation
+  // Structured query options — bypass LLM expansion
+  keywords?: string;     // comma-separated keyword variants
+  concepts?: string;     // comma-separated semantic phrases
+  passage?: string;      // hypothetical document passage
 };
 
 // Highlight query terms in text (skip short words < 3 chars)
@@ -1799,7 +1805,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     const output = filtered.map(row => {
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
       let body = opts.full ? row.body : undefined;
-      let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
+      let snippet = !opts.full ? extractSnippet(row.body, query, { maxLen: 300, chunkPos: row.chunkPos, intent: opts.intent }).snippet : undefined;
       if (opts.lineNumbers) {
         if (body) body = addLineNumbers(body);
         if (snippet) snippet = addLineNumbers(snippet);
@@ -1826,7 +1832,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     for (let i = 0; i < filtered.length; i++) {
       const row = filtered[i];
       if (!row) continue;
-      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+      const { line, snippet } = extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent });
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
 
       // Line 1: filepath with docid
@@ -1867,7 +1873,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
       if (!row) continue;
       const heading = row.title || row.displayPath;
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
-      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+      let content = opts.full ? row.body : extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent }).snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content);
       }
@@ -1880,7 +1886,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
       const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
       const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
-      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+      let content = opts.full ? row.body : extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent }).snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content);
       }
@@ -1890,7 +1896,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     // CSV format
     console.log("docid,score,file,title,context,line,snippet");
     for (const row of filtered) {
-      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+      const { line, snippet } = extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent });
       let content = opts.full ? row.body : snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content, line);
@@ -1994,6 +2000,7 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
       collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0.3,
+      intent: opts.intent,
       hooks: {
         onExpand: (original, expanded) => {
           logExpansionTree(original, expanded);
@@ -2038,11 +2045,22 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
 
   checkIndexHealth(store.db);
 
+  // Build structured query when caller provides expansion flags
+  const sq: string | StructuredQuery = (opts.keywords || opts.concepts || opts.passage)
+    ? {
+        text: query,
+        ...(opts.keywords && { keywords: opts.keywords.split(",").map(s => s.trim()).filter(Boolean) }),
+        ...(opts.concepts && { concepts: opts.concepts.split(",").map(s => s.trim()).filter(Boolean) }),
+        ...(opts.passage && { passage: opts.passage }),
+      }
+    : query;
+
   await withLLMSession(async () => {
-    let results = await hybridQuery(store, query, {
+    let results = await hybridQuery(store, sq, {
       collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0,
+      intent: opts.intent,
       hooks: {
         onStrongSignal: (score) => {
           process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -2121,6 +2139,12 @@ function parseCLI() {
       // Collection options
       name: { type: "string" },  // collection name
       mask: { type: "string" },  // glob pattern
+      // Intent option (for query, vsearch, search)
+      intent: { type: "string" },
+      // Structured query options (for query/deep-search — bypasses LLM expansion)
+      keywords: { type: "string" },    // comma-separated keyword variants
+      concepts: { type: "string" },    // comma-separated semantic phrases
+      passage: { type: "string" },     // hypothetical document passage
       // Embed options
       force: { type: "boolean", short: "f" },
       // Update options
@@ -2168,6 +2192,10 @@ function parseCLI() {
     all: isAll,
     collection: values.collection as string[] | undefined,
     lineNumbers: !!values["line-numbers"],
+    intent: values.intent as string | undefined,
+    keywords: values.keywords as string | undefined,
+    concepts: values.concepts as string | undefined,
+    passage: values.passage as string | undefined,
   };
 
   return {