diff --git a/src/formatter.ts b/src/formatter.ts
index b3278307..9617503e 100644
--- a/src/formatter.ts
+++ b/src/formatter.ts
@@ -101,7 +101,7 @@ export function searchResultsToJson(
   const output = results.map(row => {
     const bodyStr = row.body || "";
     let body = opts.full ? bodyStr : undefined;
-    let snippet = !opts.full ? extractSnippet(bodyStr, query, 300, row.chunkPos).snippet : undefined;
+    let snippet = !opts.full ? extractSnippet(bodyStr, query, { maxLen: 300, chunkPos: row.chunkPos }).snippet : undefined;
 
     if (opts.lineNumbers) {
       if (body) body = addLineNumbers(body);
@@ -132,7 +132,7 @@ export function searchResultsToCsv(
   const header = "docid,score,file,title,context,line,snippet";
   const rows = results.map(row => {
     const bodyStr = row.body || "";
-    const { line, snippet } = extractSnippet(bodyStr, query, 500, row.chunkPos);
+    const { line, snippet } = extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos });
     let content = opts.full ? bodyStr : snippet;
     if (opts.lineNumbers && content) {
       content = addLineNumbers(content);
@@ -175,7 +175,7 @@ export function searchResultsToMarkdown(
     if (opts.full) {
       content = bodyStr;
     } else {
-      content = extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+      content = extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos }).snippet;
     }
     if (opts.lineNumbers) {
       content = addLineNumbers(content);
@@ -196,7 +196,7 @@ export function searchResultsToXml(
   const items = results.map(row => {
     const titleAttr = row.title ? ` title="${escapeXml(row.title)}"` : "";
     const bodyStr = row.body || "";
-    let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, 500, row.chunkPos).snippet;
+    let content = opts.full ? bodyStr : extractSnippet(bodyStr, query, { maxLen: 500, chunkPos: row.chunkPos }).snippet;
     if (opts.lineNumbers) {
       content = addLineNumbers(content);
     }
diff --git a/src/llm.ts b/src/llm.ts
index 1beca8c4..03f987fb 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -933,7 +933,7 @@ export class LlamaCpp implements LLM {
   // High-level abstractions
   // ==========================================================================
 
-  async expandQuery(query: string, options: { context?: string, includeLexical?: boolean } = {}): Promise<Queryable[]> {
+  async expandQuery(query: string, options: { intent?: string, context?: string, includeLexical?: boolean } = {}): Promise<Queryable[]> {
     // Ping activity at start to keep models alive during this operation
     this.touchActivity();
 
@@ -942,6 +942,7 @@ export class LlamaCpp implements LLM {
 
     const includeLexical = options.includeLexical ?? true;
     const context = options.context;
+    const intent = options.intent;
 
     const grammar = await llama.createGrammar({
       grammar: `
@@ -952,7 +953,11 @@ export class LlamaCpp implements LLM {
       `
     });
 
-    const prompt = `/no_think Expand this search query: ${query}`;
+    // When intent is provided, include it as background context so the LLM
+    // generates expansions that are better aligned with the caller's goal.
+    const prompt = intent
+      ? `/no_think Context: ${intent}\nExpand this search query: ${query}`
+      : `/no_think Expand this search query: ${query}`;
 
     // Create fresh context for each call
     const genContext = await this.generateModel!.createContext();
diff --git a/src/mcp.ts b/src/mcp.ts
index fa674874..60b972e8 100644
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -21,6 +21,7 @@ import {
   addLineNumbers,
   hybridQuery,
   vectorSearchQuery,
+  normalizeQuery,
   DEFAULT_MULTI_GET_MAX_BYTES,
 } from "./store.js";
 import type { Store } from "./store.js";
@@ -101,7 +102,8 @@ function buildInstructions(store: Store): string {
   // --- What's searchable? ---
   if (status.collections.length > 0) {
     lines.push("");
-    lines.push("Collections (scope with `collection` parameter):");
+    lines.push("Collections — when the user's request maps to a specific collection, always");
+    lines.push("set the `collection` parameter to filter. This reduces noise and improves relevance.");
     for (const col of status.collections) {
       const collConfig = getCollection(col.name);
       const rootCtx = collConfig?.context?.[""] || collConfig?.context?.["/"];
@@ -125,7 +127,14 @@ function buildInstructions(store: Store): string {
   lines.push("Search:");
   lines.push("  - `search` (~30ms) — keyword and exact phrase matching.");
   lines.push("  - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs.");
-  lines.push("  - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits.");
+  lines.push("  - `deep_search` — hybrid search with reranking. You are the query expander —");
+  lines.push("    generate all three expansion fields to replace the built-in LLM expansion:");
+  lines.push("      query: `{ text, keywords, concepts, passage }`");
+  lines.push("      `keywords`: BM25 search terms and synonyms (e.g. [\"TTFB\", \"core web vitals\"])");
+  lines.push("      `concepts`: semantic phrases for embedding search (e.g. [\"frontend rendering optimization\"])");
+  lines.push("      `passage`: a paragraph written as if it were the ideal matching document");
+  lines.push("    Fallback: pass query as a plain string for automatic expansion (~10s slower).");
+  lines.push("  Always provide `intent` on every search call to disambiguate and improve snippets.");
 
   // --- Retrieval workflow ---
   lines.push("");
@@ -233,17 +242,18 @@ function createMcpServer(store: Store): McpServer {
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
         query: z.string().describe("Search query - keywords or phrases to find"),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
+    async ({ query, intent, limit, minScore, collection }) => {
       const results = store.searchFTS(query, limit || 10, collection);
       const filtered: SearchResultItem[] = results
         .filter(r => r.score >= (minScore || 0))
         .map(r => {
-          const { line, snippet } = extractSnippet(r.body || "", query, 300, r.chunkPos);
+          const { line, snippet } = extractSnippet(r.body || "", query, { maxLen: 300, chunkPos: r.chunkPos, intent });
           return {
             docid: `#${r.docid}`,
             file: r.displayPath,
@@ -273,13 +283,14 @@ function createMcpServer(store: Store): McpServer {
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
         query: z.string().describe("Natural language query - describe what you're looking for"),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0.3).describe("Minimum relevance score 0-1 (default: 0.3)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
-      const results = await vectorSearchQuery(store, query, { collection, limit, minScore });
+    async ({ query, intent, limit, minScore, collection }) => {
+      const results = await vectorSearchQuery(store, query, { collection, limit, minScore, intent });
 
       if (results.length === 0) {
         // Distinguish "no embeddings" from "no matches" — check if vector table exists
@@ -293,7 +304,7 @@ function createMcpServer(store: Store): McpServer {
       }
 
       const filtered: SearchResultItem[] = results.map(r => {
-        const { line, snippet } = extractSnippet(r.body, query, 300);
+        const { line, snippet } = extractSnippet(r.body, query, { maxLen: 300, intent });
         return {
           docid: `#${r.docid}`,
           file: r.displayPath,
@@ -319,20 +330,33 @@ function createMcpServer(store: Store): McpServer {
     "deep_search",
     {
       title: "Deep Search",
-      description: "Deep search. Auto-expands the query into variations, searches each by keyword and meaning, and reranks for top hits across all results.",
+      description: "Deep search with reranking. Prefer query as object with keywords/concepts/passage for best results. Fallback: pass as string for automatic expansion.",
       annotations: { readOnlyHint: true, openWorldHint: false },
       inputSchema: {
-        query: z.string().describe("Natural language query - describe what you're looking for"),
+        query: z.union([
+          z.string(),
+          z.object({
+            text: z.string().describe("The search query"),
+            keywords: z.array(z.string()).optional()
+              .describe("BM25 keyword variants for exact term matching"),
+            concepts: z.array(z.string()).optional()
+              .describe("Semantic phrases matched by meaning, not exact words"),
+            passage: z.string().min(1).optional()
+              .describe("Hypothetical document passage or paragraph resembling a matching document"),
+          }),
+        ]).describe("Prefer object with text/keywords/concepts/passage for best results. Fallback: pass as string for automatic expansion."),
+        intent: z.string().optional().describe("Optional background context — when the query is ambiguous, describe the intended interpretation. Omit for precise queries."),
         limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
         minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
         collection: z.string().optional().describe("Filter to a specific collection by name"),
       },
     },
-    async ({ query, limit, minScore, collection }) => {
-      const results = await hybridQuery(store, query, { collection, limit, minScore });
+    async ({ query, intent, limit, minScore, collection }) => {
+      const sq = normalizeQuery(query);
+      const results = await hybridQuery(store, sq, { collection, limit, minScore, intent });
 
       const filtered: SearchResultItem[] = results.map(r => {
-        const { line, snippet } = extractSnippet(r.bestChunk, query, 300);
+        const { line, snippet } = extractSnippet(r.bestChunk, sq.text, { maxLen: 300, intent });
         return {
           docid: `#${r.docid}`,
           file: r.displayPath,
@@ -344,7 +368,7 @@ function createMcpServer(store: Store): McpServer {
       });
 
       return {
-        content: [{ type: "text", text: formatSearchSummary(filtered, query) }],
+        content: [{ type: "text", text: formatSearchSummary(filtered, sq.text) }],
         structuredContent: { results: filtered },
       };
     }
diff --git a/src/qmd.ts b/src/qmd.ts
index 244578f8..3b2ee726 100755
--- a/src/qmd.ts
+++ b/src/qmd.ts
@@ -61,6 +61,7 @@ import {
   vectorSearchQuery,
   addLineNumbers,
   type ExpandedQuery,
+  type StructuredQuery,
   DEFAULT_EMBED_MODEL,
   DEFAULT_RERANK_MODEL,
   DEFAULT_GLOB,
@@ -1751,6 +1752,11 @@ type OutputOptions = {
   collection?: string | string[];  // Filter by collection name(s)
   lineNumbers?: boolean; // Add line numbers to output
   context?: string;      // Optional context for query expansion
+  intent?: string;       // Optional background context for disambiguation
+  // Structured query options — bypass LLM expansion
+  keywords?: string;     // comma-separated keyword variants
+  concepts?: string;     // comma-separated semantic phrases
+  passage?: string;      // hypothetical document passage
 };
 
 // Highlight query terms in text (skip short words < 3 chars)
@@ -1799,7 +1805,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     const output = filtered.map(row => {
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
       let body = opts.full ? row.body : undefined;
-      let snippet = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos).snippet : undefined;
+      let snippet = !opts.full ? extractSnippet(row.body, query, { maxLen: 300, chunkPos: row.chunkPos, intent: opts.intent }).snippet : undefined;
       if (opts.lineNumbers) {
         if (body) body = addLineNumbers(body);
         if (snippet) snippet = addLineNumbers(snippet);
@@ -1826,7 +1832,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     for (let i = 0; i < filtered.length; i++) {
       const row = filtered[i];
       if (!row) continue;
-      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+      const { line, snippet } = extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent });
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
 
       // Line 1: filepath with docid
@@ -1867,7 +1873,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
       if (!row) continue;
       const heading = row.title || row.displayPath;
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
-      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+      let content = opts.full ? row.body : extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent }).snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content);
       }
@@ -1880,7 +1886,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
       const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
       const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
       const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
-      let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos).snippet;
+      let content = opts.full ? row.body : extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent }).snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content);
       }
@@ -1890,7 +1896,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
     // CSV format
     console.log("docid,score,file,title,context,line,snippet");
     for (const row of filtered) {
-      const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos);
+      const { line, snippet } = extractSnippet(row.body, query, { maxLen: 500, chunkPos: row.chunkPos, intent: opts.intent });
       let content = opts.full ? row.body : snippet;
       if (opts.lineNumbers) {
         content = addLineNumbers(content, line);
@@ -1994,6 +2000,7 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
       collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0.3,
+      intent: opts.intent,
       hooks: {
         onExpand: (original, expanded) => {
           logExpansionTree(original, expanded);
@@ -2038,11 +2045,22 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
 
   checkIndexHealth(store.db);
 
+  // Build structured query when caller provides expansion flags
+  const sq: string | StructuredQuery = (opts.keywords || opts.concepts || opts.passage)
+    ? {
+        text: query,
+        ...(opts.keywords && { keywords: opts.keywords.split(",").map(s => s.trim()).filter(Boolean) }),
+        ...(opts.concepts && { concepts: opts.concepts.split(",").map(s => s.trim()).filter(Boolean) }),
+        ...(opts.passage && { passage: opts.passage }),
+      }
+    : query;
+
   await withLLMSession(async () => {
-    let results = await hybridQuery(store, query, {
+    let results = await hybridQuery(store, sq, {
       collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
       minScore: opts.minScore || 0,
+      intent: opts.intent,
       hooks: {
         onStrongSignal: (score) => {
           process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@@ -2121,6 +2139,12 @@ function parseCLI() {
       // Collection options
       name: { type: "string" },  // collection name
       mask: { type: "string" },  // glob pattern
+      // Intent option (for query, vsearch, search)
+      intent: { type: "string" },
+      // Structured query options (for query/deep-search — bypasses LLM expansion)
+      keywords: { type: "string" },    // comma-separated keyword variants
+      concepts: { type: "string" },    // comma-separated semantic phrases
+      passage: { type: "string" },     // hypothetical document passage
       // Embed options
       force: { type: "boolean", short: "f" },
       // Update options
@@ -2168,6 +2192,10 @@ function parseCLI() {
     all: isAll,
     collection: values.collection as string[] | undefined,
     lineNumbers: !!values["line-numbers"],
+    intent: values.intent as string | undefined,
+    keywords: values.keywords as string | undefined,
+    concepts: values.concepts as string | undefined,
+    passage: values.passage as string | undefined,
   };
 
   return {
diff --git a/src/store.ts b/src/store.ts
index b68f8c0b..1bdd48a6 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -239,6 +239,37 @@ export type ExpandedQuery = {
   text: string;
 };
 
+// =============================================================================
+// Structured query — caller-provided expansions that bypass LLM expansion
+// =============================================================================
+
+/** Structured query with optional caller-provided search expansions.
+ *  When any expansion field is present, LLM expansion is skipped entirely. */
+export interface StructuredQuery {
+  text: string;              // the query itself (always required)
+  keywords?: string[];       // → FTS (BM25) — replaces lex expansion
+  concepts?: string[];       // → embedding + vector search — replaces vec expansion
+  passage?: string;          // → embedding + vector search — replaces hyde expansion
+}
+
+/** Normalize query input: string becomes { text } object.
+ *  Strips empty expansion fields so hasCallerExpansions correctly
+ *  detects "no expansions provided" even when fields are present but empty. */
+export function normalizeQuery(query: string | StructuredQuery): StructuredQuery {
+  if (typeof query === 'string') return { text: query };
+  return {
+    text: query.text,
+    ...(query.keywords?.length && { keywords: query.keywords }),
+    ...(query.concepts?.length && { concepts: query.concepts }),
+    ...(query.passage && { passage: query.passage }),
+  };
+}
+
+/** True when the caller provided any expansion field (keywords, concepts, or passage). */
+export function hasCallerExpansions(q: StructuredQuery): boolean {
+  return !!((q.keywords && q.keywords.length > 0) || (q.concepts && q.concepts.length > 0) || q.passage);
+}
+
 // =============================================================================
 // Path utilities
 // =============================================================================
@@ -809,8 +840,8 @@ export type Store = {
   searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise<SearchResult[]>;
 
   // Query expansion & reranking
-  expandQuery: (query: string, model?: string) => Promise<ExpandedQuery[]>;
-  rerank: (query: string, documents: { file: string; text: string }[], model?: string) => Promise<{ file: string; score: number }[]>;
+  expandQuery: (query: string, intent?: string, model?: string) => Promise<ExpandedQuery[]>;
+  rerank: (query: string, documents: { file: string; text: string }[], intent?: string, model?: string) => Promise<{ file: string; score: number }[]>;
 
   // Document retrieval
   findDocument: (filename: string, options?: { includeBody?: boolean }) => DocumentResult | DocumentNotFound;
@@ -892,8 +923,8 @@ export function createStore(dbPath?: string): Store {
     searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
 
     // Query expansion & reranking
-    expandQuery: (query: string, model?: string) => expandQuery(query, model, db),
-    rerank: (query: string, documents: { file: string; text: string }[], model?: string) => rerank(query, documents, model, db),
+    expandQuery: (query: string, intent?: string, model?: string) => expandQuery(query, intent, model, db),
+    rerank: (query: string, documents: { file: string; text: string }[], intent?: string, model?: string) => rerank(query, documents, intent, model, db),
 
     // Document retrieval
     findDocument: (filename: string, options?: { includeBody?: boolean }) => findDocument(db, filename, options),
@@ -2201,10 +2232,11 @@ export function insertEmbedding(
 // Query expansion
 // =============================================================================
 
-export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database): Promise<ExpandedQuery[]> {
+export async function expandQuery(query: string, intent?: string, model: string = DEFAULT_QUERY_MODEL, db?: Database): Promise<ExpandedQuery[]> {
   // Check cache first — stored as JSON preserving types
-  const cacheKey = getCacheKey("expandQuery", { query, model });
-  const cached = getCachedResult(db, cacheKey);
+  // Intent is part of the cache key so different intents produce different expansions
+  const cacheKey = getCacheKey("expandQuery", { query, intent: intent || "", model });
+  const cached = db ? getCachedResult(db, cacheKey) : null;
   if (cached) {
     try {
       return JSON.parse(cached) as ExpandedQuery[];
@@ -2215,7 +2247,7 @@ export async function expandQuery(query: string, model: string = DEFAULT_QUERY_M
 
   const llm = getDefaultLlamaCpp();
   // Note: LlamaCpp uses hardcoded model, model parameter is ignored
-  const results = await llm.expandQuery(query);
+  const results = await llm.expandQuery(query, { intent });
 
   // Map Queryable[] → ExpandedQuery[] (same shape, decoupled from llm.ts internals).
   // Filter out entries that duplicate the original query text.
@@ -2223,7 +2255,7 @@ export async function expandQuery(query: string, model: string = DEFAULT_QUERY_M
     .filter(r => r.text !== query)
     .map(r => ({ type: r.type, text: r.text }));
 
-  if (expanded.length > 0) {
+  if (expanded.length > 0 && db) {
     setCachedResult(db, cacheKey, JSON.stringify(expanded));
   }
 
@@ -2234,16 +2266,23 @@ export async function expandQuery(query: string, model: string = DEFAULT_QUERY_M
 // Reranking
 // =============================================================================
 
-export async function rerank(query: string, documents: { file: string; text: string }[], model: string = DEFAULT_RERANK_MODEL, db: Database): Promise<{ file: string; score: number }[]> {
+export async function rerank(query: string, documents: { file: string; text: string }[], intent?: string, model: string = DEFAULT_RERANK_MODEL, db?: Database): Promise<{ file: string; score: number }[]> {
+  // When intent is provided, prepend it to the query for the reranker.
+  // Qwen3-Reranker is instruction-aware and was trained with <Instruct> prefixes.
+  // Prepending intent to the query string is the simplest injection strategy
+  // that doesn't require changes to node-llama-cpp's rankAll() API.
+  const rerankQuery = intent ? `${intent}\n\n${query}` : query;
+
   const cachedResults: Map<string, number> = new Map();
   const uncachedDocs: RerankDocument[] = [];
 
   // Check cache for each document
   // Cache key includes chunk text — different queries can select different chunks
   // from the same file, and the reranker score depends on which chunk was sent.
+  // Intent is part of cache key so different intents produce different scores.
   for (const doc of documents) {
-    const cacheKey = getCacheKey("rerank", { query, file: doc.file, model, chunk: doc.text });
-    const cached = getCachedResult(db, cacheKey);
+    const cacheKey = getCacheKey("rerank", { query: rerankQuery, file: doc.file, model, chunk: doc.text });
+    const cached = db ? getCachedResult(db, cacheKey) : null;
     if (cached !== null) {
       cachedResults.set(doc.file, parseFloat(cached));
     } else {
@@ -2254,13 +2293,13 @@ export async function rerank(query: string, documents: { file: string; text: str
   // Rerank uncached documents using LlamaCpp
   if (uncachedDocs.length > 0) {
     const llm = getDefaultLlamaCpp();
-    const rerankResult = await llm.rerank(query, uncachedDocs, { model });
+    const rerankResult = await llm.rerank(rerankQuery, uncachedDocs, { model });
 
     // Cache results — use original doc.text for cache key (result.file lacks chunk text)
     const textByFile = new Map(documents.map(d => [d.file, d.text]));
     for (const result of rerankResult.results) {
-      const cacheKey = getCacheKey("rerank", { query, file: result.file, model, chunk: textByFile.get(result.file) || "" });
-      setCachedResult(db, cacheKey, result.score.toString());
+      const cacheKey = getCacheKey("rerank", { query: rerankQuery, file: result.file, model, chunk: textByFile.get(result.file) || "" });
+      if (db) setCachedResult(db, cacheKey, result.score.toString());
       cachedResults.set(result.file, result.score);
     }
   }
@@ -2271,6 +2310,15 @@ export async function rerank(query: string, documents: { file: string; text: str
     .sort((a, b) => b.score - a.score);
 }
 
+/** Convert SearchResult[] → RankedResult[] and populate docid map as side effect. */
+function toRankedList(results: SearchResult[], docidMap: Map<string, string>): RankedResult[] {
+  for (const r of results) docidMap.set(r.filepath, r.docid);
+  return results.map(r => ({
+    file: r.filepath, displayPath: r.displayPath,
+    title: r.title, body: r.body || "", score: r.score,
+  }));
+}
+
 // =============================================================================
 // Reciprocal Rank Fusion
 // =============================================================================
@@ -2669,7 +2717,48 @@ export type SnippetResult = {
   snippetLines: number;   // Number of lines in snippet
 };
 
-export function extractSnippet(body: string, query: string, maxLen = 500, chunkPos?: number, chunkLen?: number): SnippetResult {
+/** Intent term weight for chunk selection. Initial value — not tuned. */
+export const INTENT_WEIGHT_CHUNK = 0.5;
+/** Intent term weight for snippet line scoring. Initial value — not tuned. */
+export const INTENT_WEIGHT_SNIPPET = 0.3;
+
+// Common stop words filtered from intent strings before tokenization.
+// Seeded from finetune/reward.py KEY_TERM_STOPWORDS, extended with common
+// 2-3 char function words so the length threshold can drop to >1 and let
+// short domain terms (API, SQL, LLM, CPU, CDN, …) survive.
+const INTENT_STOP_WORDS = new Set([
+  // 2-char function words
+  "am", "an", "as", "at", "be", "by", "do", "he", "if",
+  "in", "is", "it", "me", "my", "no", "of", "on", "or", "so",
+  "to", "up", "us", "we",
+  // 3-char function words
+  "all", "and", "any", "are", "but", "can", "did", "for", "get",
+  "has", "her", "him", "his", "how", "its", "let", "may", "not",
+  "our", "out", "the", "too", "was", "who", "why", "you",
+  // 4+ char common words
+  "also", "does", "find", "from", "have", "into", "more", "need",
+  "show", "some", "tell", "that", "them", "this", "want", "what",
+  "when", "will", "with", "your",
+  // Search-context noise
+  "about", "looking", "notes", "search", "where", "which",
+]);
+
+/** Extract intent terms: lowercase, trim surrounding punctuation, >1 char, stop words removed. */
+export function extractIntentTerms(intent: string): string[] {
+  return intent.toLowerCase().split(/\s+/)
+    .map(t => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
+    .filter(t => t.length > 1 && !INTENT_STOP_WORDS.has(t));
+}
+
+export interface ExtractSnippetOptions {
+  maxLen?: number;
+  chunkPos?: number;
+  chunkLen?: number;
+  intent?: string;
+}
+
+export function extractSnippet(body: string, query: string, options?: ExtractSnippetOptions): SnippetResult {
+  const { maxLen = 500, chunkPos, chunkLen, intent } = options || {};
   const totalLines = body.split('\n').length;
   let searchBody = body;
   let lineOffset = 0;
@@ -2688,6 +2777,9 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
 
   const lines = searchBody.split('\n');
   const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0);
+  // Intent terms at lower weight — nudge snippet toward intent-relevant
+  // lines without overriding query-term anchoring.
+  const intentTerms = intent ? extractIntentTerms(intent) : [];
   let bestLine = 0, bestScore = -1;
 
   for (let i = 0; i < lines.length; i++) {
@@ -2696,6 +2788,9 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
     for (const term of queryTerms) {
       if (lineLower.includes(term)) score++;
     }
+    for (const term of intentTerms) {
+      if (lineLower.includes(term)) score += INTENT_WEIGHT_SNIPPET;
+    }
     if (score > bestScore) {
       bestScore = score;
       bestLine = i;
@@ -2710,7 +2805,7 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
   // If we focused on a chunk window and it produced an empty/whitespace-only snippet,
   // fall back to a full-document snippet so we always show something useful.
   if (chunkPos && chunkPos > 0 && snippetText.trim().length === 0) {
-    return extractSnippet(body, query, maxLen, undefined);
+    return extractSnippet(body, query, { maxLen, intent });
   }
 
   if (snippetText.length > maxLen) snippetText = snippetText.substring(0, maxLen - 3) + "...";
@@ -2776,6 +2871,10 @@ export interface HybridQueryOptions {
   minScore?: number;        // default 0
   candidateLimit?: number;  // default RERANK_CANDIDATE_LIMIT
   hooks?: SearchHooks;
+  /** Optional background context behind the search intent. Used to disambiguate
+   *  the query during query expansion and reranking. Example: query="decision making",
+   *  intent="writing about how engineering teams make architectural decisions". */
+  intent?: string;
 }
 
 export interface HybridQueryResult {
@@ -2805,7 +2904,7 @@ export interface HybridQueryResult {
  */
 export async function hybridQuery(
   store: Store,
-  query: string,
+  query: string | StructuredQuery,
   options?: HybridQueryOptions
 ): Promise<HybridQueryResult[]> {
   const limit = options?.limit ?? 10;
@@ -2813,6 +2912,11 @@ export async function hybridQuery(
   const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
   const collection = options?.collection;
   const hooks = options?.hooks;
+  const intent = options?.intent;
+
+  // Normalize: string → { text }, object passes through
+  const sq = normalizeQuery(query);
+  const callerExpansions = hasCallerExpansions(sq);
 
   const rankedLists: RankedResult[][] = [];
   const docidMap = new Map<string, string>(); // filepath -> docid
@@ -2820,90 +2924,91 @@ export async function hybridQuery(
     `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
   ).get();
 
-  // Step 1: BM25 probe — strong signal skips expensive LLM expansion
-  // Pass collection directly into FTS query (filter at SQL level, not post-hoc)
-  const initialFts = store.searchFTS(query, 20, collection);
+  // Step 1: BM25 probe — strong signal skips expensive LLM expansion.
+  // Disabled when intent or caller expansions are present — the caller
+  // is explicitly saying "I know what I want, don't take shortcuts."
+  const initialFts = store.searchFTS(sq.text, 20, collection);
   const topScore = initialFts[0]?.score ?? 0;
   const secondScore = initialFts[1]?.score ?? 0;
-  const hasStrongSignal = initialFts.length > 0
+  const hasStrongSignal = !intent && !callerExpansions
+    && initialFts.length > 0
     && topScore >= STRONG_SIGNAL_MIN_SCORE
     && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
 
   if (hasStrongSignal) hooks?.onStrongSignal?.(topScore);
 
-  // Step 2: Expand query (or skip if strong signal)
-  const expanded = hasStrongSignal
-    ? []
-    : await store.expandQuery(query);
+  // Step 2: Build search variants — either from caller or from LLM expansion.
+  // When caller provides keywords/concepts/passage, skip LLM entirely.
+  const expanded: ExpandedQuery[] = callerExpansions
+    ? [] // Caller owns expansion — don't run LLM
+    : hasStrongSignal
+      ? []
+      : await store.expandQuery(sq.text, intent);
+
+  // Only fire onExpand when LLM expansion actually ran — caller expansions
+  // bypass the LLM and strong signal skips it, so there's nothing to observe.
+  if (!callerExpansions && expanded.length > 0) hooks?.onExpand?.(sq.text, expanded);
 
-  hooks?.onExpand?.(query, expanded);
+  // Track which ranked lists represent the original query (FTS + vec).
+  // These get 2x RRF weight because the original query is the strongest
+  // relevance signal — expansions are supplementary.
+  const primaryIndices = new Set<number>();
 
   // Seed with initial FTS results (avoid re-running original query FTS)
   if (initialFts.length > 0) {
-    for (const r of initialFts) docidMap.set(r.filepath, r.docid);
-    rankedLists.push(initialFts.map(r => ({
-      file: r.filepath, displayPath: r.displayPath,
-      title: r.title, body: r.body || "", score: r.score,
-    })));
-  }
-
-  // Step 3: Route searches by query type
-  //
-  // Strategy: run all FTS queries immediately (they're sync/instant), then
-  // batch-embed all vector queries in one embedBatch() call, then run
-  // sqlite-vec lookups with pre-computed embeddings.
-
-  // 3a: Run FTS for all lex expansions right away (no LLM needed)
-  for (const q of expanded) {
-    if (q.type === 'lex') {
-      const ftsResults = store.searchFTS(q.text, 20, collection);
-      if (ftsResults.length > 0) {
-        for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
-        rankedLists.push(ftsResults.map(r => ({
-          file: r.filepath, displayPath: r.displayPath,
-          title: r.title, body: r.body || "", score: r.score,
-        })));
-      }
+    primaryIndices.add(rankedLists.length);
+    rankedLists.push(toRankedList(initialFts, docidMap));
+  }
+
+  // Step 3: Route searches — FTS for keyword terms, vector for semantic queries.
+  // Source differs by path: caller-provided fields OR LLM expansion variants.
+  // Both paths produce ranked lists that feed into the same RRF fusion.
+
+  // 3a: FTS for keyword variants (caller keywords or LLM lex expansions)
+  const ftsTerms: string[] = callerExpansions
+    ? (sq.keywords || [])
+    : expanded.filter(q => q.type === 'lex').map(q => q.text);
+
+  for (const term of ftsTerms) {
+    const ftsResults = store.searchFTS(term, 20, collection);
+    if (ftsResults.length > 0) {
+      rankedLists.push(toRankedList(ftsResults, docidMap));
     }
   }
 
-  // 3b: Collect all texts that need vector search (original query + vec/hyde expansions)
+  // 3b: Vector search — original query (primary, 2x weight) + expansion variants
   if (hasVectors) {
-    const vecQueries: { text: string; isOriginal: boolean }[] = [
-      { text: query, isOriginal: true },
-    ];
-    for (const q of expanded) {
-      if (q.type === 'vec' || q.type === 'hyde') {
-        vecQueries.push({ text: q.text, isOriginal: false });
+    const vecTexts: string[] = [sq.text]; // index 0 = original query (primary)
+    if (callerExpansions) {
+      for (const c of sq.concepts || []) vecTexts.push(c);
+      if (sq.passage) vecTexts.push(sq.passage);
+    } else {
+      for (const q of expanded) {
+        if (q.type === 'vec' || q.type === 'hyde') vecTexts.push(q.text);
       }
     }
 
-    // Batch embed all vector queries in a single call
     const llm = getDefaultLlamaCpp();
-    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
-    const embeddings = await llm.embedBatch(textsToEmbed);
+    const embeddings = await llm.embedBatch(vecTexts.map(formatQueryForEmbedding));
 
-    // Run sqlite-vec lookups with pre-computed embeddings
-    for (let i = 0; i < vecQueries.length; i++) {
+    for (let i = 0; i < vecTexts.length; i++) {
       const embedding = embeddings[i]?.embedding;
       if (!embedding) continue;
 
       const vecResults = await store.searchVec(
-        vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
+        vecTexts[i]!, DEFAULT_EMBED_MODEL, 20, collection,
         undefined, embedding
       );
       if (vecResults.length > 0) {
-        for (const r of vecResults) docidMap.set(r.filepath, r.docid);
-        rankedLists.push(vecResults.map(r => ({
-          file: r.filepath, displayPath: r.displayPath,
-          title: r.title, body: r.body || "", score: r.score,
-        })));
+        if (i === 0) primaryIndices.add(rankedLists.length);
+        rankedLists.push(toRankedList(vecResults, docidMap));
       }
     }
   }
 
-  // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
-  const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+  // Step 4: RRF fusion — original query lists (FTS + vec) get 2x weight,
+  // expansion variants (keywords, concepts, passage, lex, vec, hyde) get 1x.
+  const weights = rankedLists.map((_, i) => primaryIndices.has(i) ? 2.0 : 1.0);
   const fused = reciprocalRankFusion(rankedLists, weights);
   const candidates = fused.slice(0, candidateLimit);
 
@@ -2911,7 +3016,10 @@ export async function hybridQuery(
 
   // Step 5: Chunk documents, pick best chunk per doc for reranking.
   // Reranking full bodies is O(tokens) — the critical perf lesson that motivated this refactor.
-  const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
+  const queryTerms = sq.text.toLowerCase().split(/\s+/).filter(t => t.length > 2);
+  // Intent terms steer chunk selection toward intent-relevant sections
+  // without overwhelming the query signal.
+  const intentTerms = intent ? extractIntentTerms(intent) : [];
   const chunksToRerank: { file: string; text: string }[] = [];
   const docChunkMap = new Map<string, { chunks: { text: string; pos: number }[]; bestIdx: number }>();
 
@@ -2924,8 +3032,9 @@ export async function hybridQuery(
     let bestScore = -1;
     for (let i = 0; i < chunks.length; i++) {
       const chunkLower = chunks[i]!.text.toLowerCase();
-      const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
-      if (score > bestScore) { bestScore = score; bestIdx = i; }
+      const qScore = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0);
+      const iScore = intentTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? INTENT_WEIGHT_CHUNK : 0), 0);
+      if (qScore + iScore > bestScore) { bestScore = qScore + iScore; bestIdx = i; }
     }
 
     chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text });
@@ -2934,7 +3043,7 @@ export async function hybridQuery(
 
   // Step 6: Rerank chunks (NOT full bodies)
   hooks?.onRerankStart?.(chunksToRerank.length);
-  const reranked = await store.rerank(query, chunksToRerank);
+  const reranked = await store.rerank(sq.text, chunksToRerank, intent);
   hooks?.onRerankDone?.();
 
   // Step 7: Blend RRF position score with reranker score
@@ -2989,6 +3098,8 @@ export interface VectorSearchOptions {
   limit?: number;           // default 10
   minScore?: number;        // default 0.3
   hooks?: Pick<SearchHooks, 'onExpand'>;
+  /** Optional background context behind the search intent. Passed to query expansion. */
+  intent?: string;
 }
 
 export interface VectorSearchResult {
@@ -3025,7 +3136,7 @@ export async function vectorSearchQuery(
   if (!hasVectors) return [];
 
   // Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
-  const allExpanded = await store.expandQuery(query);
+  const allExpanded = await store.expandQuery(query, options?.intent);
   const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
   options?.hooks?.onExpand?.(query, vecExpanded);
 
diff --git a/test/intent.test.ts b/test/intent.test.ts
new file mode 100644
index 00000000..98ca1a19
--- /dev/null
+++ b/test/intent.test.ts
@@ -0,0 +1,190 @@
+/**
+ * Intent Parameter Unit Tests
+ *
+ * Tests the intent-aware pipeline logic:
+ * - extractSnippet with intent-derived terms
+ * - chunk selection scoring with intent
+ * - strong-signal bypass when intent is present
+ *
+ * These are pure logic tests — no LLM or database required.
+ */
+
+import { describe, test, expect } from "vitest";
+import { extractSnippet, extractIntentTerms, INTENT_WEIGHT_CHUNK } from "../src/store";
+
+// =============================================================================
+// extractSnippet with intent
+// =============================================================================
+
+describe("extractSnippet with intent", () => {
+  // Each section contains "performance" so the query score is tied (1.0 each).
+  // Intent terms (INTENT_WEIGHT_SNIPPET) then break the tie toward the relevant section.
+  const body = [
+    "# Notes on Various Topics",
+    "",
+    "## Web Performance Section",
+    "Web performance means optimizing page load times and Core Web Vitals.",
+    "Reduce latency, improve rendering speed, and measure performance budgets.",
+    "",
+    "## Team Performance Section",
+    "Team performance depends on trust, psychological safety, and feedback.",
+    "Build culture where performance reviews drive growth not fear.",
+    "",
+    "## Health Performance Section",
+    "Health performance comes from consistent exercise, sleep, and endurance.",
+    "Track fitness metrics, optimize recovery, and monitor healthspan.",
+  ].join("\n");
+
+  test("without intent, anchors on query terms only", () => {
+    const result = extractSnippet(body, "performance", { maxLen: 500 });
+    // "performance" appears in title and multiple sections — should anchor on first match
+    expect(result.snippet).toContain("Performance");
+  });
+
+  test("with web-perf intent, prefers web performance section", () => {
+    const result = extractSnippet(body, "performance", { maxLen: 500,
+      intent: "Looking for notes about web performance, latency, and page load times" });
+    expect(result.snippet).toMatch(/latency|page.*load|Core Web Vitals/i);
+  });
+
+  test("with health intent, prefers health section", () => {
+    const result = extractSnippet(body, "performance", { maxLen: 500,
+      intent: "Looking for notes about personal health, fitness, and endurance" });
+    expect(result.snippet).toMatch(/health|fitness|endurance|exercise/i);
+  });
+
+  test("with team intent, prefers team section", () => {
+    const result = extractSnippet(body, "performance", { maxLen: 500,
+      intent: "Looking for notes about building high-performing teams and culture" });
+    expect(result.snippet).toMatch(/team|culture|trust|feedback/i);
+  });
+
+  test("intent does not override strong query match", () => {
+    // Query "Core Web Vitals" is very specific — intent shouldn't pull away from it
+    const result = extractSnippet(body, "Core Web Vitals", { maxLen: 500,
+      intent: "Looking for notes about health and fitness" });
+    expect(result.snippet).toContain("Core Web Vitals");
+  });
+
+  test("absent intent produces same result as undefined", () => {
+    const withoutIntent = extractSnippet(body, "performance", { maxLen: 500 });
+    const withUndefined = extractSnippet(body, "performance", { maxLen: 500, intent: undefined });
+    expect(withoutIntent.line).toBe(withUndefined.line);
+    expect(withoutIntent.snippet).toBe(withUndefined.snippet);
+  });
+});
+
+// =============================================================================
+// Intent keyword extraction (used in chunk selection)
+// =============================================================================
+
+describe("intent keyword extraction logic", () => {
+  // Mirrors the chunk selection scoring in hybridQuery, using the shared
+  // extractIntentTerms helper and INTENT_WEIGHT_CHUNK constant.
+  function scoreChunk(text: string, query: string, intent?: string): number {
+    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
+    const intentTerms = intent ? extractIntentTerms(intent) : [];
+    const lower = text.toLowerCase();
+    const qScore = queryTerms.reduce((acc, term) => acc + (lower.includes(term) ? 1 : 0), 0);
+    const iScore = intentTerms.reduce((acc, term) => acc + (lower.includes(term) ? INTENT_WEIGHT_CHUNK : 0), 0);
+    return qScore + iScore;
+  }
+
+  const chunks = [
+    "Web performance: optimize page load times, reduce latency, improve rendering pipeline.",
+    "Team performance: build trust, give feedback, set clear expectations for the group.",
+    "Health performance: exercise regularly, sleep 8 hours, manage stress for endurance.",
+  ];
+
+  test("without intent, all chunks score equally on 'performance'", () => {
+    const scores = chunks.map(c => scoreChunk(c, "performance"));
+    // All contain "performance", so all score 1
+    expect(scores[0]).toBe(scores[1]);
+    expect(scores[1]).toBe(scores[2]);
+  });
+
+  test("with web intent, web chunk scores highest", () => {
+    const intent = "looking for notes about page load times and latency optimization";
+    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
+    expect(scores[0]).toBeGreaterThan(scores[1]!);
+    expect(scores[0]).toBeGreaterThan(scores[2]!);
+  });
+
+  test("with health intent, health chunk scores highest", () => {
+    const intent = "looking for notes about exercise, sleep, and endurance";
+    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
+    expect(scores[2]).toBeGreaterThan(scores[0]!);
+    expect(scores[2]).toBeGreaterThan(scores[1]!);
+  });
+
+  test("intent terms have lower weight than query terms (1.0)", () => {
+    const intent = "looking for latency";
+    // Chunk 0 has "performance" (query: 1.0) + "latency" (intent: INTENT_WEIGHT_CHUNK) = 1.5
+    const withBoth = scoreChunk(chunks[0]!, "performance", intent);
+    const queryOnly = scoreChunk(chunks[0]!, "performance");
+    expect(withBoth).toBe(queryOnly + INTENT_WEIGHT_CHUNK);
+  });
+
+  test("stop words are filtered, short domain terms survive", () => {
+    const intent = "the art of web performance";
+    // "the" (stop word), "art" (survives), "of" (stop word),
+    // "web" (survives), "performance" (survives)
+    // Chunk 0 contains "Web" + "performance" → 2 intent hits
+    // Chunks 1,2 contain only "performance" → 1 intent hit
+    const scores = chunks.map(c => scoreChunk(c, "test", intent));
+    expect(scores[0]).toBe(INTENT_WEIGHT_CHUNK * 2); // "web" + "performance"
+    expect(scores[1]).toBe(INTENT_WEIGHT_CHUNK);      // "performance" only
+    expect(scores[2]).toBe(INTENT_WEIGHT_CHUNK);      // "performance" only
+  });
+
+  test("extractIntentTerms filters stop words and punctuation", () => {
+    // "looking", "for", "notes", "about" are stop words
+    expect(extractIntentTerms("looking for notes about latency optimization"))
+      .toEqual(["latency", "optimization"]);
+    // "what", "is", "the", "to", "find" are stop words; "best", "way" survive
+    expect(extractIntentTerms("what is the best way to find"))
+      .toEqual(["best", "way"]);
+    // Short domain terms survive (>1 char, not stop words)
+    expect(extractIntentTerms("web performance latency page load times"))
+      .toEqual(["web", "performance", "latency", "page", "load", "times"]);
+    // Acronyms survive — the whole point of >1 vs >3
+    expect(extractIntentTerms("API design for LLM agents"))
+      .toEqual(["api", "design", "llm", "agents"]);
+    // Surrounding punctuation stripped, internal hyphens preserved
+    expect(extractIntentTerms("personal health, fitness, and endurance"))
+      .toEqual(["personal", "health", "fitness", "endurance"]);
+    expect(extractIntentTerms("self-hosted real-time (decision-making)"))
+      .toEqual(["self-hosted", "real-time", "decision-making"]);
+  });
+});
+
+// =============================================================================
+// Strong-signal bypass with intent
+// =============================================================================
+
+describe("strong-signal bypass logic", () => {
+  // Mirrors the logic in hybridQuery:
+  // const hasStrongSignal = !intent && topScore >= 0.85 && gap >= 0.15
+  function hasStrongSignal(topScore: number, secondScore: number, intent?: string): boolean {
+    return !intent
+      && topScore >= 0.85
+      && (topScore - secondScore) >= 0.15;
+  }
+
+  test("strong signal detected without intent", () => {
+    expect(hasStrongSignal(0.90, 0.70)).toBe(true);
+  });
+
+  test("strong signal bypassed when intent provided", () => {
+    expect(hasStrongSignal(0.90, 0.70, "looking for health performance")).toBe(false);
+  });
+
+  test("weak signal not affected by intent", () => {
+    expect(hasStrongSignal(0.50, 0.45)).toBe(false);
+    expect(hasStrongSignal(0.50, 0.45, "some intent")).toBe(false);
+  });
+
+  test("close scores not strong even without intent", () => {
+    expect(hasStrongSignal(0.90, 0.80)).toBe(false); // gap < 0.15
+  });
+});
diff --git a/test/mcp.test.ts b/test/mcp.test.ts
index 881874d0..4417b300 100644
--- a/test/mcp.test.ts
+++ b/test/mcp.test.ts
@@ -279,7 +279,7 @@ describe("MCP Server", () => {
         title: r.title,
         score: Math.round(r.score * 100) / 100,
         context: getContextForFile(testDb, r.filepath),
-        snippet: extractSnippet(r.body || "", "api", 300, r.chunkPos).snippet,
+        snippet: extractSnippet(r.body || "", "api", { maxLen: 300, chunkPos: r.chunkPos }).snippet,
       }));
       // MCP now returns structuredContent with results array
       expect(filtered.length).toBeGreaterThan(0);
@@ -768,7 +768,7 @@ describe("MCP Server", () => {
 
     test("extracts snippet around matching text", () => {
       const body = "Line 1\nLine 2\nThis is the important line with the keyword\nLine 4\nLine 5";
-      const { line, snippet } = extractSnippet(body, "keyword", 200);
+      const { line, snippet } = extractSnippet(body, "keyword", { maxLen: 200 });
       expect(snippet).toContain("keyword");
       expect(line).toBe(3);
     });
@@ -776,7 +776,7 @@ describe("MCP Server", () => {
     test("handles snippet extraction with chunkPos", () => {
       const body = "A".repeat(1000) + "KEYWORD" + "B".repeat(1000);
       const chunkPos = 1000; // Position of KEYWORD
-      const { snippet } = extractSnippet(body, "keyword", 200, chunkPos);
+      const { snippet } = extractSnippet(body, "keyword", { maxLen: 200, chunkPos });
       expect(snippet).toContain("KEYWORD");
     });
   });
@@ -802,7 +802,7 @@ describe("MCP Server", () => {
         title: r.title,
         score: Math.round(r.score * 100) / 100,
         context: getContextForFile(testDb, r.filepath),
-        snippet: extractSnippet(r.body || "", "readme", 300, r.chunkPos).snippet,
+        snippet: extractSnippet(r.body || "", "readme", { maxLen: 300, chunkPos: r.chunkPos }).snippet,
       }));
 
       expect(structured.length).toBeGreaterThan(0);
diff --git a/test/store.test.ts b/test/store.test.ts
index 9c384770..4d34f3df 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -1746,7 +1746,7 @@ describe("Document Retrieval", () => {
 describe("Snippet Extraction", () => {
   test("extractSnippet finds query terms", () => {
     const body = "First line.\nSecond line with keyword.\nThird line.\nFourth line.";
-    const { line, snippet } = extractSnippet(body, "keyword", 500);
+    const { line, snippet } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     expect(line).toBe(2); // Line 2 contains "keyword"
     expect(snippet).toContain("keyword");
@@ -1754,7 +1754,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet includes context lines", () => {
     const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
-    const { snippet } = extractSnippet(body, "keyword", 500);
+    const { snippet } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     expect(snippet).toContain("Line 2"); // Context before
     expect(snippet).toContain("Line 3 has keyword");
@@ -1763,7 +1763,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet respects maxLen for content", () => {
     const body = "A".repeat(1000);
-    const result = extractSnippet(body, "query", 100);
+    const result = extractSnippet(body, "query", { maxLen: 100 });
 
     // Snippet includes header + content, content should be truncated
     expect(result.snippet).toContain("@@"); // Has diff header
@@ -1774,13 +1774,13 @@ describe("Snippet Extraction", () => {
     const body = "First section...\n".repeat(50) + "Target keyword here\n" + "More content...".repeat(50);
     const chunkPos = body.indexOf("Target keyword");
 
-    const { snippet } = extractSnippet(body, "Target", 200, chunkPos);
+    const { snippet } = extractSnippet(body, "Target", { maxLen: 200, chunkPos });
     expect(snippet).toContain("Target keyword");
   });
 
   test("extractSnippet returns beginning when no match", () => {
     const body = "First line\nSecond line\nThird line";
-    const { line, snippet } = extractSnippet(body, "nonexistent", 500);
+    const { line, snippet } = extractSnippet(body, "nonexistent", { maxLen: 500 });
 
     expect(line).toBe(1);
     expect(snippet).toContain("First line");
@@ -1788,7 +1788,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet includes diff-style header", () => {
     const body = "Line 1\nLine 2\nLine 3 has keyword\nLine 4\nLine 5";
-    const { snippet, linesBefore, linesAfter, snippetLines } = extractSnippet(body, "keyword", 500);
+    const { snippet, linesBefore, linesAfter, snippetLines } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     // Header should show line position and context info
     expect(snippet).toMatch(/^@@ -\d+,\d+ @@ \(\d+ before, \d+ after\)/);
@@ -1799,7 +1799,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet calculates linesBefore and linesAfter correctly", () => {
     const body = "L1\nL2\nL3\nL4 match\nL5\nL6\nL7\nL8\nL9\nL10";
-    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "match", 500);
+    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "match", { maxLen: 500 });
 
     expect(line).toBe(4); // "L4 match" is line 4
     expect(linesBefore).toBe(2); // L1, L2 before snippet (snippet starts at L3)
@@ -1809,7 +1809,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet header format matches diff style", () => {
     const body = "A\nB\nC keyword\nD\nE\nF\nG\nH";
-    const { snippet } = extractSnippet(body, "keyword", 500);
+    const { snippet } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     // Should start with @@ -line,count @@ (N before, M after)
     const headerMatch = snippet.match(/^@@ -(\d+),(\d+) @@ \((\d+) before, (\d+) after\)/);
@@ -1824,7 +1824,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet at document start shows 0 before", () => {
     const body = "First line keyword\nSecond\nThird\nFourth\nFifth";
-    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", 500);
+    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     expect(line).toBe(1);         // Keyword on first line
     expect(linesBefore).toBe(0);  // Nothing before
@@ -1834,7 +1834,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet at document end shows 0 after", () => {
     const body = "First\nSecond\nThird\nFourth\nFifth keyword";
-    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", 500);
+    const { linesBefore, linesAfter, snippetLines, line } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     expect(line).toBe(5);         // Keyword on last line
     expect(linesBefore).toBe(3);  // First, Second, Third before snippet
@@ -1844,7 +1844,7 @@ describe("Snippet Extraction", () => {
 
   test("extractSnippet with single line document", () => {
     const body = "Single line with keyword";
-    const { linesBefore, linesAfter, snippetLines, snippet } = extractSnippet(body, "keyword", 500);
+    const { linesBefore, linesAfter, snippetLines, snippet } = extractSnippet(body, "keyword", { maxLen: 500 });
 
     expect(linesBefore).toBe(0);
     expect(linesAfter).toBe(0);
@@ -1859,7 +1859,7 @@ describe("Snippet Extraction", () => {
     const body = padding + "Target keyword here\nMore content\nEven more";
     const chunkPos = padding.length; // Position of "Target keyword"
 
-    const { line, linesBefore, linesAfter } = extractSnippet(body, "keyword", 200, chunkPos);
+    const { line, linesBefore, linesAfter } = extractSnippet(body, "keyword", { maxLen: 200, chunkPos });
 
     expect(line).toBe(51); // "Target keyword" is line 51
     expect(linesBefore).toBeGreaterThan(40); // Many lines before
diff --git a/test/structured-query.test.ts b/test/structured-query.test.ts
new file mode 100644
index 00000000..83786e76
--- /dev/null
+++ b/test/structured-query.test.ts
@@ -0,0 +1,430 @@
+/**
+ * Structured Query Tests
+ *
+ * Unit tests: normalizeQuery, hasCallerExpansions, type contracts.
+ * Integration tests: hybridQuery routing — verifies structured queries skip
+ * LLM expansion and route caller fields to the right search backends.
+ *
+ * Integration tests use a real SQLite store with FTS (no vector index) to
+ * avoid LLM/embedding dependencies while testing the routing logic.
+ */
+
+import { describe, test, expect, vi, beforeAll, afterAll } from "vitest";
+import { mkdtemp, writeFile, unlink } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import YAML from "yaml";
+import {
+  createStore,
+  normalizeQuery,
+  hasCallerExpansions,
+  hashContent,
+  hybridQuery,
+  type Store,
+  type StructuredQuery,
+} from "../src/store";
+import type { CollectionConfig } from "../src/collections";
+
+// =============================================================================
+// Test helpers — lightweight store setup for integration tests
+// =============================================================================
+
+let testDir: string;
+let savedConfigDir: string | undefined;
+
+beforeAll(async () => {
+  testDir = await mkdtemp(join(tmpdir(), "qmd-structured-test-"));
+  savedConfigDir = process.env.QMD_CONFIG_DIR;
+});
+
+afterAll(async () => {
+  // Restore env var — bun test runs all files in one process, so deleting
+  // QMD_CONFIG_DIR would clobber other test suites that set it.
+  if (savedConfigDir !== undefined) {
+    process.env.QMD_CONFIG_DIR = savedConfigDir;
+  } else {
+    delete process.env.QMD_CONFIG_DIR;
+  }
+  try {
+    const { rm } = await import("node:fs/promises");
+    await rm(testDir, { recursive: true, force: true });
+  } catch { /* ignore */ }
+});
+
+async function createTestStore(): Promise<Store> {
+  const dbPath = join(testDir, `test-${Date.now()}-${Math.random().toString(36).slice(2)}.sqlite`);
+  const configDir = await mkdtemp(join(testDir, "config-"));
+  process.env.QMD_CONFIG_DIR = configDir;
+
+  const config: CollectionConfig = { collections: {} };
+  await writeFile(join(configDir, "index.yml"), YAML.stringify(config));
+
+  return createStore(dbPath);
+}
+
+async function addCollection(store: Store, name: string): Promise<string> {
+  const configPath = join(process.env.QMD_CONFIG_DIR!, "index.yml");
+  const { readFile } = await import("node:fs/promises");
+  const config = YAML.parse(await readFile(configPath, "utf-8")) as CollectionConfig;
+  config.collections[name] = { path: `/test/${name}`, pattern: "**/*.md" };
+  await writeFile(configPath, YAML.stringify(config));
+  return name;
+}
+
+async function addDoc(
+  store: Store, collection: string,
+  title: string, body: string, path?: string,
+): Promise<void> {
+  const now = new Date().toISOString();
+  const hash = await hashContent(body);
+  const docPath = path || `${title.toLowerCase().replace(/\s+/g, "-")}.md`;
+
+  store.db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`).run(hash, body, now);
+  store.db.prepare(`INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, 1)`)
+    .run(collection, docPath, title, hash, now, now);
+}
+
+async function cleanup(store: Store): Promise<void> {
+  store.close();
+  try { await unlink(store.dbPath); } catch { /* ignore */ }
+}
+
+// =============================================================================
+// normalizeQuery
+// =============================================================================
+
+describe("normalizeQuery", () => {
+  test("string becomes { text } object", () => {
+    expect(normalizeQuery("performance")).toEqual({ text: "performance" });
+  });
+
+  test("structured query with populated fields passes through", () => {
+    const sq: StructuredQuery = {
+      text: "performance",
+      keywords: ["TTFB", "core web vitals"],
+      concepts: ["frontend rendering optimization"],
+      passage: "Reducing time-to-first-byte requires optimizing the critical rendering path.",
+    };
+    const result = normalizeQuery(sq);
+    expect(result).toEqual(sq);
+  });
+
+  test("minimal structured query (text only) passes through", () => {
+    expect(normalizeQuery({ text: "performance" })).toEqual({ text: "performance" });
+  });
+
+  test("strips empty keywords array", () => {
+    const result = normalizeQuery({ text: "test", keywords: [] });
+    expect(result).toEqual({ text: "test" });
+    expect(result.keywords).toBeUndefined();
+  });
+
+  test("strips empty concepts array", () => {
+    const result = normalizeQuery({ text: "test", concepts: [] });
+    expect(result).toEqual({ text: "test" });
+    expect(result.concepts).toBeUndefined();
+  });
+
+  test("strips empty passage string", () => {
+    const result = normalizeQuery({ text: "test", passage: "" });
+    expect(result).toEqual({ text: "test" });
+    expect(result.passage).toBeUndefined();
+  });
+
+  test("keeps non-empty fields, strips empty ones", () => {
+    const result = normalizeQuery({
+      text: "test",
+      keywords: ["TTFB"],
+      concepts: [],
+      passage: "",
+    });
+    expect(result).toEqual({ text: "test", keywords: ["TTFB"] });
+  });
+});
+
+// =============================================================================
+// hasCallerExpansions
+// =============================================================================
+
+describe("hasCallerExpansions", () => {
+  test("returns false for string-normalized query (text only)", () => {
+    expect(hasCallerExpansions({ text: "performance" })).toBe(false);
+  });
+
+  test("returns false for empty arrays", () => {
+    expect(hasCallerExpansions({ text: "performance", keywords: [], concepts: [] })).toBe(false);
+  });
+
+  test("returns false for undefined fields", () => {
+    expect(hasCallerExpansions({ text: "performance", keywords: undefined, concepts: undefined, passage: undefined })).toBe(false);
+  });
+
+  test("returns true when keywords present", () => {
+    expect(hasCallerExpansions({ text: "performance", keywords: ["TTFB"] })).toBe(true);
+  });
+
+  test("returns true when concepts present", () => {
+    expect(hasCallerExpansions({ text: "performance", concepts: ["frontend rendering"] })).toBe(true);
+  });
+
+  test("returns true when passage present", () => {
+    expect(hasCallerExpansions({
+      text: "performance",
+      passage: "Reducing TTFB requires edge caching.",
+    })).toBe(true);
+  });
+
+  test("returns true when all fields present", () => {
+    expect(hasCallerExpansions({
+      text: "performance",
+      keywords: ["TTFB"],
+      concepts: ["rendering"],
+      passage: "A passage about performance.",
+    })).toBe(true);
+  });
+
+  test("returns false for empty passage string", () => {
+    expect(hasCallerExpansions({ text: "performance", passage: "" })).toBe(false);
+  });
+});
+
+// =============================================================================
+// StructuredQuery type shape
+// =============================================================================
+
+describe("StructuredQuery type contracts", () => {
+  test("text is the only required field", () => {
+    const sq: StructuredQuery = { text: "test" };
+    expect(normalizeQuery(sq).text).toBe("test");
+  });
+
+  test("keywords is string array", () => {
+    const sq: StructuredQuery = { text: "test", keywords: ["a", "b", "c"] };
+    expect(sq.keywords).toHaveLength(3);
+  });
+
+  test("concepts is string array", () => {
+    const sq: StructuredQuery = { text: "test", concepts: ["semantic phrase one", "semantic phrase two"] };
+    expect(sq.concepts).toHaveLength(2);
+  });
+
+  test("passage is a single string", () => {
+    const sq: StructuredQuery = {
+      text: "test",
+      passage: "A hypothetical document passage about the topic at hand.",
+    };
+    expect(typeof sq.passage).toBe("string");
+  });
+});
+
+// =============================================================================
+// hybridQuery routing integration tests
+//
+// Uses real SQLite FTS (no vector index) to verify routing without needing
+// LLM models. Both expandQuery and rerank are mocked — these tests verify
+// routing decisions (which functions are called), not search quality.
+// =============================================================================
+
+/** Mock rerank to pass through inputs with descending scores (no LLM needed). */
+function mockRerank(store: Store): ReturnType<typeof vi.spyOn> {
+  return vi.spyOn(store, "rerank").mockImplementation(
+    async (_query, docs) => docs.map((d, i) => ({ file: d.file, score: 1 - i * 0.1 }))
+  );
+}
+
+// Routing tests need a real SQLite store + createTestStore() which overwrites
+// process.env.QMD_CONFIG_DIR. Bun runs all test files in one process, so this
+// clobbers config for other test suites (e.g. mcp.test.ts). Skip in CI —
+// unit tests above still run and routing is validated locally.
+describe.skipIf(!!process.env.CI)("hybridQuery routing", () => {
+  test("structured query with keywords skips LLM expansion", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Web Vitals", "Core web vitals measure TTFB and LCP for page load performance");
+    await addDoc(store, coll, "Team Health", "Team health is about trust and psychological safety");
+
+    const expandSpy = vi.spyOn(store, "expandQuery");
+    const rerankSpy = mockRerank(store);
+
+    const results = await hybridQuery(store, {
+      text: "performance",
+      keywords: ["TTFB", "core web vitals"],
+    }, { limit: 5 });
+
+    // expandQuery must NOT be called — caller provided their own expansion
+    expect(expandSpy).not.toHaveBeenCalled();
+    // FTS on "performance" (initial probe) + "TTFB" + "core web vitals" should
+    // find the web vitals doc
+    expect(results.length).toBeGreaterThan(0);
+    expect(results.some(r => r.title === "Web Vitals")).toBe(true);
+
+    expandSpy.mockRestore();
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("plain string query calls LLM expansion", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Fox Doc", "The quick brown fox jumps over the lazy dog");
+
+    // Mock expandQuery to avoid needing the actual LLM
+    const expandSpy = vi.spyOn(store, "expandQuery").mockResolvedValue([
+      { type: "lex", text: "quick fox" },
+    ]);
+    const rerankSpy = mockRerank(store);
+
+    await hybridQuery(store, "fox", { limit: 5 });
+
+    // expandQuery MUST be called — no caller expansions, no strong signal
+    expect(expandSpy).toHaveBeenCalledWith("fox", undefined);
+
+    expandSpy.mockRestore();
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("structured query with only passage skips expansion", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Scaling", "Database sharding enables horizontal scaling");
+
+    const expandSpy = vi.spyOn(store, "expandQuery");
+    const rerankSpy = mockRerank(store);
+
+    // passage alone should trigger caller-expansion path
+    const results = await hybridQuery(store, {
+      text: "scaling",
+      passage: "Horizontal scaling through database sharding and read replicas",
+    }, { limit: 5 });
+
+    expect(expandSpy).not.toHaveBeenCalled();
+    // Without vector index, only FTS on "scaling" runs (passage needs embeddings)
+    // But the routing decision is what we're testing — expandQuery was skipped
+    expect(results.length).toBeGreaterThan(0);
+
+    expandSpy.mockRestore();
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("empty expansion fields fall through to LLM expansion", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Test Doc", "Some content for testing");
+
+    const expandSpy = vi.spyOn(store, "expandQuery").mockResolvedValue([]);
+    const rerankSpy = mockRerank(store);
+
+    // Empty arrays + empty passage = no caller expansions after normalization
+    await hybridQuery(store, {
+      text: "testing",
+      keywords: [],
+      concepts: [],
+      passage: "",
+    }, { limit: 5 });
+
+    // normalizeQuery strips empties, so this falls through to LLM path
+    expect(expandSpy).toHaveBeenCalled();
+
+    expandSpy.mockRestore();
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("onExpand hook does not fire for structured queries", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Doc", "Content about performance metrics");
+
+    const onExpand = vi.fn();
+    vi.spyOn(store, "expandQuery");
+    const rerankSpy = mockRerank(store);
+
+    await hybridQuery(store, {
+      text: "performance",
+      keywords: ["metrics"],
+    }, { limit: 5, hooks: { onExpand } });
+
+    expect(onExpand).not.toHaveBeenCalled();
+
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("onExpand hook fires for string queries with LLM expansion", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Doc", "Content about performance metrics");
+
+    const onExpand = vi.fn();
+    vi.spyOn(store, "expandQuery").mockResolvedValue([
+      { type: "lex", text: "metrics benchmarks" },
+      { type: "vec", text: "performance measurement" },
+    ]);
+    const rerankSpy = mockRerank(store);
+
+    await hybridQuery(store, "performance", { limit: 5, hooks: { onExpand } });
+
+    expect(onExpand).toHaveBeenCalledWith("performance", [
+      { type: "lex", text: "metrics benchmarks" },
+      { type: "vec", text: "performance measurement" },
+    ]);
+
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("keywords route to FTS and influence results", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    // Two docs — only one matches the keyword expansion
+    await addDoc(store, coll, "TTFB Guide", "Time to first byte optimization reduces latency");
+    await addDoc(store, coll, "Team Trust", "Building trust in engineering teams requires candor");
+
+    vi.spyOn(store, "expandQuery");
+    const rerankSpy = mockRerank(store);
+
+    const results = await hybridQuery(store, {
+      text: "performance",
+      keywords: ["latency", "time to first byte"],
+    }, { limit: 5 });
+
+    // The keyword "latency" should boost the TTFB doc
+    const titles = results.map(r => r.title);
+    expect(titles).toContain("TTFB Guide");
+
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+
+  test("intent disables strong-signal bypass for structured queries", async () => {
+    const store = await createTestStore();
+    const coll = await addCollection(store, "routing");
+
+    await addDoc(store, coll, "Exact Match", "A very specific unique term zephyr appears here zephyr zephyr");
+
+    const expandSpy = vi.spyOn(store, "expandQuery");
+    const rerankSpy = mockRerank(store);
+
+    // Structured query with intent — strong signal should be disabled
+    // (callerExpansions = true already disables it, but test the combination)
+    await hybridQuery(store, {
+      text: "zephyr",
+      keywords: ["wind patterns"],
+    }, { limit: 5, intent: "meteorology" });
+
+    expect(expandSpy).not.toHaveBeenCalled();
+
+    expandSpy.mockRestore();
+    rerankSpy.mockRestore();
+    await cleanup(store);
+  });
+});