diff --git a/index.ts b/index.ts
index 74cd7c6..c0ce8d5 100644
--- a/index.ts
+++ b/index.ts
@@ -24,12 +24,6 @@ import { appendSelfImprovementEntry, ensureSelfImprovementLearningFiles } from "
 import type { MdMirrorWriter } from "./src/tools.js";
 import { shouldSkipRetrieval } from "./src/adaptive-retrieval.js";
 import { parseClawteamScopes, applyClawteamScopes } from "./src/clawteam-scope.js";
-import {
-  runCompaction,
-  shouldRunCompaction,
-  recordCompactionRun,
-  type CompactionConfig,
-} from "./src/memory-compactor.js";
 import { runWithReflectionTransientRetryOnce } from "./src/reflection-retry.js";
 import { resolveReflectionSessionSearchDirs, stripResetSuffix } from "./src/session-recovery.js";
 import {
@@ -45,11 +39,9 @@ import { createReflectionEventId } from "./src/reflection-event-store.js";
 import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js";
 import { createMemoryCLI } from "./cli.js";
 import { isNoise } from "./src/noise-filter.js";
-import { normalizeAutoCaptureText } from "./src/auto-capture-cleanup.js";
 
 // Import smart extraction & lifecycle components
-import { SmartExtractor, createExtractionRateLimiter } from "./src/smart-extractor.js";
-import { compressTexts, estimateConversationValue } from "./src/session-compressor.js";
+import { SmartExtractor } from "./src/smart-extractor.js";
 import { NoisePrototypeBank } from "./src/noise-prototypes.js";
 import { createLlmClient } from "./src/llm-client.js";
 import { createDecayEngine, DEFAULT_DECAY_CONFIG } from "./src/decay-engine.js";
@@ -72,7 +64,6 @@ import {
   type AdmissionControlConfig,
   type AdmissionRejectionAuditEntry,
 } from "./src/admission-control.js";
-import { analyzeIntent, applyCategoryBoost } from "./src/intent-analyzer.js";
 
 // ============================================================================
 // Configuration & Types
@@ -85,7 +76,6 @@ interface PluginConfig {
     model?: string;
     baseURL?: string;
     dimensions?: number;
-    omitDimensions?: boolean;
     taskQuery?: string;
     taskPassage?: string;
     normalized?: boolean;
@@ -96,11 +86,10 @@ interface PluginConfig {
   autoRecall?: boolean;
   autoRecallMinLength?: number;
   autoRecallMinRepeated?: number;
-  autoRecallTimeoutMs?: number;
   autoRecallMaxItems?: number;
   autoRecallMaxChars?: number;
   autoRecallPerItemMaxChars?: number;
-  recallMode?: "full" | "summary" | "adaptive" | "off";
+  autoRecallTimeoutMs?: number;
   captureAssistant?: boolean;
   retrieval?: {
     mode?: "hybrid" | "vector";
@@ -119,6 +108,7 @@ interface PluginConfig {
       | "pinecone"
       | "dashscope"
       | "tei";
+    rerankTimeoutMs?: number;
     recencyHalfLifeDays?: number;
     recencyWeight?: number;
     filterNoise?: boolean;
@@ -195,22 +185,6 @@ interface PluginConfig {
   mdMirror?: { enabled?: boolean; dir?: string };
   workspaceBoundary?: WorkspaceBoundaryConfig;
   admissionControl?: AdmissionControlConfig;
-  memoryCompaction?: {
-    enabled?: boolean;
-    minAgeDays?: number;
-    similarityThreshold?: number;
-    minClusterSize?: number;
-    maxMemoriesToScan?: number;
-    cooldownHours?: number;
-  };
-  sessionCompression?: {
-    enabled?: boolean;
-    minScoreToKeep?: number;
-  };
-  extractionThrottle?: {
-    skipLowValue?: boolean;
-    maxExtractionsPerHour?: number;
-  };
 }
 
 type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
@@ -727,10 +701,65 @@ function shouldSkipReflectionMessage(role: string, text: string): boolean {
   return false;
 }
 
+const AUTO_CAPTURE_INBOUND_META_SENTINELS = [
+  "Conversation info (untrusted metadata):",
+  "Sender (untrusted metadata):",
+  "Thread starter (untrusted, for context):",
+  "Replied message (untrusted, for context):",
+  "Forwarded message context (untrusted metadata):",
+  "Chat history since last reply (untrusted, for context):",
+] as const;
+
+const AUTO_CAPTURE_SESSION_RESET_PREFIX =
+  "A new session was started via /new or /reset. Execute your Session Startup sequence now";
+const AUTO_CAPTURE_ADDRESSING_PREFIX_RE = /^(?:<@!?[0-9]+>|@[A-Za-z0-9_.-]+)\s*/;
 const AUTO_CAPTURE_MAP_MAX_ENTRIES = 2000;
 const AUTO_CAPTURE_EXPLICIT_REMEMBER_RE =
   /^(?:请|請)?(?:记住|記住|记一下|記一下|别忘了|別忘了)[。.!?？!]*$/u;
 
+function isAutoCaptureInboundMetaSentinelLine(line: string): boolean {
+  const trimmed = line.trim();
+  return AUTO_CAPTURE_INBOUND_META_SENTINELS.some((sentinel) => sentinel === trimmed);
+}
+
+function stripLeadingInboundMetadata(text: string): string {
+  if (!text || !AUTO_CAPTURE_INBOUND_META_SENTINELS.some((sentinel) => text.includes(sentinel))) {
+    return text;
+  }
+
+  const lines = text.split("\n");
+  let index = 0;
+  while (index < lines.length && lines[index].trim() === "") {
+    index++;
+  }
+
+  while (index < lines.length && isAutoCaptureInboundMetaSentinelLine(lines[index])) {
+    index++;
+    if (index < lines.length && lines[index].trim() === "```json") {
+      index++;
+      while (index < lines.length && lines[index].trim() !== "```") {
+        index++;
+      }
+      if (index < lines.length && lines[index].trim() === "```") {
+        index++;
+      }
+    } else {
+      // Sentinel line not followed by a ```json fenced block — unexpected format.
+      // Log and return original text to avoid lossy stripping.
+      _autoCaptureDebugLog(
+        `memory-lancedb-pro: stripLeadingInboundMetadata: sentinel line not followed by json fenced block at line ${index}, returning original text`,
+      );
+      return text;
+    }
+
+    while (index < lines.length && lines[index].trim() === "") {
+      index++;
+    }
+  }
+
+  return lines.slice(index).join("\n").trim();
+}
+
 /**
  * Prune a Map to stay within the given maximum number of entries.
  * Deletes the oldest (earliest-inserted) keys when over the limit.
@@ -745,6 +774,28 @@ function pruneMapIfOver<K, V>(map: Map<K, V>, maxEntries: number): void {
   }
 }
 
+function stripAutoCaptureSessionResetPrefix(text: string): string {
+  const trimmed = text.trim();
+  if (!trimmed.startsWith(AUTO_CAPTURE_SESSION_RESET_PREFIX)) {
+    return trimmed;
+  }
+
+  const blankLineIndex = trimmed.indexOf("\n\n");
+  if (blankLineIndex >= 0) {
+    return trimmed.slice(blankLineIndex + 2).trim();
+  }
+
+  const lines = trimmed.split("\n");
+  if (lines.length <= 2) {
+    return "";
+  }
+  return lines.slice(2).join("\n").trim();
+}
+
+function stripAutoCaptureAddressingPrefix(text: string): string {
+  return text.replace(AUTO_CAPTURE_ADDRESSING_PREFIX_RE, "").trim();
+}
+
 function isExplicitRememberCommand(text: string): boolean {
   return AUTO_CAPTURE_EXPLICIT_REMEMBER_RE.test(text.trim());
 }
@@ -774,6 +825,34 @@ function buildAutoCaptureConversationKeyFromSessionKey(sessionKey: string): stri
   return suffix || null;
 }
 
+function stripAutoCaptureInjectedPrefix(role: string, text: string): string {
+  if (role !== "user") {
+    return text.trim();
+  }
+
+  let normalized = text.trim();
+  normalized = normalized.replace(/^<relevant-memories>\s*[\s\S]*?<\/relevant-memories>\s*/i, "");
+  normalized = normalized.replace(
+    /^\[UNTRUSTED DATA[^\n]*\][\s\S]*?\[END UNTRUSTED DATA\]\s*/i,
+    "",
+  );
+  normalized = stripAutoCaptureSessionResetPrefix(normalized);
+  normalized = stripLeadingInboundMetadata(normalized);
+  normalized = stripAutoCaptureAddressingPrefix(normalized);
+  return normalized.trim();
+}
+
+/** Module-level debug logger for auto-capture helpers; set during plugin registration. */
+let _autoCaptureDebugLog: (msg: string) => void = () => { };
+
+function normalizeAutoCaptureText(role: unknown, text: string): string | null {
+  if (typeof role !== "string") return null;
+  const normalized = stripAutoCaptureInjectedPrefix(role, text);
+  if (!normalized) return null;
+  if (shouldSkipReflectionMessage(role, normalized)) return null;
+  return normalized;
+}
+
 function redactSecrets(text: string): string {
   const patterns: RegExp[] = [
     /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
@@ -1577,6 +1656,8 @@ const pluginVersion = getPluginVersion();
 // Plugin Definition
 // ============================================================================
 
+let _initialized = false;
+
 const memoryLanceDBProPlugin = {
   id: "memory-lancedb-pro",
   name: "Memory (LanceDB Pro)",
@@ -1585,9 +1666,26 @@ const memoryLanceDBProPlugin = {
   kind: "memory" as const,
 
   register(api: OpenClawPluginApi) {
+
+    // Idempotent guard: skip re-init on repeated register() calls
+    if (_initialized) {
+      api.logger.debug("memory-lancedb-pro: register() called again — skipping re-init (idempotent)");
+      return;
+    }
+    _initialized = true;
+
     // Parse and validate configuration
     const config = parsePluginConfig(api.pluginConfig);
 
+    // Dual-memory model warning: help users understand the two-layer architecture
+    api.logger.info(
+      `[memory-lancedb-pro] memory_recall queries the plugin store (LanceDB), not MEMORY.md.\n` +
+      `  - Plugin memory (LanceDB) = primary recall source for semantic search\n` +
+      `  - MEMORY.md / memory/YYYY-MM-DD.md = startup context / journal only\n` +
+      `  - Use memory_store or auto-capture for recallable memories.\n` +
+      `  - Run: npx memory-lancedb-pro memory-pro import-markdown to migrate Markdown memories.`,
+    );
+
     const resolvedDbPath = api.resolvePath(config.dbPath || getDefaultDbPath());
 
     // Pre-flight: validate storage path (symlink resolution, mkdir, write check).
@@ -1614,7 +1712,6 @@ const memoryLanceDBProPlugin = {
       model: config.embedding.model || "text-embedding-3-small",
       baseURL: config.embedding.baseURL,
       dimensions: config.embedding.dimensions,
-      omitDimensions: config.embedding.omitDimensions,
       taskQuery: config.embedding.taskQuery,
       taskPassage: config.embedding.taskPassage,
       normalized: config.embedding.normalized,
@@ -1723,12 +1820,6 @@ const memoryLanceDBProPlugin = {
       }
     }
 
-    // Extraction rate limiter (Feature 7: Adaptive Extraction Throttling)
-    // NOTE: This rate limiter is global — shared across all agents in multi-agent setups.
-    const extractionRateLimiter = createExtractionRateLimiter({
-      maxExtractionsPerHour: config.extractionThrottle?.maxExtractionsPerHour,
-    });
-
     async function sleep(ms: number): Promise<void> {
       await new Promise(resolve => setTimeout(resolve, ms));
     }
@@ -1955,6 +2046,9 @@ const memoryLanceDBProPlugin = {
     const autoCapturePendingIngressTexts = new Map<string, string[]>();
     const autoCaptureRecentTexts = new Map<string, string[]>();
 
+    // Wire up the module-level debug logger for pure helper functions.
+    _autoCaptureDebugLog = (msg: string) => api.logger.debug(msg);
+
     api.logger.info(
       `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})`
     );
@@ -1965,7 +2059,7 @@ const memoryLanceDBProPlugin = {
         ctx.channelId,
         ctx.conversationId,
       );
-      const normalized = normalizeAutoCaptureText("user", event.content, shouldSkipReflectionMessage);
+      const normalized = normalizeAutoCaptureText("user", event.content);
       if (conversationKey && normalized) {
         const queue = autoCapturePendingIngressTexts.get(conversationKey) || [];
         queue.push(normalized);
@@ -2019,128 +2113,6 @@ const memoryLanceDBProPlugin = {
       }
     );
 
-    // ========================================================================
-    // Memory Compaction (Progressive Summarization)
-    // ========================================================================
-
-    if (config.enableManagementTools) {
-      api.registerTool({
-        name: "memory_compact",
-        description:
-          "Consolidate semantically similar old memories into refined single entries " +
-          "(progressive summarization). Reduces noise and improves retrieval quality over time. " +
-          "Use dry_run:true first to preview the compaction plan without making changes.",
-        inputSchema: {
-          type: "object" as const,
-          properties: {
-            dry_run: {
-              type: "boolean",
-              description: "Preview clusters without writing changes. Default: false.",
-            },
-            min_age_days: {
-              type: "number",
-              description: "Only compact memories at least this many days old. Default: 7.",
-            },
-            similarity_threshold: {
-              type: "number",
-              description: "Cosine similarity threshold for clustering [0-1]. Default: 0.88.",
-            },
-            scopes: {
-              type: "array",
-              items: { type: "string" },
-              description: "Scope filter. Omit to compact all scopes.",
-            },
-          },
-          required: [],
-        },
-        execute: async (args: Record<string, unknown>) => {
-          const compactionCfg: CompactionConfig = {
-            enabled: true,
-            minAgeDays:
-              typeof args.min_age_days === "number"
-                ? args.min_age_days
-                : (config.memoryCompaction?.minAgeDays ?? 7),
-            similarityThreshold:
-              typeof args.similarity_threshold === "number"
-                ? Math.max(0, Math.min(1, args.similarity_threshold))
-                : (config.memoryCompaction?.similarityThreshold ?? 0.88),
-            minClusterSize: config.memoryCompaction?.minClusterSize ?? 2,
-            maxMemoriesToScan: config.memoryCompaction?.maxMemoriesToScan ?? 200,
-            dryRun: args.dry_run === true,
-            cooldownHours: config.memoryCompaction?.cooldownHours ?? 24,
-          };
-          const scopes =
-            Array.isArray(args.scopes) && args.scopes.length > 0
-              ? (args.scopes as string[])
-              : undefined;
-
-          const result = await runCompaction(
-            store,
-            embedder,
-            compactionCfg,
-            scopes,
-            api.logger,
-          );
-
-          return {
-            content: [
-              {
-                type: "text",
-                text: JSON.stringify(
-                  {
-                    scanned: result.scanned,
-                    clustersFound: result.clustersFound,
-                    memoriesDeleted: result.memoriesDeleted,
-                    memoriesCreated: result.memoriesCreated,
-                    dryRun: result.dryRun,
-                    summary: result.dryRun
-                      ? `Dry run: found ${result.clustersFound} cluster(s) in ${result.scanned} memories — no changes made.`
-                      : `Compacted ${result.memoriesDeleted} memories into ${result.memoriesCreated} consolidated entries.`,
-                  },
-                  null,
-                  2,
-                ),
-              },
-            ],
-          };
-        },
-      });
-    }
-
-    // Auto-compaction at gateway_start (if enabled, respects cooldown)
-    if (config.memoryCompaction?.enabled) {
-      api.on("gateway_start", () => {
-        const compactionStateFile = join(
-          dirname(resolvedDbPath),
-          ".compaction-state.json",
-        );
-        const compactionCfg: CompactionConfig = {
-          enabled: true,
-          minAgeDays: config.memoryCompaction!.minAgeDays ?? 7,
-          similarityThreshold: config.memoryCompaction!.similarityThreshold ?? 0.88,
-          minClusterSize: config.memoryCompaction!.minClusterSize ?? 2,
-          maxMemoriesToScan: config.memoryCompaction!.maxMemoriesToScan ?? 200,
-          dryRun: false,
-          cooldownHours: config.memoryCompaction!.cooldownHours ?? 24,
-        };
-
-        shouldRunCompaction(compactionStateFile, compactionCfg.cooldownHours)
-          .then(async (should) => {
-            if (!should) return;
-            await recordCompactionRun(compactionStateFile);
-            const result = await runCompaction(store, embedder, compactionCfg, undefined, api.logger);
-            if (result.clustersFound > 0) {
-              api.logger.info(
-                `memory-compactor [auto]: compacted ${result.memoriesDeleted} → ${result.memoriesCreated} entries`,
-              );
-            }
-          })
-          .catch((err) => {
-            api.logger.warn(`memory-compactor [auto]: failed: ${String(err)}`);
-          });
-      });
-    }
-
     // ========================================================================
     // Register CLI Commands
     // ========================================================================
@@ -2194,9 +2166,7 @@ const memoryLanceDBProPlugin = {
 
     // Auto-recall: inject relevant memories before agent starts
     // Default is OFF to prevent the model from accidentally echoing injected context.
-    // recallMode: "full" (default when autoRecall=true) | "summary" (L0 only) | "adaptive" (intent-based) | "off"
-    const recallMode = config.recallMode || "full";
-    if (config.autoRecall === true && recallMode !== "off") {
+    if (config.autoRecall === true) {
       // Cache the most recent raw user message per session so the
       // before_prompt_build gating can check the *user* text, not the full
       // assembled prompt (which includes system instructions and is too long
@@ -2213,7 +2183,7 @@ const memoryLanceDBProPlugin = {
         if (text) lastRawUserMessage.set(cacheKey, text);
       });
 
-      const AUTO_RECALL_TIMEOUT_MS = parsePositiveInt(config.autoRecallTimeoutMs) ?? 5_000; // configurable; default raised from 3s to 5s for remote embedding APIs behind proxies
+      const AUTO_RECALL_TIMEOUT_MS = config.autoRecallTimeoutMs ?? 3_000; // bounded timeout to prevent agent startup stall
       api.on("before_prompt_build", async (event: any, ctx: any) => {
         // Manually increment turn counter for this session
         const sessionId = ctx?.sessionId || "default";
@@ -2245,7 +2215,7 @@ const memoryLanceDBProPlugin = {
 
           // FR-04: Truncate long prompts (e.g. file attachments) before embedding.
           // Auto-recall only needs the user's intent, not full attachment text.
-          const MAX_RECALL_QUERY_LENGTH = 1_000;
+          const MAX_RECALL_QUERY_LENGTH = 5_000;
           let recallQuery = event.prompt;
           if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) {
             const originalLength = recallQuery.length;
@@ -2260,14 +2230,6 @@ const memoryLanceDBProPlugin = {
           const autoRecallPerItemMaxChars = clampInt(config.autoRecallPerItemMaxChars ?? 180, 32, 1000);
           const retrieveLimit = clampInt(Math.max(autoRecallMaxItems * 2, autoRecallMaxItems), 1, 20);
 
-          // Adaptive intent analysis (zero-LLM-cost pattern matching)
-          const intent = recallMode === "adaptive" ? analyzeIntent(recallQuery) : undefined;
-          if (intent) {
-            api.logger.debug?.(
-              `memory-lancedb-pro: adaptive recall intent=${intent.label} depth=${intent.depth} confidence=${intent.confidence} categories=[${intent.categories.join(",")}]`,
-            );
-          }
-
           const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({
             query: recallQuery,
             limit: retrieveLimit,
@@ -2279,19 +2241,16 @@ const memoryLanceDBProPlugin = {
             return;
           }
 
-          // Apply intent-based category boost for adaptive mode
-          const rankedResults = intent ? applyCategoryBoost(results, intent) : results;
-
           // Filter out redundant memories based on session history
           const minRepeated = config.autoRecallMinRepeated ?? 8;
           let dedupFilteredCount = 0;
 
           // Only enable dedup logic when minRepeated > 0
-          let finalResults = rankedResults;
+          let finalResults = results;
 
           if (minRepeated > 0) {
             const sessionHistory = recallHistory.get(sessionId) || new Map<string, number>();
-            const filteredResults = rankedResults.filter((r) => {
+            const filteredResults = results.filter((r) => {
               const lastTurn = sessionHistory.get(r.entry.id) ?? -999;
               const diff = currentTurn - lastTurn;
               const isRedundant = diff < minRepeated;
@@ -2323,10 +2282,12 @@ const memoryLanceDBProPlugin = {
             const meta = parseSmartMetadata(r.entry.metadata, r.entry);
             if (meta.state !== "confirmed") {
               stateFilteredCount++;
+              api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=state(${meta.state}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`);
               return false;
             }
             if (meta.memory_layer === "archive" || meta.memory_layer === "reflection") {
               stateFilteredCount++;
+              api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=layer(${meta.memory_layer}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`);
               return false;
             }
             if (meta.suppressed_until_turn > 0 && currentTurn <= meta.suppressed_until_turn) {
@@ -2343,30 +2304,13 @@ const memoryLanceDBProPlugin = {
             return;
           }
 
-          // Determine effective per-item char limit based on recall mode and intent depth
-          const effectivePerItemMaxChars = (() => {
-            if (recallMode === "summary") return Math.min(autoRecallPerItemMaxChars, 80); // L0 only
-            if (!intent) return autoRecallPerItemMaxChars; // "full" mode
-            // Adaptive mode: depth determines char budget
-            switch (intent.depth) {
-              case "l0": return Math.min(autoRecallPerItemMaxChars, 80);
-              case "l1": return autoRecallPerItemMaxChars; // default budget
-              case "full": return Math.min(autoRecallPerItemMaxChars * 3, 1000);
-            }
-          })();
-
           const preBudgetCandidates = governanceEligible.map((r) => {
             const metaObj = parseSmartMetadata(r.entry.metadata, r.entry);
             const displayCategory = metaObj.memory_category || r.entry.category;
             const displayTier = metaObj.tier || "";
             const tierPrefix = displayTier ? `[${displayTier.charAt(0).toUpperCase()}]` : "";
-            // Select content tier based on recallMode/intent depth
-            const contentText = recallMode === "summary"
-              ? (metaObj.l0_abstract || r.entry.text)
-              : intent?.depth === "full"
-                ? (r.entry.text) // full text for deep queries
-                : (metaObj.l0_abstract || r.entry.text); // L0/L1 default
-            const summary = sanitizeForContext(contentText).slice(0, effectivePerItemMaxChars);
+            const abstract = metaObj.l0_abstract || r.entry.text;
+            const summary = sanitizeForContext(abstract).slice(0, autoRecallPerItemMaxChars);
             return {
               id: r.entry.id,
               prefix: `${tierPrefix}[${displayCategory}:${r.entry.scope}]`,
@@ -2516,14 +2460,6 @@ const memoryLanceDBProPlugin = {
         // See: https://github.com/CortexReach/memory-lancedb-pro/issues/260
         const backgroundRun = (async () => {
         try {
-          // Feature 7: Check extraction rate limit before any work
-          if (extractionRateLimiter.isRateLimited()) {
-            api.logger.debug(
-              `memory-lancedb-pro: auto-capture skipped (rate limited: ${extractionRateLimiter.getRecentCount()} extractions in last hour)`,
-            );
-            return;
-          }
-
           // Determine agent ID and default scope
           const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey);
           const accessibleScopes = resolveScopeFilter(scopeManager, agentId);
@@ -2557,7 +2493,7 @@ const memoryLanceDBProPlugin = {
             const content = msgObj.content;
 
             if (typeof content === "string") {
-              const normalized = normalizeAutoCaptureText(role, content, shouldSkipReflectionMessage);
+              const normalized = normalizeAutoCaptureText(role, content);
               if (!normalized) {
                 skippedAutoCaptureTexts++;
               } else {
@@ -2577,7 +2513,7 @@ const memoryLanceDBProPlugin = {
                   typeof (block as Record<string, unknown>).text === "string"
                 ) {
                   const text = (block as Record<string, unknown>).text as string;
-                  const normalized = normalizeAutoCaptureText(role, text, shouldSkipReflectionMessage);
+                  const normalized = normalizeAutoCaptureText(role, text);
                   if (!normalized) {
                     skippedAutoCaptureTexts++;
                   } else {
@@ -2652,39 +2588,8 @@ const memoryLanceDBProPlugin = {
             );
           }
 
-          // ----------------------------------------------------------------
-          // Feature 7: Skip low-value conversations
-          // ----------------------------------------------------------------
-          if (config.extractionThrottle?.skipLowValue === true) {
-            const conversationValue = estimateConversationValue(texts);
-            if (conversationValue < 0.2) {
-              api.logger.debug(
-                `memory-lancedb-pro: auto-capture skipped for agent ${agentId} (low conversation value: ${conversationValue.toFixed(2)})`,
-              );
-              return;
-            }
-          }
-
-          // ----------------------------------------------------------------
-          // Feature 1: Session compression — prioritize high-signal texts
-          // ----------------------------------------------------------------
-          if (config.sessionCompression?.enabled === true && texts.length > 0) {
-            const maxChars = config.extractMaxChars ?? 8000;
-            const compressed = compressTexts(texts, maxChars, {
-              minScoreToKeep: config.sessionCompression?.minScoreToKeep,
-            });
-            if (compressed.dropped > 0) {
-              api.logger.debug(
-                `memory-lancedb-pro: session compression for agent ${agentId}: dropped ${compressed.dropped}/${texts.length} texts (${compressed.totalChars} chars kept)`,
-              );
-              texts = compressed.texts;
-            }
-          }
-
           // ----------------------------------------------------------------
           // Smart Extraction (Phase 1: LLM-powered 6-category extraction)
-          // Rate limiter charged AFTER successful extraction, not before,
-          // so no-op sessions don't consume the hourly quota.
           // ----------------------------------------------------------------
           if (smartExtractor) {
             // Pre-filter: embedding-based noise detection (language-agnostic)
@@ -2704,8 +2609,6 @@ const memoryLanceDBProPlugin = {
                 conversationText, sessionKey,
                 { scope: defaultScope, scopeFilter: accessibleScopes },
               );
-              // Charge rate limiter only after successful extraction
-              extractionRateLimiter.recordExtraction();
               if (stats.created > 0 || stats.merged > 0) {
                 api.logger.info(
                   `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}`
@@ -2802,7 +2705,7 @@ const memoryLanceDBProPlugin = {
                     l2_content: text,
                     source_session: (event as any).sessionKey || "unknown",
                     source: "auto-capture",
-                    state: "pending",
+                    state: "confirmed",
                     memory_layer: "working",
                     injected_count: 0,
                     bad_recall_count: 0,
@@ -3736,10 +3639,6 @@ export function parsePluginConfig(value: unknown): PluginConfig {
       // Accept number, numeric string, or env-var string (e.g. "${EMBED_DIM}").
       // Also accept legacy top-level `dimensions` for convenience.
       dimensions: parsePositiveInt(embedding.dimensions ?? cfg.dimensions),
-      omitDimensions:
-        typeof embedding.omitDimensions === "boolean"
-          ? embedding.omitDimensions
-          : undefined,
       taskQuery:
         typeof embedding.taskQuery === "string"
           ? embedding.taskQuery
@@ -3766,6 +3665,7 @@ export function parsePluginConfig(value: unknown): PluginConfig {
     autoRecallMaxItems: parsePositiveInt(cfg.autoRecallMaxItems) ?? 3,
     autoRecallMaxChars: parsePositiveInt(cfg.autoRecallMaxChars) ?? 600,
     autoRecallPerItemMaxChars: parsePositiveInt(cfg.autoRecallPerItemMaxChars) ?? 180,
+    autoRecallTimeoutMs: parsePositiveInt(cfg.autoRecallTimeoutMs) ?? 3000,
     captureAssistant: cfg.captureAssistant === true,
     retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? cfg.retrieval as any : undefined,
     decay: typeof cfg.decay === "object" && cfg.decay !== null ? cfg.decay as any : undefined,
@@ -3861,47 +3761,9 @@ export function parsePluginConfig(value: unknown): PluginConfig {
         }
         : undefined,
     admissionControl: normalizeAdmissionControlConfig(cfg.admissionControl),
-    memoryCompaction: (() => {
-      const raw =
-        typeof cfg.memoryCompaction === "object" && cfg.memoryCompaction !== null
-          ? (cfg.memoryCompaction as Record<string, unknown>)
-          : null;
-      if (!raw) return undefined;
-      return {
-        enabled: raw.enabled === true,
-        minAgeDays: parsePositiveInt(raw.minAgeDays) ?? 7,
-        similarityThreshold:
-          typeof raw.similarityThreshold === "number"
-            ? Math.max(0, Math.min(1, raw.similarityThreshold))
-            : 0.88,
-        minClusterSize: parsePositiveInt(raw.minClusterSize) ?? 2,
-        maxMemoriesToScan: parsePositiveInt(raw.maxMemoriesToScan) ?? 200,
-        cooldownHours: parsePositiveInt(raw.cooldownHours) ?? 24,
-      };
-    })(),
-    sessionCompression:
-      typeof cfg.sessionCompression === "object" && cfg.sessionCompression !== null
-        ? {
-            enabled:
-              (cfg.sessionCompression as Record<string, unknown>).enabled === true,
-            minScoreToKeep:
-              typeof (cfg.sessionCompression as Record<string, unknown>).minScoreToKeep === "number"
-                ? ((cfg.sessionCompression as Record<string, unknown>).minScoreToKeep as number)
-                : 0.3,
-          }
-        : { enabled: false, minScoreToKeep: 0.3 },
-    extractionThrottle:
-      typeof cfg.extractionThrottle === "object" && cfg.extractionThrottle !== null
-        ? {
-            skipLowValue:
-              (cfg.extractionThrottle as Record<string, unknown>).skipLowValue === true,
-            maxExtractionsPerHour:
-              typeof (cfg.extractionThrottle as Record<string, unknown>).maxExtractionsPerHour === "number"
-                ? ((cfg.extractionThrottle as Record<string, unknown>).maxExtractionsPerHour as number)
-                : 30,
-          }
-        : { skipLowValue: false, maxExtractionsPerHour: 30 },
   };
 }
 
+export function _resetInitialized() { _initialized = false; }
+
 export default memoryLanceDBProPlugin;
diff --git a/openclaw.plugin.json b/openclaw.plugin.json
index c05bf0f..a04bfce 100644
--- a/openclaw.plugin.json
+++ b/openclaw.plugin.json
@@ -46,10 +46,6 @@
             "type": "integer",
             "minimum": 1
           },
-          "omitDimensions": {
-            "type": "boolean",
-            "description": "When true, omit the dimensions parameter from embedding requests even if dimensions is configured"
-          },
           "taskQuery": {
             "type": "string",
             "description": "Embedding task for queries (provider-specific, e.g. Jina: retrieval.query)"
@@ -137,12 +133,6 @@
         "default": 180,
         "description": "Maximum character budget per auto-injected memory summary."
       },
-      "recallMode": {
-        "type": "string",
-        "enum": ["full", "summary", "adaptive", "off"],
-        "default": "full",
-        "description": "Auto-recall depth mode. 'full': inject with configured per-item budget. 'summary': L0 abstracts only (compact). 'adaptive': analyze query intent to auto-select category and depth. 'off': disable auto-recall injection."
-      },
       "captureAssistant": {
         "type": "boolean"
       },
@@ -238,23 +228,78 @@
             "type": "object",
             "additionalProperties": false,
             "properties": {
-              "utility": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 },
-              "confidence": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 },
-              "novelty": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 },
-              "recency": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 },
-              "typePrior": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.6 }
+              "utility": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.1
+              },
+              "confidence": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.1
+              },
+              "novelty": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.1
+              },
+              "recency": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.1
+              },
+              "typePrior": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.6
+              }
             }
           },
           "typePriors": {
             "type": "object",
             "additionalProperties": false,
             "properties": {
-              "profile": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.95 },
-              "preferences": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.9 },
-              "entities": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.75 },
-              "events": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.45 },
-              "cases": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.8 },
-              "patterns": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.85 }
+              "profile": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.95
+              },
+              "preferences": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.9
+              },
+              "entities": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.75
+              },
+              "events": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.45
+              },
+              "cases": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.8
+              },
+              "patterns": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "default": 0.85
+              }
             }
           }
         }
@@ -325,6 +370,11 @@
             "default": "jina",
             "description": "Reranker provider format. Determines request/response shape and auth header. Use tei for Hugging Face Text Embeddings Inference /rerank endpoints. DashScope uses gte-rerank-v2 with endpoint https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank."
           },
+          "rerankTimeoutMs": {
+            "type": "number",
+            "default": 5000,
+            "description": "Rerank API timeout in milliseconds. Increase for local/CPU-based rerank servers (e.g., 120000 for 2 minutes)."
+          },
           "candidatePoolSize": {
             "type": "integer",
             "minimum": 10,
@@ -745,86 +795,12 @@
           }
         }
       },
-      "memoryCompaction": {
-        "type": "object",
-        "additionalProperties": false,
-        "description": "Progressive summarization: periodically consolidate semantically similar old memories into refined single entries, reducing noise and improving retrieval quality over time.",
-        "properties": {
-          "enabled": {
-            "type": "boolean",
-            "default": false,
-            "description": "Enable automatic compaction at gateway startup (respects cooldownHours)"
-          },
-          "minAgeDays": {
-            "type": "integer",
-            "default": 7,
-            "minimum": 1,
-            "description": "Only compact memories at least this many days old"
-          },
-          "similarityThreshold": {
-            "type": "number",
-            "default": 0.88,
-            "minimum": 0,
-            "maximum": 1,
-            "description": "Cosine similarity threshold for clustering. Higher = more conservative merges."
-          },
-          "minClusterSize": {
-            "type": "integer",
-            "default": 2,
-            "minimum": 2,
-            "description": "Minimum cluster size required to trigger a merge"
-          },
-          "maxMemoriesToScan": {
-            "type": "integer",
-            "default": 200,
-            "minimum": 1,
-            "description": "Maximum number of memories to scan per compaction run"
-          },
-          "cooldownHours": {
-            "type": "integer",
-            "default": 24,
-            "minimum": 1,
-            "description": "Minimum hours between automatic compaction runs"
-          }
-        }
-      },
-      "sessionCompression": {
-        "type": "object",
-        "additionalProperties": false,
-        "description": "Session compression settings for auto-capture. Scores and compresses conversation texts to prioritize high-signal content.",
-        "properties": {
-          "enabled": {
-            "type": "boolean",
-            "default": false,
-            "description": "Enable session compression before auto-capture extraction"
-          },
-          "minScoreToKeep": {
-            "type": "number",
-            "minimum": 0,
-            "maximum": 1,
-            "default": 0.3,
-            "description": "Minimum score threshold. If all texts score below this, fallback to keeping at least the last few texts."
-          }
-        }
-      },
-      "extractionThrottle": {
-        "type": "object",
-        "additionalProperties": false,
-        "description": "Adaptive extraction throttling to reduce LLM cost on low-value or rapid-fire sessions.",
-        "properties": {
-          "skipLowValue": {
-            "type": "boolean",
-            "default": false,
-            "description": "Skip extraction for conversations with estimated value < 0.2"
-          },
-          "maxExtractionsPerHour": {
-            "type": "integer",
-            "minimum": 1,
-            "maximum": 200,
-            "default": 30,
-            "description": "Maximum number of auto-capture extractions allowed per hour"
-          }
-        }
+      "autoRecallTimeoutMs": {
+        "type": "integer",
+        "minimum": 1000,
+        "maximum": 120000,
+        "default": 3000,
+        "description": "Timeout in ms for auto-recall search before skipping injection. Prevents agent startup stall when embedding/rerank is slow."
       }
     },
     "required": [
@@ -855,11 +831,6 @@
       "help": "Override vector dimensions for custom models not in the built-in lookup table",
       "advanced": true
     },
-    "embedding.omitDimensions": {
-      "label": "Omit Request Dimensions",
-      "help": "Do not send the dimensions parameter to the embedding API even if embedding.dimensions is configured. Useful for local models like Qwen3-Embedding that reject the field.",
-      "advanced": true
-    },
     "embedding.taskQuery": {
       "label": "Query Task",
       "placeholder": "retrieval.query",
@@ -1012,11 +983,6 @@
       "help": "Maximum characters per injected memory summary.",
       "advanced": true
     },
-    "recallMode": {
-      "label": "Recall Mode",
-      "help": "Auto-recall depth: full (default), summary (L0 only), adaptive (intent-based category routing), off.",
-      "advanced": false
-    },
     "captureAssistant": {
       "label": "Capture Assistant Messages",
       "help": "Also auto-capture assistant messages (default false to reduce memory pollution)",
@@ -1298,43 +1264,6 @@
       "placeholder": "30000",
       "help": "Request timeout for the smart-extraction / upgrade LLM in milliseconds",
       "advanced": true
-    },
-    "memoryCompaction.enabled": {
-      "label": "Auto Compaction",
-      "help": "Automatically consolidate similar old memories at gateway startup. Also available on-demand via the memory_compact tool (requires enableManagementTools)."
-    },
-    "memoryCompaction.minAgeDays": {
-      "label": "Min Age (days)",
-      "help": "Memories younger than this are never touched by compaction",
-      "advanced": true
-    },
-    "memoryCompaction.similarityThreshold": {
-      "label": "Similarity Threshold",
-      "help": "How similar two memories must be to merge (0–1). 0.88 is a good starting point; raise to 0.92+ for conservative merges.",
-      "advanced": true
-    },
-    "memoryCompaction.cooldownHours": {
-      "label": "Cooldown (hours)",
-      "help": "Minimum gap between automatic compaction runs",
-      "advanced": true
-    },
-    "sessionCompression.enabled": {
-      "label": "Session Compression",
-      "help": "Score and compress conversation texts before auto-capture to prioritize high-signal content (corrections, decisions, tool calls)"
-    },
-    "sessionCompression.minScoreToKeep": {
-      "label": "Compression Min Score",
-      "help": "Minimum text score threshold. If all texts score below this, keep at least the last few texts as fallback.",
-      "advanced": true
-    },
-    "extractionThrottle.skipLowValue": {
-      "label": "Skip Low-Value Conversations",
-      "help": "Skip auto-capture for conversations estimated to have low memory value (< 0.2)"
-    },
-    "extractionThrottle.maxExtractionsPerHour": {
-      "label": "Max Extractions Per Hour",
-      "help": "Rate limit for auto-capture extractions. Prevents excessive LLM calls during rapid-fire sessions.",
-      "advanced": true
     }
   }
-}
+}
\ No newline at end of file
diff --git a/src/retriever.ts b/src/retriever.ts
index 900db75..dee206e 100644
--- a/src/retriever.ts
+++ b/src/retriever.ts
@@ -19,8 +19,6 @@ import {
   parseSmartMetadata,
   toLifecycleMemory,
 } from "./smart-metadata.js";
-import { TraceCollector, type RetrievalTrace } from "./retrieval-trace.js";
-import { RetrievalStatsCollector } from "./retrieval-stats.js";
 
 // ============================================================================
 // Types & Configuration
@@ -45,6 +43,8 @@ export interface RetrievalConfig {
   rerankModel?: string;
   /** Reranker API endpoint (default: https://api.jina.ai/v1/rerank). */
   rerankEndpoint?: string;
+  /** Rerank API timeout in milliseconds (default: 5000). Increase for local/CPU-based rerank servers. */
+  rerankTimeoutMs?: number;
   /** Reranker provider format. Determines request/response shape and auth header.
    *  - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score
    *  - "siliconflow": same format as jina (alias, for clarity)
@@ -127,6 +127,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = {
   filterNoise: true,
   rerankModel: "jina-reranker-v3",
   rerankEndpoint: "https://api.jina.ai/v1/rerank",
+  rerankTimeoutMs: 5000,
   lengthNormAnchor: 500,
   hardMinScore: 0.35,
   timeDecayHalfLifeDays: 60,
@@ -361,7 +362,6 @@ function cosineSimilarity(a: number[], b: number[]): number {
 export class MemoryRetriever {
   private accessTracker: AccessTracker | null = null;
   private tierManager: TierManager | null = null;
-  private _statsCollector: RetrievalStatsCollector | null = null;
 
   constructor(
     private store: MemoryStore,
@@ -374,16 +374,6 @@ export class MemoryRetriever {
     this.accessTracker = tracker;
   }
 
-  /** Enable aggregate retrieval statistics collection. */
-  setStatsCollector(collector: RetrievalStatsCollector): void {
-    this._statsCollector = collector;
-  }
-
-  /** Get the stats collector (if set). */
-  getStatsCollector(): RetrievalStatsCollector | null {
-    return this._statsCollector;
-  }
-
   private filterActiveResults<T extends MemorySearchResult>(results: T[]): T[] {
     return results.filter((result) =>
       isMemoryActiveAt(parseSmartMetadata(result.entry.metadata, result.entry)),
@@ -394,35 +384,34 @@ export class MemoryRetriever {
     const { query, limit, scopeFilter, category, source } = context;
     const safeLimit = clampInt(limit, 1, 20);
 
-    // Create trace only when stats collector is active (zero overhead otherwise)
-    const trace = this._statsCollector ? new TraceCollector() : undefined;
-
     // Check if query contains tag prefixes -> use BM25-only + mustContain
     const tagTokens = this.extractTagTokens(query);
     let results: RetrievalResult[];
-
+    
     if (tagTokens.length > 0) {
       results = await this.bm25OnlyRetrieval(
-        query, tagTokens, safeLimit, scopeFilter, category, trace,
+        query,
+        tagTokens,
+        safeLimit,
+        scopeFilter,
+        category,
       );
     } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
       results = await this.vectorOnlyRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
+        query,
+        safeLimit,
+        scopeFilter,
+        category,
       );
     } else {
       results = await this.hybridRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
+        query,
+        safeLimit,
+        scopeFilter,
+        category,
       );
     }
 
-    // Feed completed trace to stats collector
-    if (trace && this._statsCollector) {
-      const mode = tagTokens.length > 0 ? "bm25"
-        : (this.config.mode === "vector" || !this.store.hasFtsSupport) ? "vector" : "hybrid";
-      const finalTrace = trace.finalize(query, mode);
-      this._statsCollector.recordQuery(finalTrace, source || "unknown");
-    }
-
     // Record access for reinforcement (manual recall only)
     if (this.accessTracker && source === "manual" && results.length > 0) {
       this.accessTracker.recordAccess(results.map((r) => r.entry.id));
@@ -431,49 +420,6 @@ export class MemoryRetriever {
     return results;
   }
 
-  /**
-   * Retrieve with full trace, used by the memory_debug tool.
-   * Always collects a trace regardless of stats collector state.
-   */
-  async retrieveWithTrace(
-    context: RetrievalContext,
-  ): Promise<{ results: RetrievalResult[]; trace: RetrievalTrace }> {
-    const { query, limit, scopeFilter, category, source } = context;
-    const safeLimit = clampInt(limit, 1, 20);
-    const trace = new TraceCollector();
-
-    const tagTokens = this.extractTagTokens(query);
-    let results: RetrievalResult[];
-
-    if (tagTokens.length > 0) {
-      results = await this.bm25OnlyRetrieval(
-        query, tagTokens, safeLimit, scopeFilter, category, trace,
-      );
-    } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) {
-      results = await this.vectorOnlyRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
-      );
-    } else {
-      results = await this.hybridRetrieval(
-        query, safeLimit, scopeFilter, category, trace,
-      );
-    }
-
-    const mode = tagTokens.length > 0 ? "bm25"
-      : (this.config.mode === "vector" || !this.store.hasFtsSupport) ? "vector" : "hybrid";
-    const finalTrace = trace.finalize(query, mode);
-
-    if (this._statsCollector) {
-      this._statsCollector.recordQuery(finalTrace, source || "debug");
-    }
-
-    if (this.accessTracker && source === "manual" && results.length > 0) {
-      this.accessTracker.recordAccess(results.map((r) => r.entry.id));
-    }
-
-    return { results, trace: finalTrace };
-  }
-
   private extractTagTokens(query: string): string[] {
     if (!this.config.tagPrefixes?.length) return [];
     
@@ -488,64 +434,45 @@ export class MemoryRetriever {
     limit: number,
     scopeFilter?: string[],
     category?: string,
-    trace?: TraceCollector,
   ): Promise<RetrievalResult[]> {
     const queryVector = await this.embedder.embedQuery(query);
-
-    trace?.startStage("vector_search", []);
     const results = await this.store.vectorSearch(
-      queryVector, limit, this.config.minScore, scopeFilter, { excludeInactive: true },
+      queryVector,
+      limit,
+      this.config.minScore,
+      scopeFilter,
+      { excludeInactive: true },
     );
+
+    // Filter by category if specified
     const filtered = category
-      ? results.filter((r) => r.entry.category === category) : results;
+      ? results.filter((r) => r.entry.category === category)
+      : results;
+
     const mapped = filtered.map(
       (result, index) =>
-        ({ ...result, sources: { vector: { score: result.score, rank: index + 1 } } }) as RetrievalResult,
+        ({
+          ...result,
+          sources: {
+            vector: { score: result.score, rank: index + 1 },
+          },
+        }) as RetrievalResult,
     );
-    if (trace) {
-      trace.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score));
-    }
 
-    let weighted: RetrievalResult[];
-    if (this.decayEngine) {
-      weighted = mapped;
-    } else {
-      trace?.startStage("recency_boost", mapped.map((r) => r.entry.id));
-      const boosted = this.applyRecencyBoost(mapped);
-      trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
-
-      trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
-      weighted = this.applyImportanceWeight(boosted);
-      trace?.endStage(weighted.map((r) => r.entry.id), weighted.map((r) => r.score));
-    }
-
-    trace?.startStage("length_normalization", weighted.map((r) => r.entry.id));
+    const weighted = this.decayEngine ? mapped : this.applyImportanceWeight(this.applyRecencyBoost(mapped));
     const lengthNormalized = this.applyLengthNormalization(weighted);
-    trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
-
-    trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
     const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
-    trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
-
-    const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
-    trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
     const lifecycleRanked = this.decayEngine
       ? this.applyDecayBoost(hardFiltered)
       : this.applyTimeDecay(hardFiltered);
-    trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
-
-    trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
     const denoised = this.config.filterNoise
       ? filterNoise(lifecycleRanked, r => r.entry.text)
       : lifecycleRanked;
-    trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
 
-    trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
+    // MMR deduplication: avoid top-k filled with near-identical memories
     const deduplicated = this.applyMMRDiversity(denoised);
-    const finalResults = deduplicated.slice(0, limit);
-    trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
 
-    return finalResults;
+    return deduplicated.slice(0, limit);
   }
 
   private async bm25OnlyRetrieval(
@@ -554,64 +481,56 @@ export class MemoryRetriever {
     limit: number,
     scopeFilter?: string[],
     category?: string,
-    trace?: TraceCollector,
   ): Promise<RetrievalResult[]> {
     const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
-
-    trace?.startStage("bm25_search", []);
+    
+    // Run BM25 search
     const bm25Results = await this.store.bm25Search(
-      query, candidatePoolSize, scopeFilter, { excludeInactive: true },
+      query,
+      candidatePoolSize,
+      scopeFilter,
+      { excludeInactive: true },
     );
+
+    // Filter by category if specified
     const categoryFiltered = category
-      ? bm25Results.filter((r) => r.entry.category === category) : bm25Results;
+      ? bm25Results.filter((r) => r.entry.category === category)
+      : bm25Results;
+
+    // mustContain: only keep entries that literally contain all tag tokens (case-insensitive)
     const mustContainFiltered = categoryFiltered.filter((r) => {
       const textLower = r.entry.text.toLowerCase();
       return tagTokens.every((t) => textLower.includes(t.toLowerCase()));
     });
+
     const mapped = mustContainFiltered.map(
       (result, index) =>
-        ({ ...result, sources: { bm25: { score: result.score, rank: index + 1 } } }) as RetrievalResult,
+        ({
+          ...result,
+          sources: {
+            bm25: { score: result.score, rank: index + 1 },
+          },
+        }) as RetrievalResult,
     );
-    trace?.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score));
 
-    let temporallyRanked: RetrievalResult[];
-    if (this.decayEngine) {
-      temporallyRanked = mapped;
-    } else {
-      trace?.startStage("recency_boost", mapped.map((r) => r.entry.id));
-      const boosted = this.applyRecencyBoost(mapped);
-      trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
-
-      trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
-      temporallyRanked = this.applyImportanceWeight(boosted);
-      trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score));
-    }
+    // Apply same post-processing as hybrid retrieval to avoid behavior regression
+    const temporallyRanked = this.decayEngine
+      ? mapped
+      : this.applyImportanceWeight(this.applyRecencyBoost(mapped));
 
-    trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id));
     const lengthNormalized = this.applyLengthNormalization(temporallyRanked);
-    trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
-
-    trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
     const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
-    trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
 
-    const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
-    trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
     const lifecycleRanked = this.decayEngine
-      ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered);
-    trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
+      ? this.applyDecayBoost(hardFiltered)
+      : this.applyTimeDecay(hardFiltered);
 
-    trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
     const denoised = this.config.filterNoise
-      ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked;
-    trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
+      ? filterNoise(lifecycleRanked, r => r.entry.text)
+      : lifecycleRanked;
 
-    trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
     const deduplicated = this.applyMMRDiversity(denoised);
-    const finalResults = deduplicated.slice(0, limit);
-    trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
-
-    return finalResults;
+    return deduplicated.slice(0, limit);
   }
 
   private async hybridRetrieval(
@@ -619,88 +538,70 @@ export class MemoryRetriever {
     limit: number,
     scopeFilter?: string[],
     category?: string,
-    trace?: TraceCollector,
   ): Promise<RetrievalResult[]> {
-    const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2);
+    const candidatePoolSize = Math.max(
+      this.config.candidatePoolSize,
+      limit * 2,
+    );
+
+    // Compute query embedding once, reuse for vector search + reranking
     const queryVector = await this.embedder.embedQuery(query);
 
-    // Run vector and BM25 searches in parallel.
-    // Trace as a single "parallel_search" stage since both run concurrently —
-    // splitting into separate sequential stages would misrepresent timing.
-    trace?.startStage("parallel_search", []);
+    // Run vector and BM25 searches in parallel
     const [vectorResults, bm25Results] = await Promise.all([
-      this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category),
+      this.runVectorSearch(
+        queryVector,
+        candidatePoolSize,
+        scopeFilter,
+        category,
+      ),
       this.runBM25Search(query, candidatePoolSize, scopeFilter, category),
     ]);
-    if (trace) {
-      const allSearchIds = [
-        ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]),
-      ];
-      const allScores = [...vectorResults.map((r) => r.score), ...bm25Results.map((r) => r.score)];
-      trace.endStage(allSearchIds, allScores);
-    }
 
-    // Fuse results using RRF
-    const allInputIds = [
-      ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]),
-    ];
-    trace?.startStage("rrf_fusion", allInputIds);
+    // Fuse results using RRF (async: validates BM25-only entries exist in store)
     const fusedResults = await this.fuseResults(vectorResults, bm25Results);
-    trace?.endStage(fusedResults.map((r) => r.entry.id), fusedResults.map((r) => r.score));
 
     // Apply minimum score threshold
-    trace?.startStage("min_score_filter", fusedResults.map((r) => r.entry.id));
-    const filtered = fusedResults.filter((r) => r.score >= this.config.minScore);
-    trace?.endStage(filtered.map((r) => r.entry.id), filtered.map((r) => r.score));
-
-    // Rerank if enabled — only emit trace stage when rerank actually runs
-    let reranked: RetrievalResult[];
-    if (this.config.rerank !== "none") {
-      trace?.startStage("rerank", filtered.map((r) => r.entry.id));
-      reranked = await this.rerankResults(query, queryVector, filtered.slice(0, limit * 2));
-      trace?.endStage(reranked.map((r) => r.entry.id), reranked.map((r) => r.score));
-    } else {
-      reranked = filtered;
-    }
+    const filtered = fusedResults.filter(
+      (r) => r.score >= this.config.minScore,
+    );
 
-    let temporallyRanked: RetrievalResult[];
-    if (this.decayEngine) {
-      temporallyRanked = reranked;
-    } else {
-      trace?.startStage("recency_boost", reranked.map((r) => r.entry.id));
-      const boosted = this.applyRecencyBoost(reranked);
-      trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score));
+    // Rerank if enabled
+    const reranked =
+      this.config.rerank !== "none"
+        ? await this.rerankResults(
+          query,
+          queryVector,
+          filtered.slice(0, limit * 2),
+        )
+        : filtered;
 
-      trace?.startStage("importance_weight", boosted.map((r) => r.entry.id));
-      temporallyRanked = this.applyImportanceWeight(boosted);
-      trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score));
-    }
+    const temporallyRanked = this.decayEngine
+      ? reranked
+      : this.applyImportanceWeight(this.applyRecencyBoost(reranked));
 
-    trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id));
+    // Apply length normalization (penalize long entries dominating via keyword density)
     const lengthNormalized = this.applyLengthNormalization(temporallyRanked);
-    trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score));
 
-    trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id));
+    // Hard minimum score cutoff should be based on semantic / lexical relevance.
+    // Lifecycle decay and time-decay are used for re-ranking, not for dropping
+    // otherwise relevant fresh memories.
     const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore);
-    trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score));
 
-    const decayStageName = this.decayEngine ? "decay_boost" : "time_decay";
-    trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id));
+    // Apply lifecycle-aware decay or legacy time decay after thresholding
     const lifecycleRanked = this.decayEngine
-      ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered);
-    trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score));
+      ? this.applyDecayBoost(hardFiltered)
+      : this.applyTimeDecay(hardFiltered);
 
-    trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id));
+    // Filter noise
     const denoised = this.config.filterNoise
-      ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked;
-    trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score));
+      ? filterNoise(lifecycleRanked, r => r.entry.text)
+      : lifecycleRanked;
 
-    trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id));
+    // MMR deduplication: avoid top-k filled with near-identical memories
     const deduplicated = this.applyMMRDiversity(denoised);
-    const finalResults = deduplicated.slice(0, limit);
-    trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score));
 
-    return finalResults;
+    return deduplicated.slice(0, limit);
   }
 
   private async runVectorSearch(
@@ -858,18 +759,22 @@ export class MemoryRetriever {
           results.length,
         );
 
-        // Timeout: 5 seconds to prevent stalling retrieval pipeline
+        // Timeout: prevent stalling retrieval pipeline (configurable via rerankTimeoutMs)
         const controller = new AbortController();
-        const timeout = setTimeout(() => controller.abort(), 5000);
-
-        const response = await fetch(endpoint, {
-          method: "POST",
-          headers,
-          body: JSON.stringify(body),
-          signal: controller.signal,
-        });
+        const timeoutMs = this.config.rerankTimeoutMs ?? 5000;
+        const timeout = setTimeout(() => controller.abort(), timeoutMs);
 
-        clearTimeout(timeout);
+        let response: Response;
+        try {
+          response = await fetch(endpoint, {
+            method: "POST",
+            headers,
+            body: JSON.stringify(body),
+            signal: controller.signal,
+          });
+        } finally {
+          clearTimeout(timeout);
+        }
 
         if (response.ok) {
           const data: unknown = await response.json();
@@ -928,7 +833,7 @@ export class MemoryRetriever {
         }
       } catch (error) {
         if (error instanceof Error && error.name === "AbortError") {
-          console.warn("Rerank API timed out (5s), falling back to cosine");
+          console.warn(`Rerank API timed out (${this.config.rerankTimeoutMs ?? 5000}ms), falling back to cosine`);
         } else {
           console.warn("Rerank API failed, falling back to cosine:", error);
         }