diff --git a/index.ts b/index.ts index 74cd7c6..c0ce8d5 100644 --- a/index.ts +++ b/index.ts @@ -24,12 +24,6 @@ import { appendSelfImprovementEntry, ensureSelfImprovementLearningFiles } from " import type { MdMirrorWriter } from "./src/tools.js"; import { shouldSkipRetrieval } from "./src/adaptive-retrieval.js"; import { parseClawteamScopes, applyClawteamScopes } from "./src/clawteam-scope.js"; -import { - runCompaction, - shouldRunCompaction, - recordCompactionRun, - type CompactionConfig, -} from "./src/memory-compactor.js"; import { runWithReflectionTransientRetryOnce } from "./src/reflection-retry.js"; import { resolveReflectionSessionSearchDirs, stripResetSuffix } from "./src/session-recovery.js"; import { @@ -45,11 +39,9 @@ import { createReflectionEventId } from "./src/reflection-event-store.js"; import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js"; import { createMemoryCLI } from "./cli.js"; import { isNoise } from "./src/noise-filter.js"; -import { normalizeAutoCaptureText } from "./src/auto-capture-cleanup.js"; // Import smart extraction & lifecycle components -import { SmartExtractor, createExtractionRateLimiter } from "./src/smart-extractor.js"; -import { compressTexts, estimateConversationValue } from "./src/session-compressor.js"; +import { SmartExtractor } from "./src/smart-extractor.js"; import { NoisePrototypeBank } from "./src/noise-prototypes.js"; import { createLlmClient } from "./src/llm-client.js"; import { createDecayEngine, DEFAULT_DECAY_CONFIG } from "./src/decay-engine.js"; @@ -72,7 +64,6 @@ import { type AdmissionControlConfig, type AdmissionRejectionAuditEntry, } from "./src/admission-control.js"; -import { analyzeIntent, applyCategoryBoost } from "./src/intent-analyzer.js"; // ============================================================================ // Configuration & Types @@ -85,7 +76,6 @@ interface PluginConfig { model?: string; baseURL?: string; dimensions?: number; - omitDimensions?: boolean; taskQuery?: string; taskPassage?: string; normalized?: boolean; @@ -96,11 +86,10 @@ interface PluginConfig { autoRecall?: boolean; autoRecallMinLength?: number; autoRecallMinRepeated?: number; - autoRecallTimeoutMs?: number; autoRecallMaxItems?: number; autoRecallMaxChars?: number; autoRecallPerItemMaxChars?: number; - recallMode?: "full" | "summary" | "adaptive" | "off"; + autoRecallTimeoutMs?: number; captureAssistant?: boolean; retrieval?: { mode?: "hybrid" | "vector"; @@ -119,6 +108,7 @@ interface PluginConfig { | "pinecone" | "dashscope" | "tei"; + rerankTimeoutMs?: number; recencyHalfLifeDays?: number; recencyWeight?: number; filterNoise?: boolean; @@ -195,22 +185,6 @@ interface PluginConfig { mdMirror?: { enabled?: boolean; dir?: string }; workspaceBoundary?: WorkspaceBoundaryConfig; admissionControl?: AdmissionControlConfig; - memoryCompaction?: { - enabled?: boolean; - minAgeDays?: number; - similarityThreshold?: number; - minClusterSize?: number; - maxMemoriesToScan?: number; - cooldownHours?: number; - }; - sessionCompression?: { - enabled?: boolean; - minScoreToKeep?: number; - }; - extractionThrottle?: { - skipLowValue?: boolean; - maxExtractionsPerHour?: number; - }; } type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; @@ -727,10 +701,65 @@ function shouldSkipReflectionMessage(role: string, text: string): boolean { return false; } +const AUTO_CAPTURE_INBOUND_META_SENTINELS = [ + "Conversation info (untrusted metadata):", + "Sender (untrusted metadata):", + "Thread starter (untrusted, for context):", + "Replied message (untrusted, for context):", + "Forwarded message context (untrusted metadata):", + "Chat history since last reply (untrusted, for context):", +] as const; + +const AUTO_CAPTURE_SESSION_RESET_PREFIX = + "A new session was started via /new or /reset. Execute your Session Startup sequence now"; +const AUTO_CAPTURE_ADDRESSING_PREFIX_RE = /^(?:<@!?[0-9]+>|@[A-Za-z0-9_.-]+)\s*/; const AUTO_CAPTURE_MAP_MAX_ENTRIES = 2000; const AUTO_CAPTURE_EXPLICIT_REMEMBER_RE = /^(?:请|請)?(?:记住|記住|记一下|記一下|别忘了|別忘了)[。.!??!]*$/u; +function isAutoCaptureInboundMetaSentinelLine(line: string): boolean { + const trimmed = line.trim(); + return AUTO_CAPTURE_INBOUND_META_SENTINELS.some((sentinel) => sentinel === trimmed); +} + +function stripLeadingInboundMetadata(text: string): string { + if (!text || !AUTO_CAPTURE_INBOUND_META_SENTINELS.some((sentinel) => text.includes(sentinel))) { + return text; + } + + const lines = text.split("\n"); + let index = 0; + while (index < lines.length && lines[index].trim() === "") { + index++; + } + + while (index < lines.length && isAutoCaptureInboundMetaSentinelLine(lines[index])) { + index++; + if (index < lines.length && lines[index].trim() === "```json") { + index++; + while (index < lines.length && lines[index].trim() !== "```") { + index++; + } + if (index < lines.length && lines[index].trim() === "```") { + index++; + } + } else { + // Sentinel line not followed by a ```json fenced block — unexpected format. + // Log and return original text to avoid lossy stripping. + _autoCaptureDebugLog( + `memory-lancedb-pro: stripLeadingInboundMetadata: sentinel line not followed by json fenced block at line ${index}, returning original text`, + ); + return text; + } + + while (index < lines.length && lines[index].trim() === "") { + index++; + } + } + + return lines.slice(index).join("\n").trim(); +} + /** * Prune a Map to stay within the given maximum number of entries. * Deletes the oldest (earliest-inserted) keys when over the limit. @@ -745,6 +774,28 @@ function pruneMapIfOver(map: Map, maxEntries: number): void { } } +function stripAutoCaptureSessionResetPrefix(text: string): string { + const trimmed = text.trim(); + if (!trimmed.startsWith(AUTO_CAPTURE_SESSION_RESET_PREFIX)) { + return trimmed; + } + + const blankLineIndex = trimmed.indexOf("\n\n"); + if (blankLineIndex >= 0) { + return trimmed.slice(blankLineIndex + 2).trim(); + } + + const lines = trimmed.split("\n"); + if (lines.length <= 2) { + return ""; + } + return lines.slice(2).join("\n").trim(); +} + +function stripAutoCaptureAddressingPrefix(text: string): string { + return text.replace(AUTO_CAPTURE_ADDRESSING_PREFIX_RE, "").trim(); +} + function isExplicitRememberCommand(text: string): boolean { return AUTO_CAPTURE_EXPLICIT_REMEMBER_RE.test(text.trim()); } @@ -774,6 +825,34 @@ function buildAutoCaptureConversationKeyFromSessionKey(sessionKey: string): stri return suffix || null; } +function stripAutoCaptureInjectedPrefix(role: string, text: string): string { + if (role !== "user") { + return text.trim(); + } + + let normalized = text.trim(); + normalized = normalized.replace(/^\s*[\s\S]*?<\/relevant-memories>\s*/i, ""); + normalized = normalized.replace( + /^\[UNTRUSTED DATA[^\n]*\][\s\S]*?\[END UNTRUSTED DATA\]\s*/i, + "", + ); + normalized = stripAutoCaptureSessionResetPrefix(normalized); + normalized = stripLeadingInboundMetadata(normalized); + normalized = stripAutoCaptureAddressingPrefix(normalized); + return normalized.trim(); +} + +/** Module-level debug logger for auto-capture helpers; set during plugin registration. */ +let _autoCaptureDebugLog: (msg: string) => void = () => { }; + +function normalizeAutoCaptureText(role: unknown, text: string): string | null { + if (typeof role !== "string") return null; + const normalized = stripAutoCaptureInjectedPrefix(role, text); + if (!normalized) return null; + if (shouldSkipReflectionMessage(role, normalized)) return null; + return normalized; +} + function redactSecrets(text: string): string { const patterns: RegExp[] = [ /Bearer\s+[A-Za-z0-9\-._~+/]+=*/g, @@ -1577,6 +1656,8 @@ const pluginVersion = getPluginVersion(); // Plugin Definition // ============================================================================ +let _initialized = false; + const memoryLanceDBProPlugin = { id: "memory-lancedb-pro", name: "Memory (LanceDB Pro)", @@ -1585,9 +1666,26 @@ const memoryLanceDBProPlugin = { kind: "memory" as const, register(api: OpenClawPluginApi) { + + // Idempotent guard: skip re-init on repeated register() calls + if (_initialized) { + api.logger.debug("memory-lancedb-pro: register() called again — skipping re-init (idempotent)"); + return; + } + _initialized = true; + // Parse and validate configuration const config = parsePluginConfig(api.pluginConfig); + // Dual-memory model warning: help users understand the two-layer architecture + api.logger.info( + `[memory-lancedb-pro] memory_recall queries the plugin store (LanceDB), not MEMORY.md.\n` + + ` - Plugin memory (LanceDB) = primary recall source for semantic search\n` + + ` - MEMORY.md / memory/YYYY-MM-DD.md = startup context / journal only\n` + + ` - Use memory_store or auto-capture for recallable memories.\n` + + ` - Run: npx memory-lancedb-pro memory-pro import-markdown to migrate Markdown memories.`, + ); + const resolvedDbPath = api.resolvePath(config.dbPath || getDefaultDbPath()); // Pre-flight: validate storage path (symlink resolution, mkdir, write check). @@ -1614,7 +1712,6 @@ const memoryLanceDBProPlugin = { model: config.embedding.model || "text-embedding-3-small", baseURL: config.embedding.baseURL, dimensions: config.embedding.dimensions, - omitDimensions: config.embedding.omitDimensions, taskQuery: config.embedding.taskQuery, taskPassage: config.embedding.taskPassage, normalized: config.embedding.normalized, @@ -1723,12 +1820,6 @@ const memoryLanceDBProPlugin = { } } - // Extraction rate limiter (Feature 7: Adaptive Extraction Throttling) - // NOTE: This rate limiter is global — shared across all agents in multi-agent setups. - const extractionRateLimiter = createExtractionRateLimiter({ - maxExtractionsPerHour: config.extractionThrottle?.maxExtractionsPerHour, - }); - async function sleep(ms: number): Promise { await new Promise(resolve => setTimeout(resolve, ms)); } @@ -1955,6 +2046,9 @@ const memoryLanceDBProPlugin = { const autoCapturePendingIngressTexts = new Map(); const autoCaptureRecentTexts = new Map(); + // Wire up the module-level debug logger for pure helper functions. + _autoCaptureDebugLog = (msg: string) => api.logger.debug(msg); + api.logger.info( `memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})` ); @@ -1965,7 +2059,7 @@ const memoryLanceDBProPlugin = { ctx.channelId, ctx.conversationId, ); - const normalized = normalizeAutoCaptureText("user", event.content, shouldSkipReflectionMessage); + const normalized = normalizeAutoCaptureText("user", event.content); if (conversationKey && normalized) { const queue = autoCapturePendingIngressTexts.get(conversationKey) || []; queue.push(normalized); @@ -2019,128 +2113,6 @@ const memoryLanceDBProPlugin = { } ); - // ======================================================================== - // Memory Compaction (Progressive Summarization) - // ======================================================================== - - if (config.enableManagementTools) { - api.registerTool({ - name: "memory_compact", - description: - "Consolidate semantically similar old memories into refined single entries " + - "(progressive summarization). Reduces noise and improves retrieval quality over time. " + - "Use dry_run:true first to preview the compaction plan without making changes.", - inputSchema: { - type: "object" as const, - properties: { - dry_run: { - type: "boolean", - description: "Preview clusters without writing changes. Default: false.", - }, - min_age_days: { - type: "number", - description: "Only compact memories at least this many days old. Default: 7.", - }, - similarity_threshold: { - type: "number", - description: "Cosine similarity threshold for clustering [0-1]. Default: 0.88.", - }, - scopes: { - type: "array", - items: { type: "string" }, - description: "Scope filter. Omit to compact all scopes.", - }, - }, - required: [], - }, - execute: async (args: Record) => { - const compactionCfg: CompactionConfig = { - enabled: true, - minAgeDays: - typeof args.min_age_days === "number" - ? args.min_age_days - : (config.memoryCompaction?.minAgeDays ?? 7), - similarityThreshold: - typeof args.similarity_threshold === "number" - ? Math.max(0, Math.min(1, args.similarity_threshold)) - : (config.memoryCompaction?.similarityThreshold ?? 0.88), - minClusterSize: config.memoryCompaction?.minClusterSize ?? 2, - maxMemoriesToScan: config.memoryCompaction?.maxMemoriesToScan ?? 200, - dryRun: args.dry_run === true, - cooldownHours: config.memoryCompaction?.cooldownHours ?? 24, - }; - const scopes = - Array.isArray(args.scopes) && args.scopes.length > 0 - ? (args.scopes as string[]) - : undefined; - - const result = await runCompaction( - store, - embedder, - compactionCfg, - scopes, - api.logger, - ); - - return { - content: [ - { - type: "text", - text: JSON.stringify( - { - scanned: result.scanned, - clustersFound: result.clustersFound, - memoriesDeleted: result.memoriesDeleted, - memoriesCreated: result.memoriesCreated, - dryRun: result.dryRun, - summary: result.dryRun - ? `Dry run: found ${result.clustersFound} cluster(s) in ${result.scanned} memories — no changes made.` - : `Compacted ${result.memoriesDeleted} memories into ${result.memoriesCreated} consolidated entries.`, - }, - null, - 2, - ), - }, - ], - }; - }, - }); - } - - // Auto-compaction at gateway_start (if enabled, respects cooldown) - if (config.memoryCompaction?.enabled) { - api.on("gateway_start", () => { - const compactionStateFile = join( - dirname(resolvedDbPath), - ".compaction-state.json", - ); - const compactionCfg: CompactionConfig = { - enabled: true, - minAgeDays: config.memoryCompaction!.minAgeDays ?? 7, - similarityThreshold: config.memoryCompaction!.similarityThreshold ?? 0.88, - minClusterSize: config.memoryCompaction!.minClusterSize ?? 2, - maxMemoriesToScan: config.memoryCompaction!.maxMemoriesToScan ?? 200, - dryRun: false, - cooldownHours: config.memoryCompaction!.cooldownHours ?? 24, - }; - - shouldRunCompaction(compactionStateFile, compactionCfg.cooldownHours) - .then(async (should) => { - if (!should) return; - await recordCompactionRun(compactionStateFile); - const result = await runCompaction(store, embedder, compactionCfg, undefined, api.logger); - if (result.clustersFound > 0) { - api.logger.info( - `memory-compactor [auto]: compacted ${result.memoriesDeleted} → ${result.memoriesCreated} entries`, - ); - } - }) - .catch((err) => { - api.logger.warn(`memory-compactor [auto]: failed: ${String(err)}`); - }); - }); - } - // ======================================================================== // Register CLI Commands // ======================================================================== @@ -2194,9 +2166,7 @@ const memoryLanceDBProPlugin = { // Auto-recall: inject relevant memories before agent starts // Default is OFF to prevent the model from accidentally echoing injected context. - // recallMode: "full" (default when autoRecall=true) | "summary" (L0 only) | "adaptive" (intent-based) | "off" - const recallMode = config.recallMode || "full"; - if (config.autoRecall === true && recallMode !== "off") { + if (config.autoRecall === true) { // Cache the most recent raw user message per session so the // before_prompt_build gating can check the *user* text, not the full // assembled prompt (which includes system instructions and is too long @@ -2213,7 +2183,7 @@ const memoryLanceDBProPlugin = { if (text) lastRawUserMessage.set(cacheKey, text); }); - const AUTO_RECALL_TIMEOUT_MS = parsePositiveInt(config.autoRecallTimeoutMs) ?? 5_000; // configurable; default raised from 3s to 5s for remote embedding APIs behind proxies + const AUTO_RECALL_TIMEOUT_MS = config.autoRecallTimeoutMs ?? 3_000; // bounded timeout to prevent agent startup stall api.on("before_prompt_build", async (event: any, ctx: any) => { // Manually increment turn counter for this session const sessionId = ctx?.sessionId || "default"; @@ -2245,7 +2215,7 @@ const memoryLanceDBProPlugin = { // FR-04: Truncate long prompts (e.g. file attachments) before embedding. // Auto-recall only needs the user's intent, not full attachment text. - const MAX_RECALL_QUERY_LENGTH = 1_000; + const MAX_RECALL_QUERY_LENGTH = 5_000; let recallQuery = event.prompt; if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) { const originalLength = recallQuery.length; @@ -2260,14 +2230,6 @@ const memoryLanceDBProPlugin = { const autoRecallPerItemMaxChars = clampInt(config.autoRecallPerItemMaxChars ?? 180, 32, 1000); const retrieveLimit = clampInt(Math.max(autoRecallMaxItems * 2, autoRecallMaxItems), 1, 20); - // Adaptive intent analysis (zero-LLM-cost pattern matching) - const intent = recallMode === "adaptive" ? analyzeIntent(recallQuery) : undefined; - if (intent) { - api.logger.debug?.( - `memory-lancedb-pro: adaptive recall intent=${intent.label} depth=${intent.depth} confidence=${intent.confidence} categories=[${intent.categories.join(",")}]`, - ); - } - const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({ query: recallQuery, limit: retrieveLimit, @@ -2279,19 +2241,16 @@ const memoryLanceDBProPlugin = { return; } - // Apply intent-based category boost for adaptive mode - const rankedResults = intent ? applyCategoryBoost(results, intent) : results; - // Filter out redundant memories based on session history const minRepeated = config.autoRecallMinRepeated ?? 8; let dedupFilteredCount = 0; // Only enable dedup logic when minRepeated > 0 - let finalResults = rankedResults; + let finalResults = results; if (minRepeated > 0) { const sessionHistory = recallHistory.get(sessionId) || new Map(); - const filteredResults = rankedResults.filter((r) => { + const filteredResults = results.filter((r) => { const lastTurn = sessionHistory.get(r.entry.id) ?? -999; const diff = currentTurn - lastTurn; const isRedundant = diff < minRepeated; @@ -2323,10 +2282,12 @@ const memoryLanceDBProPlugin = { const meta = parseSmartMetadata(r.entry.metadata, r.entry); if (meta.state !== "confirmed") { stateFilteredCount++; + api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=state(${meta.state}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`); return false; } if (meta.memory_layer === "archive" || meta.memory_layer === "reflection") { stateFilteredCount++; + api.logger.debug(`memory-lancedb-pro: governance: filtered id=${r.entry.id} reason=layer(${meta.memory_layer}) score=${r.score?.toFixed(3)} text=${r.entry.text.slice(0, 50)}`); return false; } if (meta.suppressed_until_turn > 0 && currentTurn <= meta.suppressed_until_turn) { @@ -2343,30 +2304,13 @@ const memoryLanceDBProPlugin = { return; } - // Determine effective per-item char limit based on recall mode and intent depth - const effectivePerItemMaxChars = (() => { - if (recallMode === "summary") return Math.min(autoRecallPerItemMaxChars, 80); // L0 only - if (!intent) return autoRecallPerItemMaxChars; // "full" mode - // Adaptive mode: depth determines char budget - switch (intent.depth) { - case "l0": return Math.min(autoRecallPerItemMaxChars, 80); - case "l1": return autoRecallPerItemMaxChars; // default budget - case "full": return Math.min(autoRecallPerItemMaxChars * 3, 1000); - } - })(); - const preBudgetCandidates = governanceEligible.map((r) => { const metaObj = parseSmartMetadata(r.entry.metadata, r.entry); const displayCategory = metaObj.memory_category || r.entry.category; const displayTier = metaObj.tier || ""; const tierPrefix = displayTier ? `[${displayTier.charAt(0).toUpperCase()}]` : ""; - // Select content tier based on recallMode/intent depth - const contentText = recallMode === "summary" - ? (metaObj.l0_abstract || r.entry.text) - : intent?.depth === "full" - ? (r.entry.text) // full text for deep queries - : (metaObj.l0_abstract || r.entry.text); // L0/L1 default - const summary = sanitizeForContext(contentText).slice(0, effectivePerItemMaxChars); + const abstract = metaObj.l0_abstract || r.entry.text; + const summary = sanitizeForContext(abstract).slice(0, autoRecallPerItemMaxChars); return { id: r.entry.id, prefix: `${tierPrefix}[${displayCategory}:${r.entry.scope}]`, @@ -2516,14 +2460,6 @@ const memoryLanceDBProPlugin = { // See: https://github.com/CortexReach/memory-lancedb-pro/issues/260 const backgroundRun = (async () => { try { - // Feature 7: Check extraction rate limit before any work - if (extractionRateLimiter.isRateLimited()) { - api.logger.debug( - `memory-lancedb-pro: auto-capture skipped (rate limited: ${extractionRateLimiter.getRecentCount()} extractions in last hour)`, - ); - return; - } - // Determine agent ID and default scope const agentId = resolveHookAgentId(ctx?.agentId, (event as any).sessionKey); const accessibleScopes = resolveScopeFilter(scopeManager, agentId); @@ -2557,7 +2493,7 @@ const memoryLanceDBProPlugin = { const content = msgObj.content; if (typeof content === "string") { - const normalized = normalizeAutoCaptureText(role, content, shouldSkipReflectionMessage); + const normalized = normalizeAutoCaptureText(role, content); if (!normalized) { skippedAutoCaptureTexts++; } else { @@ -2577,7 +2513,7 @@ const memoryLanceDBProPlugin = { typeof (block as Record).text === "string" ) { const text = (block as Record).text as string; - const normalized = normalizeAutoCaptureText(role, text, shouldSkipReflectionMessage); + const normalized = normalizeAutoCaptureText(role, text); if (!normalized) { skippedAutoCaptureTexts++; } else { @@ -2652,39 +2588,8 @@ const memoryLanceDBProPlugin = { ); } - // ---------------------------------------------------------------- - // Feature 7: Skip low-value conversations - // ---------------------------------------------------------------- - if (config.extractionThrottle?.skipLowValue === true) { - const conversationValue = estimateConversationValue(texts); - if (conversationValue < 0.2) { - api.logger.debug( - `memory-lancedb-pro: auto-capture skipped for agent ${agentId} (low conversation value: ${conversationValue.toFixed(2)})`, - ); - return; - } - } - - // ---------------------------------------------------------------- - // Feature 1: Session compression — prioritize high-signal texts - // ---------------------------------------------------------------- - if (config.sessionCompression?.enabled === true && texts.length > 0) { - const maxChars = config.extractMaxChars ?? 8000; - const compressed = compressTexts(texts, maxChars, { - minScoreToKeep: config.sessionCompression?.minScoreToKeep, - }); - if (compressed.dropped > 0) { - api.logger.debug( - `memory-lancedb-pro: session compression for agent ${agentId}: dropped ${compressed.dropped}/${texts.length} texts (${compressed.totalChars} chars kept)`, - ); - texts = compressed.texts; - } - } - // ---------------------------------------------------------------- // Smart Extraction (Phase 1: LLM-powered 6-category extraction) - // Rate limiter charged AFTER successful extraction, not before, - // so no-op sessions don't consume the hourly quota. // ---------------------------------------------------------------- if (smartExtractor) { // Pre-filter: embedding-based noise detection (language-agnostic) @@ -2704,8 +2609,6 @@ const memoryLanceDBProPlugin = { conversationText, sessionKey, { scope: defaultScope, scopeFilter: accessibleScopes }, ); - // Charge rate limiter only after successful extraction - extractionRateLimiter.recordExtraction(); if (stats.created > 0 || stats.merged > 0) { api.logger.info( `memory-lancedb-pro: smart-extracted ${stats.created} created, ${stats.merged} merged, ${stats.skipped} skipped for agent ${agentId}` @@ -2802,7 +2705,7 @@ const memoryLanceDBProPlugin = { l2_content: text, source_session: (event as any).sessionKey || "unknown", source: "auto-capture", - state: "pending", + state: "confirmed", memory_layer: "working", injected_count: 0, bad_recall_count: 0, @@ -3736,10 +3639,6 @@ export function parsePluginConfig(value: unknown): PluginConfig { // Accept number, numeric string, or env-var string (e.g. "${EMBED_DIM}"). // Also accept legacy top-level `dimensions` for convenience. dimensions: parsePositiveInt(embedding.dimensions ?? cfg.dimensions), - omitDimensions: - typeof embedding.omitDimensions === "boolean" - ? embedding.omitDimensions - : undefined, taskQuery: typeof embedding.taskQuery === "string" ? embedding.taskQuery @@ -3766,6 +3665,7 @@ export function parsePluginConfig(value: unknown): PluginConfig { autoRecallMaxItems: parsePositiveInt(cfg.autoRecallMaxItems) ?? 3, autoRecallMaxChars: parsePositiveInt(cfg.autoRecallMaxChars) ?? 600, autoRecallPerItemMaxChars: parsePositiveInt(cfg.autoRecallPerItemMaxChars) ?? 180, + autoRecallTimeoutMs: parsePositiveInt(cfg.autoRecallTimeoutMs) ?? 3000, captureAssistant: cfg.captureAssistant === true, retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? cfg.retrieval as any : undefined, decay: typeof cfg.decay === "object" && cfg.decay !== null ? cfg.decay as any : undefined, @@ -3861,47 +3761,9 @@ export function parsePluginConfig(value: unknown): PluginConfig { } : undefined, admissionControl: normalizeAdmissionControlConfig(cfg.admissionControl), - memoryCompaction: (() => { - const raw = - typeof cfg.memoryCompaction === "object" && cfg.memoryCompaction !== null - ? (cfg.memoryCompaction as Record) - : null; - if (!raw) return undefined; - return { - enabled: raw.enabled === true, - minAgeDays: parsePositiveInt(raw.minAgeDays) ?? 7, - similarityThreshold: - typeof raw.similarityThreshold === "number" - ? Math.max(0, Math.min(1, raw.similarityThreshold)) - : 0.88, - minClusterSize: parsePositiveInt(raw.minClusterSize) ?? 2, - maxMemoriesToScan: parsePositiveInt(raw.maxMemoriesToScan) ?? 200, - cooldownHours: parsePositiveInt(raw.cooldownHours) ?? 24, - }; - })(), - sessionCompression: - typeof cfg.sessionCompression === "object" && cfg.sessionCompression !== null - ? { - enabled: - (cfg.sessionCompression as Record).enabled === true, - minScoreToKeep: - typeof (cfg.sessionCompression as Record).minScoreToKeep === "number" - ? ((cfg.sessionCompression as Record).minScoreToKeep as number) - : 0.3, - } - : { enabled: false, minScoreToKeep: 0.3 }, - extractionThrottle: - typeof cfg.extractionThrottle === "object" && cfg.extractionThrottle !== null - ? { - skipLowValue: - (cfg.extractionThrottle as Record).skipLowValue === true, - maxExtractionsPerHour: - typeof (cfg.extractionThrottle as Record).maxExtractionsPerHour === "number" - ? ((cfg.extractionThrottle as Record).maxExtractionsPerHour as number) - : 30, - } - : { skipLowValue: false, maxExtractionsPerHour: 30 }, }; } +export function _resetInitialized() { _initialized = false; } + export default memoryLanceDBProPlugin; diff --git a/openclaw.plugin.json b/openclaw.plugin.json index c05bf0f..a04bfce 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -46,10 +46,6 @@ "type": "integer", "minimum": 1 }, - "omitDimensions": { - "type": "boolean", - "description": "When true, omit the dimensions parameter from embedding requests even if dimensions is configured" - }, "taskQuery": { "type": "string", "description": "Embedding task for queries (provider-specific, e.g. Jina: retrieval.query)" @@ -137,12 +133,6 @@ "default": 180, "description": "Maximum character budget per auto-injected memory summary." }, - "recallMode": { - "type": "string", - "enum": ["full", "summary", "adaptive", "off"], - "default": "full", - "description": "Auto-recall depth mode. 'full': inject with configured per-item budget. 'summary': L0 abstracts only (compact). 'adaptive': analyze query intent to auto-select category and depth. 'off': disable auto-recall injection." - }, "captureAssistant": { "type": "boolean" }, @@ -238,23 +228,78 @@ "type": "object", "additionalProperties": false, "properties": { - "utility": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 }, - "confidence": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 }, - "novelty": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 }, - "recency": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.1 }, - "typePrior": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.6 } + "utility": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.1 + }, + "confidence": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.1 + }, + "novelty": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.1 + }, + "recency": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.1 + }, + "typePrior": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.6 + } } }, "typePriors": { "type": "object", "additionalProperties": false, "properties": { - "profile": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.95 }, - "preferences": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.9 }, - "entities": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.75 }, - "events": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.45 }, - "cases": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.8 }, - "patterns": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.85 } + "profile": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.95 + }, + "preferences": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.9 + }, + "entities": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.75 + }, + "events": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.45 + }, + "cases": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.8 + }, + "patterns": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0.85 + } } } } @@ -325,6 +370,11 @@ "default": "jina", "description": "Reranker provider format. Determines request/response shape and auth header. Use tei for Hugging Face Text Embeddings Inference /rerank endpoints. DashScope uses gte-rerank-v2 with endpoint https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank." }, + "rerankTimeoutMs": { + "type": "number", + "default": 5000, + "description": "Rerank API timeout in milliseconds. Increase for local/CPU-based rerank servers (e.g., 120000 for 2 minutes)." + }, "candidatePoolSize": { "type": "integer", "minimum": 10, @@ -745,86 +795,12 @@ } } }, - "memoryCompaction": { - "type": "object", - "additionalProperties": false, - "description": "Progressive summarization: periodically consolidate semantically similar old memories into refined single entries, reducing noise and improving retrieval quality over time.", - "properties": { - "enabled": { - "type": "boolean", - "default": false, - "description": "Enable automatic compaction at gateway startup (respects cooldownHours)" - }, - "minAgeDays": { - "type": "integer", - "default": 7, - "minimum": 1, - "description": "Only compact memories at least this many days old" - }, - "similarityThreshold": { - "type": "number", - "default": 0.88, - "minimum": 0, - "maximum": 1, - "description": "Cosine similarity threshold for clustering. Higher = more conservative merges." - }, - "minClusterSize": { - "type": "integer", - "default": 2, - "minimum": 2, - "description": "Minimum cluster size required to trigger a merge" - }, - "maxMemoriesToScan": { - "type": "integer", - "default": 200, - "minimum": 1, - "description": "Maximum number of memories to scan per compaction run" - }, - "cooldownHours": { - "type": "integer", - "default": 24, - "minimum": 1, - "description": "Minimum hours between automatic compaction runs" - } - } - }, - "sessionCompression": { - "type": "object", - "additionalProperties": false, - "description": "Session compression settings for auto-capture. Scores and compresses conversation texts to prioritize high-signal content.", - "properties": { - "enabled": { - "type": "boolean", - "default": false, - "description": "Enable session compression before auto-capture extraction" - }, - "minScoreToKeep": { - "type": "number", - "minimum": 0, - "maximum": 1, - "default": 0.3, - "description": "Minimum score threshold. If all texts score below this, fallback to keeping at least the last few texts." - } - } - }, - "extractionThrottle": { - "type": "object", - "additionalProperties": false, - "description": "Adaptive extraction throttling to reduce LLM cost on low-value or rapid-fire sessions.", - "properties": { - "skipLowValue": { - "type": "boolean", - "default": false, - "description": "Skip extraction for conversations with estimated value < 0.2" - }, - "maxExtractionsPerHour": { - "type": "integer", - "minimum": 1, - "maximum": 200, - "default": 30, - "description": "Maximum number of auto-capture extractions allowed per hour" - } - } + "autoRecallTimeoutMs": { + "type": "integer", + "minimum": 1000, + "maximum": 120000, + "default": 3000, + "description": "Timeout in ms for auto-recall search before skipping injection. Prevents agent startup stall when embedding/rerank is slow." } }, "required": [ @@ -855,11 +831,6 @@ "help": "Override vector dimensions for custom models not in the built-in lookup table", "advanced": true }, - "embedding.omitDimensions": { - "label": "Omit Request Dimensions", - "help": "Do not send the dimensions parameter to the embedding API even if embedding.dimensions is configured. Useful for local models like Qwen3-Embedding that reject the field.", - "advanced": true - }, "embedding.taskQuery": { "label": "Query Task", "placeholder": "retrieval.query", @@ -1012,11 +983,6 @@ "help": "Maximum characters per injected memory summary.", "advanced": true }, - "recallMode": { - "label": "Recall Mode", - "help": "Auto-recall depth: full (default), summary (L0 only), adaptive (intent-based category routing), off.", - "advanced": false - }, "captureAssistant": { "label": "Capture Assistant Messages", "help": "Also auto-capture assistant messages (default false to reduce memory pollution)", @@ -1298,43 +1264,6 @@ "placeholder": "30000", "help": "Request timeout for the smart-extraction / upgrade LLM in milliseconds", "advanced": true - }, - "memoryCompaction.enabled": { - "label": "Auto Compaction", - "help": "Automatically consolidate similar old memories at gateway startup. Also available on-demand via the memory_compact tool (requires enableManagementTools)." - }, - "memoryCompaction.minAgeDays": { - "label": "Min Age (days)", - "help": "Memories younger than this are never touched by compaction", - "advanced": true - }, - "memoryCompaction.similarityThreshold": { - "label": "Similarity Threshold", - "help": "How similar two memories must be to merge (0–1). 0.88 is a good starting point; raise to 0.92+ for conservative merges.", - "advanced": true - }, - "memoryCompaction.cooldownHours": { - "label": "Cooldown (hours)", - "help": "Minimum gap between automatic compaction runs", - "advanced": true - }, - "sessionCompression.enabled": { - "label": "Session Compression", - "help": "Score and compress conversation texts before auto-capture to prioritize high-signal content (corrections, decisions, tool calls)" - }, - "sessionCompression.minScoreToKeep": { - "label": "Compression Min Score", - "help": "Minimum text score threshold. If all texts score below this, keep at least the last few texts as fallback.", - "advanced": true - }, - "extractionThrottle.skipLowValue": { - "label": "Skip Low-Value Conversations", - "help": "Skip auto-capture for conversations estimated to have low memory value (< 0.2)" - }, - "extractionThrottle.maxExtractionsPerHour": { - "label": "Max Extractions Per Hour", - "help": "Rate limit for auto-capture extractions. Prevents excessive LLM calls during rapid-fire sessions.", - "advanced": true } } -} +} \ No newline at end of file diff --git a/src/retriever.ts b/src/retriever.ts index 900db75..dee206e 100644 --- a/src/retriever.ts +++ b/src/retriever.ts @@ -19,8 +19,6 @@ import { parseSmartMetadata, toLifecycleMemory, } from "./smart-metadata.js"; -import { TraceCollector, type RetrievalTrace } from "./retrieval-trace.js"; -import { RetrievalStatsCollector } from "./retrieval-stats.js"; // ============================================================================ // Types & Configuration @@ -45,6 +43,8 @@ export interface RetrievalConfig { rerankModel?: string; /** Reranker API endpoint (default: https://api.jina.ai/v1/rerank). */ rerankEndpoint?: string; + /** Rerank API timeout in milliseconds (default: 5000). Increase for local/CPU-based rerank servers. */ + rerankTimeoutMs?: number; /** Reranker provider format. Determines request/response shape and auth header. * - "jina" (default): Authorization: Bearer, string[] documents, results[].relevance_score * - "siliconflow": same format as jina (alias, for clarity) @@ -127,6 +127,7 @@ export const DEFAULT_RETRIEVAL_CONFIG: RetrievalConfig = { filterNoise: true, rerankModel: "jina-reranker-v3", rerankEndpoint: "https://api.jina.ai/v1/rerank", + rerankTimeoutMs: 5000, lengthNormAnchor: 500, hardMinScore: 0.35, timeDecayHalfLifeDays: 60, @@ -361,7 +362,6 @@ function cosineSimilarity(a: number[], b: number[]): number { export class MemoryRetriever { private accessTracker: AccessTracker | null = null; private tierManager: TierManager | null = null; - private _statsCollector: RetrievalStatsCollector | null = null; constructor( private store: MemoryStore, @@ -374,16 +374,6 @@ export class MemoryRetriever { this.accessTracker = tracker; } - /** Enable aggregate retrieval statistics collection. */ - setStatsCollector(collector: RetrievalStatsCollector): void { - this._statsCollector = collector; - } - - /** Get the stats collector (if set). */ - getStatsCollector(): RetrievalStatsCollector | null { - return this._statsCollector; - } - private filterActiveResults(results: T[]): T[] { return results.filter((result) => isMemoryActiveAt(parseSmartMetadata(result.entry.metadata, result.entry)), @@ -394,35 +384,34 @@ export class MemoryRetriever { const { query, limit, scopeFilter, category, source } = context; const safeLimit = clampInt(limit, 1, 20); - // Create trace only when stats collector is active (zero overhead otherwise) - const trace = this._statsCollector ? new TraceCollector() : undefined; - // Check if query contains tag prefixes -> use BM25-only + mustContain const tagTokens = this.extractTagTokens(query); let results: RetrievalResult[]; - + if (tagTokens.length > 0) { results = await this.bm25OnlyRetrieval( - query, tagTokens, safeLimit, scopeFilter, category, trace, + query, + tagTokens, + safeLimit, + scopeFilter, + category, ); } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) { results = await this.vectorOnlyRetrieval( - query, safeLimit, scopeFilter, category, trace, + query, + safeLimit, + scopeFilter, + category, ); } else { results = await this.hybridRetrieval( - query, safeLimit, scopeFilter, category, trace, + query, + safeLimit, + scopeFilter, + category, ); } - // Feed completed trace to stats collector - if (trace && this._statsCollector) { - const mode = tagTokens.length > 0 ? "bm25" - : (this.config.mode === "vector" || !this.store.hasFtsSupport) ? "vector" : "hybrid"; - const finalTrace = trace.finalize(query, mode); - this._statsCollector.recordQuery(finalTrace, source || "unknown"); - } - // Record access for reinforcement (manual recall only) if (this.accessTracker && source === "manual" && results.length > 0) { this.accessTracker.recordAccess(results.map((r) => r.entry.id)); @@ -431,49 +420,6 @@ export class MemoryRetriever { return results; } - /** - * Retrieve with full trace, used by the memory_debug tool. - * Always collects a trace regardless of stats collector state. - */ - async retrieveWithTrace( - context: RetrievalContext, - ): Promise<{ results: RetrievalResult[]; trace: RetrievalTrace }> { - const { query, limit, scopeFilter, category, source } = context; - const safeLimit = clampInt(limit, 1, 20); - const trace = new TraceCollector(); - - const tagTokens = this.extractTagTokens(query); - let results: RetrievalResult[]; - - if (tagTokens.length > 0) { - results = await this.bm25OnlyRetrieval( - query, tagTokens, safeLimit, scopeFilter, category, trace, - ); - } else if (this.config.mode === "vector" || !this.store.hasFtsSupport) { - results = await this.vectorOnlyRetrieval( - query, safeLimit, scopeFilter, category, trace, - ); - } else { - results = await this.hybridRetrieval( - query, safeLimit, scopeFilter, category, trace, - ); - } - - const mode = tagTokens.length > 0 ? "bm25" - : (this.config.mode === "vector" || !this.store.hasFtsSupport) ? "vector" : "hybrid"; - const finalTrace = trace.finalize(query, mode); - - if (this._statsCollector) { - this._statsCollector.recordQuery(finalTrace, source || "debug"); - } - - if (this.accessTracker && source === "manual" && results.length > 0) { - this.accessTracker.recordAccess(results.map((r) => r.entry.id)); - } - - return { results, trace: finalTrace }; - } - private extractTagTokens(query: string): string[] { if (!this.config.tagPrefixes?.length) return []; @@ -488,64 +434,45 @@ export class MemoryRetriever { limit: number, scopeFilter?: string[], category?: string, - trace?: TraceCollector, ): Promise { const queryVector = await this.embedder.embedQuery(query); - - trace?.startStage("vector_search", []); const results = await this.store.vectorSearch( - queryVector, limit, this.config.minScore, scopeFilter, { excludeInactive: true }, + queryVector, + limit, + this.config.minScore, + scopeFilter, + { excludeInactive: true }, ); + + // Filter by category if specified const filtered = category - ? results.filter((r) => r.entry.category === category) : results; + ? results.filter((r) => r.entry.category === category) + : results; + const mapped = filtered.map( (result, index) => - ({ ...result, sources: { vector: { score: result.score, rank: index + 1 } } }) as RetrievalResult, + ({ + ...result, + sources: { + vector: { score: result.score, rank: index + 1 }, + }, + }) as RetrievalResult, ); - if (trace) { - trace.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score)); - } - let weighted: RetrievalResult[]; - if (this.decayEngine) { - weighted = mapped; - } else { - trace?.startStage("recency_boost", mapped.map((r) => r.entry.id)); - const boosted = this.applyRecencyBoost(mapped); - trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score)); - - trace?.startStage("importance_weight", boosted.map((r) => r.entry.id)); - weighted = this.applyImportanceWeight(boosted); - trace?.endStage(weighted.map((r) => r.entry.id), weighted.map((r) => r.score)); - } - - trace?.startStage("length_normalization", weighted.map((r) => r.entry.id)); + const weighted = this.decayEngine ? mapped : this.applyImportanceWeight(this.applyRecencyBoost(mapped)); const lengthNormalized = this.applyLengthNormalization(weighted); - trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score)); - - trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id)); const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore); - trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score)); - - const decayStageName = this.decayEngine ? "decay_boost" : "time_decay"; - trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id)); const lifecycleRanked = this.decayEngine ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered); - trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score)); - - trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id)); const denoised = this.config.filterNoise ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked; - trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score)); - trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id)); + // MMR deduplication: avoid top-k filled with near-identical memories const deduplicated = this.applyMMRDiversity(denoised); - const finalResults = deduplicated.slice(0, limit); - trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score)); - return finalResults; + return deduplicated.slice(0, limit); } private async bm25OnlyRetrieval( @@ -554,64 +481,56 @@ export class MemoryRetriever { limit: number, scopeFilter?: string[], category?: string, - trace?: TraceCollector, ): Promise { const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2); - - trace?.startStage("bm25_search", []); + + // Run BM25 search const bm25Results = await this.store.bm25Search( - query, candidatePoolSize, scopeFilter, { excludeInactive: true }, + query, + candidatePoolSize, + scopeFilter, + { excludeInactive: true }, ); + + // Filter by category if specified const categoryFiltered = category - ? bm25Results.filter((r) => r.entry.category === category) : bm25Results; + ? bm25Results.filter((r) => r.entry.category === category) + : bm25Results; + + // mustContain: only keep entries that literally contain all tag tokens (case-insensitive) const mustContainFiltered = categoryFiltered.filter((r) => { const textLower = r.entry.text.toLowerCase(); return tagTokens.every((t) => textLower.includes(t.toLowerCase())); }); + const mapped = mustContainFiltered.map( (result, index) => - ({ ...result, sources: { bm25: { score: result.score, rank: index + 1 } } }) as RetrievalResult, + ({ + ...result, + sources: { + bm25: { score: result.score, rank: index + 1 }, + }, + }) as RetrievalResult, ); - trace?.endStage(mapped.map((r) => r.entry.id), mapped.map((r) => r.score)); - let temporallyRanked: RetrievalResult[]; - if (this.decayEngine) { - temporallyRanked = mapped; - } else { - trace?.startStage("recency_boost", mapped.map((r) => r.entry.id)); - const boosted = this.applyRecencyBoost(mapped); - trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score)); - - trace?.startStage("importance_weight", boosted.map((r) => r.entry.id)); - temporallyRanked = this.applyImportanceWeight(boosted); - trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score)); - } + // Apply same post-processing as hybrid retrieval to avoid behavior regression + const temporallyRanked = this.decayEngine + ? mapped + : this.applyImportanceWeight(this.applyRecencyBoost(mapped)); - trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id)); const lengthNormalized = this.applyLengthNormalization(temporallyRanked); - trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score)); - - trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id)); const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore); - trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score)); - const decayStageName = this.decayEngine ? "decay_boost" : "time_decay"; - trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id)); const lifecycleRanked = this.decayEngine - ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered); - trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score)); + ? this.applyDecayBoost(hardFiltered) + : this.applyTimeDecay(hardFiltered); - trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id)); const denoised = this.config.filterNoise - ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked; - trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score)); + ? filterNoise(lifecycleRanked, r => r.entry.text) + : lifecycleRanked; - trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id)); const deduplicated = this.applyMMRDiversity(denoised); - const finalResults = deduplicated.slice(0, limit); - trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score)); - - return finalResults; + return deduplicated.slice(0, limit); } private async hybridRetrieval( @@ -619,88 +538,70 @@ export class MemoryRetriever { limit: number, scopeFilter?: string[], category?: string, - trace?: TraceCollector, ): Promise { - const candidatePoolSize = Math.max(this.config.candidatePoolSize, limit * 2); + const candidatePoolSize = Math.max( + this.config.candidatePoolSize, + limit * 2, + ); + + // Compute query embedding once, reuse for vector search + reranking const queryVector = await this.embedder.embedQuery(query); - // Run vector and BM25 searches in parallel. - // Trace as a single "parallel_search" stage since both run concurrently — - // splitting into separate sequential stages would misrepresent timing. - trace?.startStage("parallel_search", []); + // Run vector and BM25 searches in parallel const [vectorResults, bm25Results] = await Promise.all([ - this.runVectorSearch(queryVector, candidatePoolSize, scopeFilter, category), + this.runVectorSearch( + queryVector, + candidatePoolSize, + scopeFilter, + category, + ), this.runBM25Search(query, candidatePoolSize, scopeFilter, category), ]); - if (trace) { - const allSearchIds = [ - ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]), - ]; - const allScores = [...vectorResults.map((r) => r.score), ...bm25Results.map((r) => r.score)]; - trace.endStage(allSearchIds, allScores); - } - // Fuse results using RRF - const allInputIds = [ - ...new Set([...vectorResults.map((r) => r.entry.id), ...bm25Results.map((r) => r.entry.id)]), - ]; - trace?.startStage("rrf_fusion", allInputIds); + // Fuse results using RRF (async: validates BM25-only entries exist in store) const fusedResults = await this.fuseResults(vectorResults, bm25Results); - trace?.endStage(fusedResults.map((r) => r.entry.id), fusedResults.map((r) => r.score)); // Apply minimum score threshold - trace?.startStage("min_score_filter", fusedResults.map((r) => r.entry.id)); - const filtered = fusedResults.filter((r) => r.score >= this.config.minScore); - trace?.endStage(filtered.map((r) => r.entry.id), filtered.map((r) => r.score)); - - // Rerank if enabled — only emit trace stage when rerank actually runs - let reranked: RetrievalResult[]; - if (this.config.rerank !== "none") { - trace?.startStage("rerank", filtered.map((r) => r.entry.id)); - reranked = await this.rerankResults(query, queryVector, filtered.slice(0, limit * 2)); - trace?.endStage(reranked.map((r) => r.entry.id), reranked.map((r) => r.score)); - } else { - reranked = filtered; - } + const filtered = fusedResults.filter( + (r) => r.score >= this.config.minScore, + ); - let temporallyRanked: RetrievalResult[]; - if (this.decayEngine) { - temporallyRanked = reranked; - } else { - trace?.startStage("recency_boost", reranked.map((r) => r.entry.id)); - const boosted = this.applyRecencyBoost(reranked); - trace?.endStage(boosted.map((r) => r.entry.id), boosted.map((r) => r.score)); + // Rerank if enabled + const reranked = + this.config.rerank !== "none" + ? await this.rerankResults( + query, + queryVector, + filtered.slice(0, limit * 2), + ) + : filtered; - trace?.startStage("importance_weight", boosted.map((r) => r.entry.id)); - temporallyRanked = this.applyImportanceWeight(boosted); - trace?.endStage(temporallyRanked.map((r) => r.entry.id), temporallyRanked.map((r) => r.score)); - } + const temporallyRanked = this.decayEngine + ? reranked + : this.applyImportanceWeight(this.applyRecencyBoost(reranked)); - trace?.startStage("length_normalization", temporallyRanked.map((r) => r.entry.id)); + // Apply length normalization (penalize long entries dominating via keyword density) const lengthNormalized = this.applyLengthNormalization(temporallyRanked); - trace?.endStage(lengthNormalized.map((r) => r.entry.id), lengthNormalized.map((r) => r.score)); - trace?.startStage("hard_cutoff", lengthNormalized.map((r) => r.entry.id)); + // Hard minimum score cutoff should be based on semantic / lexical relevance. + // Lifecycle decay and time-decay are used for re-ranking, not for dropping + // otherwise relevant fresh memories. const hardFiltered = lengthNormalized.filter(r => r.score >= this.config.hardMinScore); - trace?.endStage(hardFiltered.map((r) => r.entry.id), hardFiltered.map((r) => r.score)); - const decayStageName = this.decayEngine ? "decay_boost" : "time_decay"; - trace?.startStage(decayStageName, hardFiltered.map((r) => r.entry.id)); + // Apply lifecycle-aware decay or legacy time decay after thresholding const lifecycleRanked = this.decayEngine - ? this.applyDecayBoost(hardFiltered) : this.applyTimeDecay(hardFiltered); - trace?.endStage(lifecycleRanked.map((r) => r.entry.id), lifecycleRanked.map((r) => r.score)); + ? this.applyDecayBoost(hardFiltered) + : this.applyTimeDecay(hardFiltered); - trace?.startStage("noise_filter", lifecycleRanked.map((r) => r.entry.id)); + // Filter noise const denoised = this.config.filterNoise - ? filterNoise(lifecycleRanked, r => r.entry.text) : lifecycleRanked; - trace?.endStage(denoised.map((r) => r.entry.id), denoised.map((r) => r.score)); + ? filterNoise(lifecycleRanked, r => r.entry.text) + : lifecycleRanked; - trace?.startStage("mmr_diversity", denoised.map((r) => r.entry.id)); + // MMR deduplication: avoid top-k filled with near-identical memories const deduplicated = this.applyMMRDiversity(denoised); - const finalResults = deduplicated.slice(0, limit); - trace?.endStage(finalResults.map((r) => r.entry.id), finalResults.map((r) => r.score)); - return finalResults; + return deduplicated.slice(0, limit); } private async runVectorSearch( @@ -858,18 +759,22 @@ export class MemoryRetriever { results.length, ); - // Timeout: 5 seconds to prevent stalling retrieval pipeline + // Timeout: prevent stalling retrieval pipeline (configurable via rerankTimeoutMs) const controller = new AbortController(); - const timeout = setTimeout(() => controller.abort(), 5000); - - const response = await fetch(endpoint, { - method: "POST", - headers, - body: JSON.stringify(body), - signal: controller.signal, - }); + const timeoutMs = this.config.rerankTimeoutMs ?? 5000; + const timeout = setTimeout(() => controller.abort(), timeoutMs); - clearTimeout(timeout); + let response: Response; + try { + response = await fetch(endpoint, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: controller.signal, + }); + } finally { + clearTimeout(timeout); + } if (response.ok) { const data: unknown = await response.json(); @@ -928,7 +833,7 @@ export class MemoryRetriever { } } catch (error) { if (error instanceof Error && error.name === "AbortError") { - console.warn("Rerank API timed out (5s), falling back to cosine"); + console.warn(`Rerank API timed out (${this.config.rerankTimeoutMs ?? 5000}ms), falling back to cosine`); } else { console.warn("Rerank API failed, falling back to cosine:", error); }