From 02cffbe1b466a2a943e9add837fca6532c353752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=97=B2=E9=9D=99=E9=9B=85?= Date: Wed, 25 Mar 2026 01:28:36 +0800 Subject: [PATCH] feat: add recallMode with adaptive intent routing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add recallMode config (full/summary/adaptive/off) for granular control over auto-recall injection depth. Adaptive mode uses zero-LLM-cost pattern matching to analyze query intent and route to appropriate memory categories with depth-aware formatting. - New: src/intent-analyzer.ts — analyzeIntent(), applyCategoryBoost() - New: test/intent-analyzer.test.mjs — 23 tests (all passing) - Modified: index.ts — integrate intent analysis into before_prompt_build - Modified: openclaw.plugin.json — add recallMode schema + UI field Rewritten for v1.1.0-beta.10 (replaces PR #313 which was based on beta.9). Co-Authored-By: Claude Opus 4.6 (1M context) --- index.ts | 42 +++++- openclaw.plugin.json | 11 ++ src/intent-analyzer.ts | 259 ++++++++++++++++++++++++++++++++++ test/intent-analyzer.test.mjs | 209 +++++++++++++++++++++++++++ 4 files changed, 516 insertions(+), 5 deletions(-) create mode 100644 src/intent-analyzer.ts create mode 100644 test/intent-analyzer.test.mjs diff --git a/index.ts b/index.ts index 212dede..4cb2945 100644 --- a/index.ts +++ b/index.ts @@ -64,6 +64,7 @@ import { type AdmissionControlConfig, type AdmissionRejectionAuditEntry, } from "./src/admission-control.js"; +import { analyzeIntent, applyCategoryBoost } from "./src/intent-analyzer.js"; // ============================================================================ // Configuration & Types @@ -90,6 +91,7 @@ interface PluginConfig { autoRecallMaxItems?: number; autoRecallMaxChars?: number; autoRecallPerItemMaxChars?: number; + recallMode?: "full" | "summary" | "adaptive" | "off"; captureAssistant?: boolean; retrieval?: { mode?: "hybrid" | "vector"; @@ -2146,7 +2148,9 @@ const 
memoryLanceDBProPlugin = { // Auto-recall: inject relevant memories before agent starts // Default is OFF to prevent the model from accidentally echoing injected context. - if (config.autoRecall === true) { + // recallMode: "full" (default when autoRecall=true) | "summary" (L0 only) | "adaptive" (intent-based) | "off" + const recallMode = config.recallMode || "full"; + if (config.autoRecall === true && recallMode !== "off") { // Cache the most recent raw user message per session so the // before_prompt_build gating can check the *user* text, not the full // assembled prompt (which includes system instructions and is too long @@ -2210,6 +2214,14 @@ const memoryLanceDBProPlugin = { const autoRecallPerItemMaxChars = clampInt(config.autoRecallPerItemMaxChars ?? 180, 32, 1000); const retrieveLimit = clampInt(Math.max(autoRecallMaxItems * 2, autoRecallMaxItems), 1, 20); + // Adaptive intent analysis (zero-LLM-cost pattern matching) + const intent = recallMode === "adaptive" ? analyzeIntent(recallQuery) : undefined; + if (intent) { + api.logger.debug?.( + `memory-lancedb-pro: adaptive recall intent=${intent.label} depth=${intent.depth} confidence=${intent.confidence} categories=[${intent.categories.join(",")}]`, + ); + } + const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({ query: recallQuery, limit: retrieveLimit, @@ -2221,16 +2233,19 @@ const memoryLanceDBProPlugin = { return; } + // Apply intent-based category boost for adaptive mode + const rankedResults = intent ? applyCategoryBoost(results, intent) : results; + // Filter out redundant memories based on session history const minRepeated = config.autoRecallMinRepeated ?? 
8; let dedupFilteredCount = 0; // Only enable dedup logic when minRepeated > 0 - let finalResults = results; + let finalResults = rankedResults; if (minRepeated > 0) { const sessionHistory = recallHistory.get(sessionId) || new Map(); - const filteredResults = results.filter((r) => { + const filteredResults = rankedResults.filter((r) => { const lastTurn = sessionHistory.get(r.entry.id) ?? -999; const diff = currentTurn - lastTurn; const isRedundant = diff < minRepeated; @@ -2282,13 +2297,30 @@ const memoryLanceDBProPlugin = { return; } + // Determine effective per-item char limit based on recall mode and intent depth + const effectivePerItemMaxChars = (() => { + if (recallMode === "summary") return Math.min(autoRecallPerItemMaxChars, 80); // L0 only + if (!intent) return autoRecallPerItemMaxChars; // "full" mode + // Adaptive mode: depth determines char budget + switch (intent.depth) { + case "l0": return Math.min(autoRecallPerItemMaxChars, 80); + case "l1": return autoRecallPerItemMaxChars; // default budget + case "full": return Math.min(autoRecallPerItemMaxChars * 3, 1000); + } + })(); + const preBudgetCandidates = governanceEligible.map((r) => { const metaObj = parseSmartMetadata(r.entry.metadata, r.entry); const displayCategory = metaObj.memory_category || r.entry.category; const displayTier = metaObj.tier || ""; const tierPrefix = displayTier ? `[${displayTier.charAt(0).toUpperCase()}]` : ""; - const abstract = metaObj.l0_abstract || r.entry.text; - const summary = sanitizeForContext(abstract).slice(0, autoRecallPerItemMaxChars); + // Select content tier based on recallMode/intent depth + const contentText = recallMode === "summary" + ? (metaObj.l0_abstract || r.entry.text) + : intent?.depth === "full" + ? 
(r.entry.text) // full text for deep queries + : (metaObj.l0_abstract || r.entry.text); // L0/L1 default + const summary = sanitizeForContext(contentText).slice(0, effectivePerItemMaxChars); return { id: r.entry.id, prefix: `${tierPrefix}[${displayCategory}:${r.entry.scope}]`, diff --git a/openclaw.plugin.json b/openclaw.plugin.json index a6b1db1..dff2538 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -133,6 +133,12 @@ "default": 180, "description": "Maximum character budget per auto-injected memory summary." }, + "recallMode": { + "type": "string", + "enum": ["full", "summary", "adaptive", "off"], + "default": "full", + "description": "Auto-recall depth mode. 'full': inject with configured per-item budget. 'summary': L0 abstracts only (compact). 'adaptive': analyze query intent to auto-select category and depth. 'off': disable auto-recall injection." + }, "captureAssistant": { "type": "boolean" }, @@ -916,6 +922,11 @@ "help": "Maximum characters per injected memory summary.", "advanced": true }, + "recallMode": { + "label": "Recall Mode", + "help": "Auto-recall depth: full (default), summary (L0 only), adaptive (intent-based category routing), off.", + "advanced": false + }, "captureAssistant": { "label": "Capture Assistant Messages", "help": "Also auto-capture assistant messages (default false to reduce memory pollution)", diff --git a/src/intent-analyzer.ts b/src/intent-analyzer.ts new file mode 100644 index 0000000..58c3428 --- /dev/null +++ b/src/intent-analyzer.ts @@ -0,0 +1,259 @@ +/** + * Intent Analyzer for Adaptive Recall + * + * Lightweight, rule-based intent analysis that determines which memory categories + * are most relevant for a given query and what recall depth to use. + * + * Inspired by OpenViking's hierarchical retrieval intent routing, adapted for + * memory-lancedb-pro's flat category model. No LLM calls — pure pattern matching + * for minimal latency impact on auto-recall. 
/**
 * Types, rule table and entry point for adaptive-recall intent analysis.
 *
 * Everything here is pure pattern matching: no LLM or network calls are made.
 *
 * @see https://github.com/volcengine/OpenViking — hierarchical_retriever.py intent analysis
 */

// ============================================================================
// Types
// ============================================================================

/**
 * Intent categories map to actual stored MemoryEntry categories.
 * Note: "event" is NOT a stored category — event queries route to
 * entity + decision (the categories most likely to contain timeline data).
 */
export type MemoryCategoryIntent =
  | "preference"
  | "fact"
  | "decision"
  | "entity"
  | "other";

/** Recall depth tiers, from most compact ("l0") to complete text ("full"). */
export type RecallDepth = "l0" | "l1" | "full";

export interface IntentSignal {
  /** Categories to prioritize (ordered by relevance). */
  categories: MemoryCategoryIntent[];
  /** Recommended recall depth for this intent. */
  depth: RecallDepth;
  /** Confidence level of the intent classification. */
  confidence: "high" | "medium" | "low";
  /** Short label for logging. */
  label: string;
}

// ============================================================================
// Intent Patterns
// ============================================================================

/** One routing rule: if any pattern matches, the rule's categories/depth apply. */
interface IntentRule {
  label: string;
  patterns: RegExp[];
  categories: MemoryCategoryIntent[];
  depth: RecallDepth;
}

/**
 * Intent rules ordered by specificity (most specific first).
 * First match wins — keep high-confidence patterns at the top.
 *
 * None of the patterns use the `g` or `y` flag, so `RegExp.prototype.test`
 * is stateless here and the literals can safely be shared across calls.
 */
const INTENT_RULES: readonly IntentRule[] = [
  // --- Preference / Style queries ---
  {
    label: "preference",
    patterns: [
      /\b(prefer|preference|style|convention|like|dislike|favorite|habit)\b/i,
      /\b(how do (i|we) usually|what('s| is) (my|our) (style|convention|approach))\b/i,
      /(偏好|喜欢|习惯|风格|惯例|常用|不喜欢|不要用|别用)/,
    ],
    categories: ["preference", "decision"],
    depth: "l0",
  },

  // --- Decision / Rationale queries ---
  {
    label: "decision",
    patterns: [
      /\b(why did (we|i)|decision|decided|chose|rationale|trade-?off|reason for)\b/i,
      /\b(what was the (reason|rationale|decision))\b/i,
      /(为什么选|决定|选择了|取舍|权衡|原因是|当时决定)/,
    ],
    categories: ["decision", "fact"],
    depth: "l1",
  },

  // --- Entity / People / Project queries ---
  // Narrowed patterns to avoid over-matching: require "who is" / "tell me about"
  // style phrasing, not bare nouns like "tool" or "component".
  {
    label: "entity",
    patterns: [
      /\b(who is|who are|tell me about|info on|details about|contact info)\b/i,
      /\b(who('s| is) (the|our|my)|what team|which (person|team))\b/i,
      /(谁是|告诉我关于|详情|联系方式|哪个团队)/,
    ],
    categories: ["entity", "fact"],
    depth: "l1",
  },

  // --- Event / Timeline queries ---
  // Note: "event" is not a stored category. Route to entity + decision
  // (the categories most likely to contain timeline/incident data).
  {
    label: "event",
    patterns: [
      /\b(when did|what happened|timeline|incident|outage|deploy|release|shipped)\b/i,
      /\b(last (week|month|time|sprint)|recently|yesterday|today)\b/i,
      /(什么时候|发生了什么|时间线|事件|上线|部署|发布|上次|最近)/,
    ],
    categories: ["entity", "decision"],
    depth: "full",
  },

  // --- Fact / Knowledge queries ---
  {
    label: "fact",
    patterns: [
      /\b(how (does|do|to)|what (does|do|is)|explain|documentation|spec)\b/i,
      /\b(config|configuration|setup|install|architecture|api|endpoint)\b/i,
      /(怎么|如何|是什么|解释|文档|规范|配置|安装|架构|接口)/,
    ],
    categories: ["fact", "entity"],
    depth: "l1",
  },
];

// ============================================================================
// Analyzer
// ============================================================================

/**
 * Analyze a query to determine which memory categories and recall depth
 * are most appropriate.
 *
 * Rules in INTENT_RULES are tried in order and the first rule with any
 * matching pattern wins. The result is always a fresh object with its own
 * `categories` array, so callers may mutate it without affecting later calls.
 *
 * @param query - Raw user query text. Blank or non-string input yields the
 *   "empty" signal instead of throwing.
 * @returns A matched rule's signal with confidence "high", or a low-confidence
 *   signal labelled "empty" (blank input) or "broad" (no rule matched) with no
 *   categories and depth "l0". Never null, so callers need no null checks.
 */
export function analyzeIntent(query: string): IntentSignal {
  // Guard against untyped callers passing undefined/null: treat as blank.
  const trimmed = typeof query === "string" ? query.trim() : "";
  if (!trimmed) {
    return {
      categories: [],
      depth: "l0",
      confidence: "low",
      label: "empty",
    };
  }

  const matched = INTENT_RULES.find((rule) =>
    rule.patterns.some((pattern) => pattern.test(trimmed)),
  );
  if (matched) {
    return {
      // Copy: handing out the rule's own array would let a caller's
      // push/sort silently rewrite the shared rule table.
      categories: [...matched.categories],
      depth: matched.depth,
      confidence: "high",
      label: matched.label,
    };
  }

  // No specific intent detected — return broad signal.
  // All categories are eligible; use L0 to minimize token cost.
  return {
    categories: [],
    depth: "l0",
    confidence: "low",
    label: "broad",
  };
}
/**
 * Apply intent-based category boost to retrieval results.
 *
 * Instead of filtering (which would lose potentially relevant results),
 * this boosts scores of results matching the detected intent categories.
 * Non-matching results are kept but ranked lower.
 *
 * The input array and its elements are never mutated: boosted entries are
 * shallow copies. When there is no usable intent (no categories, or "low"
 * confidence) the original array is returned as-is, without re-sorting.
 *
 * @param results - Retrieval results with scores
 * @param intent - Detected intent signal
 * @param boostFactor - Score multiplier for matching categories (default: 1.15)
 * @returns Results with adjusted scores, sorted by descending score
 */
export function applyCategoryBoost<
  T extends { entry: { category: string }; score: number },
>(results: T[], intent: IntentSignal, boostFactor = 1.15): T[] {
  if (intent.categories.length === 0 || intent.confidence === "low") {
    return results; // No intent signal — return as-is
  }

  // Set<string>, not Set<MemoryCategoryIntent>: entry.category is a plain
  // string, and `.has(string)` on the narrower set does not type-check.
  const prioritized = new Set<string>(intent.categories);

  const boosted = results.map((r) => {
    if (!prioritized.has(r.entry.category)) return r;
    // Cap at 1.0, but never below the entry's own score: a "boost" must not
    // demote a result whose score was already above 1.
    const ceiling = Math.max(1, r.score);
    return { ...r, score: Math.min(ceiling, r.score * boostFactor) };
  });

  // `boosted` is a fresh array from map(), so the in-place sort is safe.
  return boosted.sort((a, b) => b.score - a.score);
}

/**
 * Format a memory entry for context injection at the specified depth level.
 *
 * - l0: One-line summary (category + first sentence or ~80 chars, no scope)
 * - l1: Medium detail (category + scope + text up to 300 chars)
 * - full: Complete text
 *
 * @param entry - Memory entry fields to render
 * @param depth - Depth tier; any unrecognised value is rendered as "full"
 * @param score - Relevance score, rendered as a rounded percentage of 1.0
 * @param index - Position of the entry; currently unused, kept for callers
 * @param extra - Optional source flags and a sanitizer applied to the text
 * @returns A single "- [...] text (NN%...)" line
 */
export function formatAtDepth(
  entry: { text: string; category: string; scope: string },
  depth: RecallDepth,
  score: number,
  index: number,
  extra?: { bm25Hit?: boolean; reranked?: boolean; sanitize?: (text: string) => string },
): string {
  const scoreStr = `${(score * 100).toFixed(0)}%`;
  const sourceSuffix = [
    extra?.bm25Hit ? "vector+BM25" : null,
    extra?.reranked ? "+reranked" : null,
  ]
    .filter(Boolean)
    .join("");
  const sourceTag = sourceSuffix ? `, ${sourceSuffix}` : "";

  // Apply sanitization if provided (prevents prompt injection from stored memories)
  const safe = extra?.sanitize ? extra.sanitize(entry.text) : entry.text;

  switch (depth) {
    case "l0": {
      // Ultra-compact: first sentence or first 80 chars
      const brief = extractFirstSentence(safe, 80);
      return `- [${entry.category}] ${brief} (${scoreStr}${sourceTag})`;
    }
    case "l1": {
      // Medium: up to 300 chars (297 of text plus the ellipsis)
      const medium =
        safe.length > 300
          ? `${truncateAtCodePointBoundary(safe, 297)}...`
          : safe;
      return `- [${entry.category}:${entry.scope}] ${medium} (${scoreStr}${sourceTag})`;
    }
    case "full":
    default:
      return `- [${entry.category}:${entry.scope}] ${safe} (${scoreStr}${sourceTag})`;
  }
}

// ============================================================================
// Helpers
// ============================================================================

/**
 * Take the first `maxLen` UTF-16 code units of `text`, dropping a trailing
 * high surrogate whose low half was cut off, so the result never ends in half
 * of an astral character (e.g. an emoji).
 */
function truncateAtCodePointBoundary(text: string, maxLen: number): string {
  const cut = text.slice(0, maxLen);
  const last = cut.charCodeAt(cut.length - 1); // NaN for "", fails both checks
  const endsInSplitPair =
    cut.length < text.length && last >= 0xd800 && last <= 0xdbff;
  return endsInSplitPair ? cut.slice(0, -1) : cut;
}

/**
 * Return the first sentence of `text` if it ends within `maxLen` characters,
 * the whole text if it already fits, or otherwise a truncation (preferably at
 * a word boundary) followed by "...".
 */
function extractFirstSentence(text: string, maxLen: number): string {
  // ASCII terminators only count when followed by whitespace (so URLs,
  // versions and "a?b" are not cut); CJK terminators 。!? (U+3002, U+FF01,
  // U+FF1F) count on their own because they are not followed by a space.
  const sentenceEnd = text.search(/[.!?]\s|[\u3002\uFF01\uFF1F]/);
  if (sentenceEnd > 0 && sentenceEnd < maxLen) {
    return text.slice(0, sentenceEnd + 1);
  }
  if (text.length <= maxLen) return text;
  // Fall back to truncation at word boundary
  const truncated = truncateAtCodePointBoundary(text, maxLen);
  const lastSpace = truncated.lastIndexOf(" ");
  // Only back up to the space if that keeps more than 60% of the budget.
  const head = lastSpace > maxLen * 0.6 ? truncated.slice(0, lastSpace) : truncated;
  return `${head}...`;
}
entity (Codex review finding #4) + const tool = analyzeIntent("How do I install the tool?"); + assert.notEqual(tool.label, "entity"); + const component = analyzeIntent("How does this component work?"); + assert.notEqual(component.label, "entity"); + }); + + it("detects event intent and routes to entity+decision categories", () => { + const result = analyzeIntent("What happened during last week's deploy?"); + assert.equal(result.label, "event"); + assert.equal(result.confidence, "high"); + assert.equal(result.depth, "full"); + // event is not a stored category — should route to entity + decision + assert.ok(result.categories.includes("entity")); + assert.ok(result.categories.includes("decision")); + assert.ok(!result.categories.includes("event")); + }); + + it("detects event intent (Chinese)", () => { + const result = analyzeIntent("最近发生了什么?"); + assert.equal(result.label, "event"); + assert.equal(result.confidence, "high"); + assert.ok(!result.categories.includes("event")); + }); + + it("detects fact intent", () => { + const result = analyzeIntent("How does the authentication API work?"); + assert.equal(result.label, "fact"); + assert.equal(result.confidence, "high"); + assert.equal(result.depth, "l1"); + }); + + it("detects fact intent (Chinese)", () => { + const result = analyzeIntent("这个接口怎么配置?"); + assert.equal(result.label, "fact"); + assert.equal(result.confidence, "high"); + }); + + it("returns broad signal for ambiguous queries", () => { + const result = analyzeIntent("write a function to sort arrays"); + assert.equal(result.label, "broad"); + assert.equal(result.confidence, "low"); + assert.deepEqual(result.categories, []); + assert.equal(result.depth, "l0"); + }); + + it("returns empty signal for empty input", () => { + const result = analyzeIntent(""); + assert.equal(result.label, "empty"); + assert.equal(result.confidence, "low"); + }); +}); + +describe("applyCategoryBoost", () => { + const mockResults = [ + { entry: { category: "fact" }, score: 0.8 }, 
+ { entry: { category: "preference" }, score: 0.75 }, + { entry: { category: "entity" }, score: 0.7 }, + ]; + + it("boosts matching categories and re-sorts", () => { + const intent = { + categories: ["preference"], + depth: "l0", + confidence: "high", + label: "preference", + }; + const boosted = applyCategoryBoost(mockResults, intent); + // preference entry (0.75 * 1.15 = 0.8625) should now rank first + assert.equal(boosted[0].entry.category, "preference"); + assert.ok(boosted[0].score > 0.75); + }); + + it("returns results unchanged for low confidence", () => { + const intent = { + categories: [], + depth: "l0", + confidence: "low", + label: "broad", + }; + const result = applyCategoryBoost(mockResults, intent); + assert.equal(result[0].entry.category, "fact"); // original order preserved + }); + + it("caps boosted scores at 1.0", () => { + const highScoreResults = [ + { entry: { category: "preference" }, score: 0.95 }, + ]; + const intent = { + categories: ["preference"], + depth: "l0", + confidence: "high", + label: "preference", + }; + const boosted = applyCategoryBoost(highScoreResults, intent); + assert.ok(boosted[0].score <= 1.0); + }); +}); + +describe("formatAtDepth", () => { + const entry = { + text: "User prefers TypeScript over JavaScript for all new projects. 
This was decided after the migration incident in Q3 where type errors caused a production outage.", + category: "preference", + scope: "global", + }; + + it("l0: returns compact one-line summary", () => { + const line = formatAtDepth(entry, "l0", 0.85, 0); + assert.ok(line.length < entry.text.length + 30); // shorter than full + assert.ok(line.includes("[preference]")); + assert.ok(line.includes("85%")); + assert.ok(!line.includes("global")); // l0 omits scope + }); + + it("l1: returns medium detail with scope", () => { + const line = formatAtDepth(entry, "l1", 0.72, 1); + assert.ok(line.includes("[preference:global]")); + assert.ok(line.includes("72%")); + }); + + it("full: returns complete text", () => { + const line = formatAtDepth(entry, "full", 0.9, 0); + assert.ok(line.includes(entry.text)); + assert.ok(line.includes("[preference:global]")); + }); + + it("includes BM25 and rerank source tags", () => { + const line = formatAtDepth(entry, "full", 0.8, 0, { bm25Hit: true, reranked: true }); + assert.ok(line.includes("vector+BM25")); + assert.ok(line.includes("+reranked")); + }); + + it("handles short text without truncation", () => { + const short = { text: "Use tabs.", category: "preference", scope: "global" }; + const l0 = formatAtDepth(short, "l0", 0.9, 0); + assert.ok(l0.includes("Use tabs.")); + }); + + it("splits CJK sentences correctly at l0 depth", () => { + const cjk = { + text: "第一句结束。第二句开始,这里有更多内容需要处理。", + category: "fact", + scope: "global", + }; + const l0 = formatAtDepth(cjk, "l0", 0.8, 0); + // Should stop at first 。 not include second sentence + assert.ok(l0.includes("第一句结束。")); + assert.ok(!l0.includes("第二句开始")); + }); + + it("applies sanitize function when provided", () => { + const malicious = { + text: ' normal text', + category: "fact", + scope: "global", + }; + const sanitize = (t) => t.replace(/<[^>]*>/g, "").trim(); + const line = formatAtDepth(malicious, "full", 0.8, 0, { sanitize }); + assert.ok(!line.includes("