Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import {
type AdmissionControlConfig,
type AdmissionRejectionAuditEntry,
} from "./src/admission-control.js";
import { analyzeIntent, applyCategoryBoost } from "./src/intent-analyzer.js";

// ============================================================================
// Configuration & Types
Expand All @@ -90,6 +91,7 @@ interface PluginConfig {
autoRecallMaxItems?: number;
autoRecallMaxChars?: number;
autoRecallPerItemMaxChars?: number;
recallMode?: "full" | "summary" | "adaptive" | "off";
captureAssistant?: boolean;
retrieval?: {
mode?: "hybrid" | "vector";
Expand Down Expand Up @@ -2146,7 +2148,9 @@ const memoryLanceDBProPlugin = {

// Auto-recall: inject relevant memories before agent starts
// Default is OFF to prevent the model from accidentally echoing injected context.
if (config.autoRecall === true) {
// recallMode: "full" (default when autoRecall=true) | "summary" (L0 only) | "adaptive" (intent-based) | "off"
const recallMode = config.recallMode || "full";
if (config.autoRecall === true && recallMode !== "off") {
// Cache the most recent raw user message per session so the
// before_prompt_build gating can check the *user* text, not the full
// assembled prompt (which includes system instructions and is too long
Expand Down Expand Up @@ -2210,6 +2214,14 @@ const memoryLanceDBProPlugin = {
const autoRecallPerItemMaxChars = clampInt(config.autoRecallPerItemMaxChars ?? 180, 32, 1000);
const retrieveLimit = clampInt(Math.max(autoRecallMaxItems * 2, autoRecallMaxItems), 1, 20);

// Adaptive intent analysis (zero-LLM-cost pattern matching)
const intent = recallMode === "adaptive" ? analyzeIntent(recallQuery) : undefined;
if (intent) {
api.logger.debug?.(
`memory-lancedb-pro: adaptive recall intent=${intent.label} depth=${intent.depth} confidence=${intent.confidence} categories=[${intent.categories.join(",")}]`,
);
}

const results = filterUserMdExclusiveRecallResults(await retrieveWithRetry({
query: recallQuery,
limit: retrieveLimit,
Expand All @@ -2221,16 +2233,19 @@ const memoryLanceDBProPlugin = {
return;
}

// Apply intent-based category boost for adaptive mode
const rankedResults = intent ? applyCategoryBoost(results, intent) : results;

// Filter out redundant memories based on session history
const minRepeated = config.autoRecallMinRepeated ?? 8;
let dedupFilteredCount = 0;

// Only enable dedup logic when minRepeated > 0
let finalResults = results;
let finalResults = rankedResults;

if (minRepeated > 0) {
const sessionHistory = recallHistory.get(sessionId) || new Map<string, number>();
const filteredResults = results.filter((r) => {
const filteredResults = rankedResults.filter((r) => {
const lastTurn = sessionHistory.get(r.entry.id) ?? -999;
const diff = currentTurn - lastTurn;
const isRedundant = diff < minRepeated;
Expand Down Expand Up @@ -2282,13 +2297,30 @@ const memoryLanceDBProPlugin = {
return;
}

// Determine effective per-item char limit based on recall mode and intent depth
const effectivePerItemMaxChars = (() => {
if (recallMode === "summary") return Math.min(autoRecallPerItemMaxChars, 80); // L0 only
if (!intent) return autoRecallPerItemMaxChars; // "full" mode
// Adaptive mode: depth determines char budget
switch (intent.depth) {
case "l0": return Math.min(autoRecallPerItemMaxChars, 80);
case "l1": return autoRecallPerItemMaxChars; // default budget
case "full": return Math.min(autoRecallPerItemMaxChars * 3, 1000);
}
})();

const preBudgetCandidates = governanceEligible.map((r) => {
const metaObj = parseSmartMetadata(r.entry.metadata, r.entry);
const displayCategory = metaObj.memory_category || r.entry.category;
const displayTier = metaObj.tier || "";
const tierPrefix = displayTier ? `[${displayTier.charAt(0).toUpperCase()}]` : "";
const abstract = metaObj.l0_abstract || r.entry.text;
const summary = sanitizeForContext(abstract).slice(0, autoRecallPerItemMaxChars);
// Select content tier based on recallMode/intent depth
const contentText = recallMode === "summary"
? (metaObj.l0_abstract || r.entry.text)
: intent?.depth === "full"
? (r.entry.text) // full text for deep queries
: (metaObj.l0_abstract || r.entry.text); // L0/L1 default
const summary = sanitizeForContext(contentText).slice(0, effectivePerItemMaxChars);
return {
id: r.entry.id,
prefix: `${tierPrefix}[${displayCategory}:${r.entry.scope}]`,
Expand Down
11 changes: 11 additions & 0 deletions openclaw.plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@
"default": 180,
"description": "Maximum character budget per auto-injected memory summary."
},
"recallMode": {
"type": "string",
"enum": ["full", "summary", "adaptive", "off"],
"default": "full",
"description": "Auto-recall depth mode. 'full': inject with configured per-item budget. 'summary': L0 abstracts only (compact). 'adaptive': analyze query intent to auto-select category and depth. 'off': disable auto-recall injection."
},
"captureAssistant": {
"type": "boolean"
},
Expand Down Expand Up @@ -916,6 +922,11 @@
"help": "Maximum characters per injected memory summary.",
"advanced": true
},
"recallMode": {
"label": "Recall Mode",
"help": "Auto-recall depth: full (default), summary (L0 only), adaptive (intent-based category routing), off.",
"advanced": false
},
"captureAssistant": {
"label": "Capture Assistant Messages",
"help": "Also auto-capture assistant messages (default false to reduce memory pollution)",
Expand Down
259 changes: 259 additions & 0 deletions src/intent-analyzer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
/**
* Intent Analyzer for Adaptive Recall
*
* Lightweight, rule-based intent analysis that determines which memory categories
* are most relevant for a given query and what recall depth to use.
*
* Inspired by OpenViking's hierarchical retrieval intent routing, adapted for
* memory-lancedb-pro's flat category model. No LLM calls — pure pattern matching
* for minimal latency impact on auto-recall.
*
* @see https://github.com/volcengine/OpenViking — hierarchical_retriever.py intent analysis
*/

// ============================================================================
// Types
// ============================================================================

/**
 * Memory categories an intent can point at. Each member mirrors a category
 * that is actually stored on a MemoryEntry.
 *
 * "event" is deliberately missing: it is NOT a stored category, so event-style
 * queries are routed to entity + decision instead (the categories most likely
 * to contain timeline data).
 */
export type MemoryCategoryIntent = "preference" | "fact" | "decision" | "entity" | "other";

/** Level of detail recommended for recalled memories. */
export type RecallDepth = "l0" | "l1" | "full";

/** Outcome of classifying a query: what to prioritize and how deep to recall. */
export interface IntentSignal {
  /** Short label for logging. */
  label: string;
  /** Confidence level of the intent classification. */
  confidence: "high" | "medium" | "low";
  /** Categories to prioritize, ordered by relevance (most relevant first). */
  categories: MemoryCategoryIntent[];
  /** Recommended recall depth for this intent. */
  depth: RecallDepth;
}

// ============================================================================
// Intent Patterns
// ============================================================================

/** One row of the intent rule table: trigger patterns plus the signal they yield. */
interface IntentRule {
  /** Short identifier, copied into the resulting signal's label. */
  label: string;
  /** Categories to prioritize when this rule matches. */
  categories: MemoryCategoryIntent[];
  /** Recall depth recommended when this rule matches. */
  depth: RecallDepth;
  /** The rule fires when any one of these expressions matches the query. */
  patterns: RegExp[];
}

/**
 * Rule table, listed from most specific to least specific.
 * Evaluation stops at the first rule with a matching pattern, so the order of
 * the entries is significant — keep high-confidence patterns near the top.
 *
 * Each rule carries two English patterns (case-insensitive, word-bounded) and
 * one Chinese pattern (plain substring alternation).
 */
const INTENT_RULES: IntentRule[] = [
  // Preference / style: "how do we usually…", "what's my convention…"
  {
    label: "preference",
    categories: ["preference", "decision"],
    depth: "l0",
    patterns: [
      /\b(prefer|preference|style|convention|like|dislike|favorite|habit)\b/i,
      /\b(how do (i|we) usually|what('s| is) (my|our) (style|convention|approach))\b/i,
      /(偏好|喜欢|习惯|风格|惯例|常用|不喜欢|不要用|别用)/,
    ],
  },

  // Decision / rationale: "why did we…", "what was the reason…"
  {
    label: "decision",
    categories: ["decision", "fact"],
    depth: "l1",
    patterns: [
      /\b(why did (we|i)|decision|decided|chose|rationale|trade-?off|reason for)\b/i,
      /\b(what was the (reason|rationale|decision))\b/i,
      /(为什么选|决定|选择了|取舍|权衡|原因是|当时决定)/,
    ],
  },

  // Entity / people / project. The patterns are intentionally narrow: they
  // require "who is" / "tell me about" style phrasing rather than bare nouns
  // such as "tool" or "component", to avoid over-matching.
  {
    label: "entity",
    categories: ["entity", "fact"],
    depth: "l1",
    patterns: [
      /\b(who is|who are|tell me about|info on|details about|contact info)\b/i,
      /\b(who('s| is) (the|our|my)|what team|which (person|team))\b/i,
      /(谁是|告诉我关于|详情|联系方式|哪个团队)/,
    ],
  },

  // Event / timeline. "event" is not a stored category, so these queries are
  // routed to entity + decision (the categories most likely to contain
  // timeline/incident data).
  {
    label: "event",
    categories: ["entity", "decision"],
    depth: "full",
    patterns: [
      /\b(when did|what happened|timeline|incident|outage|deploy|release|shipped)\b/i,
      /\b(last (week|month|time|sprint)|recently|yesterday|today)\b/i,
      /(什么时候|发生了什么|时间线|事件|上线|部署|发布|上次|最近)/,
    ],
  },

  // Fact / knowledge: "how does…", "what is…", configuration and API questions
  {
    label: "fact",
    categories: ["fact", "entity"],
    depth: "l1",
    patterns: [
      /\b(how (does|do|to)|what (does|do|is)|explain|documentation|spec)\b/i,
      /\b(config|configuration|setup|install|architecture|api|endpoint)\b/i,
      /(怎么|如何|是什么|解释|文档|规范|配置|安装|架构|接口)/,
    ],
  },
];

// ============================================================================
// Analyzer
// ============================================================================

/**
 * Classify a query into an intent signal (priority categories + recall depth).
 *
 * The query is trimmed and checked against the rule table in order; the first
 * rule with any matching pattern decides the result and is reported with
 * "high" confidence.
 *
 * A signal is always returned, so callers never need a null check:
 * - blank query          -> label "empty", no categories, depth "l0", confidence "low"
 * - no rule matched      -> label "broad", no categories, depth "l0", confidence "low"
 *
 * @param query - Raw query text.
 * @returns The detected intent signal, or one of the low-confidence fallbacks.
 */
export function analyzeIntent(query: string): IntentSignal {
  const text = query.trim();

  // Shared shape for the two "nothing specific detected" outcomes. An empty
  // category list means every category is eligible; L0 keeps token cost minimal.
  const fallback = (label: string): IntentSignal => ({
    categories: [],
    depth: "l0",
    confidence: "low",
    label,
  });

  if (text.length === 0) {
    return fallback("empty");
  }

  // First match wins, mirroring the ordering of the rule table.
  const matched = INTENT_RULES.find((rule) => rule.patterns.some((p) => p.test(text)));
  if (matched === undefined) {
    return fallback("broad");
  }

  return {
    categories: matched.categories,
    depth: matched.depth,
    confidence: "high",
    label: matched.label,
  };
}

/**
 * Apply intent-based category boost to retrieval results.
 *
 * Nothing is filtered out (that could drop relevant results): results whose
 * `entry.category` is one of the intent's categories get their score
 * multiplied by `boostFactor`, and the list is then re-sorted by descending
 * score. Non-matching results are kept with their original score.
 *
 * Boosted scores are capped at 1. The cap never pulls a score down, though:
 * a matching result that already scores above 1 keeps at least the ceiling of
 * its own original score, so a "boost" cannot demote it below results it
 * previously outranked.
 *
 * The input array and its elements are not mutated; boosted entries are
 * shallow copies. When the intent carries no usable signal (no categories or
 * "low" confidence) the input array itself is returned untouched.
 *
 * @param results - Retrieval results with scores
 * @param intent - Detected intent signal
 * @param boostFactor - Score multiplier for matching categories (default: 1.15)
 * @returns Results with adjusted scores, re-sorted by descending score
 */
export function applyCategoryBoost<
  T extends { entry: { category: string }; score: number },
>(results: T[], intent: IntentSignal, boostFactor = 1.15): T[] {
  if (intent.categories.length === 0 || intent.confidence === "low") {
    return results; // No intent signal — return as-is
  }

  const prioritySet = new Set<string>(intent.categories);

  const boosted = results.map((r) => {
    if (!prioritySet.has(r.entry.category)) {
      return r;
    }
    // Ceiling is 1 for normal scores, but never below the score the result
    // already had — otherwise clamping would turn the boost into a penalty.
    const ceiling = Math.max(1, r.score);
    return { ...r, score: Math.min(ceiling, r.score * boostFactor) };
  });

  return boosted.sort((a, b) => b.score - a.score);
}

/**
 * Render one memory entry as a bullet line for context injection.
 *
 * Output shape by depth:
 * - l0:   `- [category] <first sentence or ~80 chars> (<score>%…)`
 * - l1:   `- [category:scope] <text, cut to 297 chars + "..." when longer than 300> (<score>%…)`
 * - full: `- [category:scope] <entire text> (<score>%…)` (also used for any other depth value)
 *
 * The trailing parenthesis holds the score as a rounded percentage, followed
 * by ", vector+BM25" and/or "+reranked" when the matching `extra` flags are set.
 *
 * @param entry - Memory text plus its category and scope.
 * @param depth - Detail level to render at.
 * @param score - Relevance score; multiplied by 100 and rounded for display.
 * @param index - Position of the entry; accepted but not used in the output.
 * @param extra - Optional source flags and a sanitizer applied to the text
 *   before formatting (prevents prompt injection from stored memories).
 * @returns The formatted single-entry line.
 */
export function formatAtDepth(
  entry: { text: string; category: string; scope: string },
  depth: RecallDepth,
  score: number,
  index: number,
  extra?: { bm25Hit?: boolean; reranked?: boolean; sanitize?: (text: string) => string },
): string {
  const percent = `${(score * 100).toFixed(0)}%`;

  // Build the retrieval-source note piece by piece.
  let provenance = "";
  if (extra?.bm25Hit) provenance += "vector+BM25";
  if (extra?.reranked) provenance += "+reranked";
  const trailer = provenance === "" ? `(${percent})` : `(${percent}, ${provenance})`;

  // Sanitizer is optional; raw text is used when none is supplied.
  const body = extra?.sanitize ? extra.sanitize(entry.text) : entry.text;

  if (depth === "l0") {
    // Ultra-compact form: category only, first sentence or first 80 chars.
    return `- [${entry.category}] ${extractFirstSentence(body, 80)} ${trailer}`;
  }

  const tag = `[${entry.category}:${entry.scope}]`;

  if (depth === "l1") {
    // Medium form: at most 300 characters including the ellipsis.
    const clipped = body.length > 300 ? `${body.slice(0, 297)}...` : body;
    return `- ${tag} ${clipped} ${trailer}`;
  }

  // "full" and anything unrecognized: complete text.
  return `- ${tag} ${body} ${trailer}`;
}

// ============================================================================
// Helpers
// ============================================================================

/**
 * Return a short lead-in for `text`: its first sentence when that ends before
 * `maxLen`, otherwise the text itself (if short enough) or a truncated prefix.
 *
 * A sentence boundary is either:
 * - ASCII `.`, `!` or `?` followed by whitespace, or
 * - full-width CJK punctuation `。`, `！` or `？` (U+3002, U+FF01, U+FF1F),
 *   which is usually not followed by a space.
 *
 * ASCII `!` / `?` with no following whitespace (as in `a!=b` or `url?x=1`) is
 * deliberately NOT treated as a boundary, so tokens are not cut in half.
 *
 * @param text - Text to shorten.
 * @param maxLen - Maximum length of the kept prefix; when truncation happens
 *   a trailing "..." is appended on top of it.
 * @returns The first sentence, the whole text, or a truncated prefix + "...".
 */
function extractFirstSentence(text: string, maxLen: number): string {
  // Code-point escapes keep the CJK class intact even if the file's encoding
  // or a tool normalizes full-width punctuation to ASCII.
  const sentenceEnd = text.search(/[.!?]\s|[\u3002\uFF01\uFF1F]/);
  if (sentenceEnd > 0 && sentenceEnd < maxLen) {
    // Keep the terminating punctuation mark itself.
    return text.slice(0, sentenceEnd + 1);
  }
  if (text.length <= maxLen) return text;
  // Fall back to truncation, preferring a word boundary unless that would
  // discard more than ~40% of the allowed length.
  const truncated = text.slice(0, maxLen);
  const lastSpace = truncated.lastIndexOf(" ");
  return (lastSpace > maxLen * 0.6 ? truncated.slice(0, lastSpace) : truncated) + "...";
}
Loading
Loading