Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 111 additions & 5 deletions cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,71 @@ function formatJson(obj: any): string {
return JSON.stringify(obj, null, 2);
}

/**
 * Renders one retrieval-diagnostics snapshot as printable text lines.
 *
 * The first element is a heading; every following element is a single
 * bullet. Only the first three entries of `dropSummary` are shown, in the
 * order they were supplied. The failure-stage and error bullets are appended
 * only when the corresponding field is a non-empty string.
 *
 * @param diagnostics - Counters and metadata describing one retrieval run.
 * @returns One string per output line, without trailing newlines.
 */
function formatRetrievalDiagnosticsLines(diagnostics: {
  originalQuery: string;
  bm25Query: string | null;
  queryExpanded: boolean;
  vectorResultCount: number;
  bm25ResultCount: number;
  fusedResultCount: number;
  finalResultCount: number;
  stageCounts: {
    afterMinScore: number;
    rerankInput: number;
    afterRerank: number;
    afterHardMinScore: number;
    afterNoiseFilter: number;
    afterDiversity: number;
  };
  dropSummary: Array<{ stage: string; dropped: number; before: number; after: number }>;
  failureStage?: string;
  errorMessage?: string;
}): string[] {
  const { stageCounts: stages, dropSummary } = diagnostics;

  // Summarise at most three drop records; an empty summary prints "none".
  let dropsText = "none";
  if (dropSummary.length > 0) {
    const shown: string[] = [];
    for (const { stage, dropped, before, after } of dropSummary.slice(0, 3)) {
      shown.push(`${stage} -${dropped} (${before}->${after})`);
    }
    dropsText = shown.join(", ");
  }

  // A null BM25 query is reported as "(disabled)".
  const bm25Text = diagnostics.bm25Query ?? "(disabled)";
  const expandedText = diagnostics.queryExpanded ? "Yes" : "No";

  const output: string[] = [];
  output.push("Retrieval diagnostics:");
  output.push(` • Original query: ${diagnostics.originalQuery}`);
  output.push(` • BM25 query: ${bm25Text}`);
  output.push(` • Query expanded: ${expandedText}`);
  output.push(
    ` • Counts: vector=${diagnostics.vectorResultCount}, bm25=${diagnostics.bm25ResultCount}, fused=${diagnostics.fusedResultCount}, final=${diagnostics.finalResultCount}`,
  );
  output.push(
    ` • Stages: min=${stages.afterMinScore}, rerankIn=${stages.rerankInput}, rerank=${stages.afterRerank}, hard=${stages.afterHardMinScore}, noise=${stages.afterNoiseFilter}, diversity=${stages.afterDiversity}`,
  );
  output.push(` • Drops: ${dropsText}`);

  if (diagnostics.failureStage) {
    output.push(` • Failure stage: ${diagnostics.failureStage}`);
  }
  if (diagnostics.errorMessage) {
    output.push(` • Error: ${diagnostics.errorMessage}`);
  }

  return output;
}

/**
 * Builds the JSON-serialisable payload describing a failed search.
 *
 * @param error - The thrown value; `Error` instances contribute their
 *   `message`, anything else is converted with `String(...)`.
 * @param diagnostics - Optional diagnostics to attach to the payload.
 * @param includeDiagnostics - When true and `diagnostics` is truthy, the
 *   payload gains a `diagnostics` property.
 * @returns An object with an `error` entry (`code` is always
 *   `"search_failed"`) and, conditionally, a `diagnostics` entry.
 */
function buildSearchErrorPayload(
  error: unknown,
  diagnostics: unknown,
  includeDiagnostics: boolean,
): Record<string, unknown> {
  const payload: Record<string, unknown> = {
    error: {
      code: "search_failed",
      message: error instanceof Error ? error.message : String(error),
    },
  };

  // Attach diagnostics only when requested and actually available.
  if (includeDiagnostics && diagnostics) {
    payload.diagnostics = diagnostics;
  }

  return payload;
}

/**
 * Resolves after a timer of `ms` milliseconds has fired.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
Expand All @@ -431,7 +496,8 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
scopeFilter?: string[],
category?: string,
) => {
let results = await getSearchRetriever().retrieve({
const retriever = getSearchRetriever();
let results = await retriever.retrieve({
query,
limit,
scopeFilter,
Expand All @@ -441,16 +507,30 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {

if (results.length === 0 && context.embedder) {
await sleep(75);
results = await getSearchRetriever().retrieve({
const retryRetriever = getSearchRetriever();
results = await retryRetriever.retrieve({
query,
limit,
scopeFilter,
category,
source: "cli",
});
return {
results,
diagnostics:
typeof retryRetriever.getLastDiagnostics === "function"
? retryRetriever.getLastDiagnostics()
: null,
};
}

return results;
return {
results,
diagnostics:
typeof retriever.getLastDiagnostics === "function"
? retriever.getLastDiagnostics()
: null,
};
};

const memory = program
Expand Down Expand Up @@ -697,6 +777,7 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
.option("--scope <scope>", "Search within specific scope")
.option("--category <category>", "Filter by category")
.option("--limit <n>", "Maximum number of results", "10")
.option("--debug", "Show retrieval diagnostics")
.option("--json", "Output as JSON")
.action(async (query, options) => {
try {
Expand All @@ -707,11 +788,24 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
scopeFilter = [options.scope];
}

const results = await runSearch(query, limit, scopeFilter, options.category);
const { results, diagnostics } = await runSearch(
query,
limit,
scopeFilter,
options.category,
);

if (options.json) {
console.log(formatJson(results));
console.log(
formatJson(options.debug ? { diagnostics, results } : results),
);
} else {
if (options.debug && diagnostics) {
for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
console.log(line);
}
console.log();
}
if (results.length === 0) {
console.log("No relevant memories found.");
} else {
Expand All @@ -730,6 +824,18 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
}
}
} catch (error) {
const diagnostics = options.debug ? context.retriever.getLastDiagnostics?.() : null;
if (options.json) {
console.log(
formatJson(buildSearchErrorPayload(error, diagnostics, options.debug)),
);
process.exit(1);
}
if (diagnostics) {
for (const line of formatRetrievalDiagnosticsLines(diagnostics)) {
console.error(line);
}
}
console.error("Search failed:", error);
process.exit(1);
}
Expand Down
13 changes: 13 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"@lancedb/lancedb": "^0.26.2",
"@sinclair/typebox": "0.34.48",
"apache-arrow": "18.1.0",
"json5": "^2.2.3",
"openai": "^6.21.0"
},
"openclaw": {
Expand Down
116 changes: 116 additions & 0 deletions src/query-expander.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
 * Lightweight query expansion for BM25, aimed at colloquial Chinese queries
 * (a few English trigger words are recognized as well).
 * Keeps the vector query untouched and only appends a few high-signal synonyms.
 */

/** Upper bound on how many synonym terms may be appended to a single query. */
const MAX_EXPANSION_TERMS = 5;

/** One synonym group: trigger terms plus the terms to append when triggered. */
interface SynonymEntry {
  /** Chinese trigger terms. */
  cn: string[];
  /** English trigger terms. */
  en: string[];
  /** Candidate terms to append to the query when any trigger is present. */
  expansions: string[];
}

/**
 * Synonym groups, listed in priority order: earlier groups get to contribute
 * their expansions before later ones.
 */
const SYNONYM_MAP: SynonymEntry[] = [
  // Service went down / crashed.
  {
    cn: ["挂了", "挂掉", "宕机"],
    en: ["shutdown", "crashed"],
    expansions: ["崩溃", "crash", "error", "报错", "宕机", "失败"],
  },
  // Stuck / frozen / unresponsive.
  {
    cn: ["卡住", "卡死", "没反应"],
    en: ["hung", "frozen"],
    expansions: ["hang", "timeout", "超时", "无响应", "stuck"],
  },
  // "Blew up" (slang) / out of memory.
  {
    cn: ["炸了", "爆了"],
    en: ["oom"],
    expansions: ["崩溃", "crash", "OOM", "内存溢出", "error"],
  },
  // Configuration / settings.
  {
    cn: ["配置", "设置"],
    en: ["config", "configuration"],
    expansions: ["配置", "config", "configuration", "settings", "设置"],
  },
  // Deployment / going live.
  {
    cn: ["部署", "上线"],
    en: ["deploy", "deployment"],
    expansions: ["deploy", "部署", "上线", "发布", "release"],
  },
  // Containers.
  {
    cn: ["容器"],
    en: ["docker", "container"],
    expansions: ["Docker", "容器", "container", "docker-compose"],
  },
  // Errors / exceptions.
  {
    cn: ["报错", "出错", "错误"],
    en: ["error", "exception"],
    expansions: ["error", "报错", "exception", "错误", "失败", "bug"],
  },
  // Fixes.
  {
    cn: ["修复", "修了", "修好"],
    en: ["bugfix", "hotfix"],
    expansions: ["fix", "修复", "patch", "解决"],
  },
  // Pitfalls / troubleshooting lessons.
  {
    cn: ["踩坑"],
    en: ["troubleshoot"],
    expansions: ["踩坑", "bug", "问题", "教训", "排查", "troubleshoot"],
  },
  // Memory / the memory system.
  {
    cn: ["记忆", "记忆系统"],
    en: ["memory"],
    expansions: ["记忆", "memory", "记忆系统", "LanceDB", "索引"],
  },
  // Search / lookup / "cannot find".
  {
    cn: ["搜索", "查找", "找不到"],
    en: ["search", "retrieval"],
    expansions: ["搜索", "search", "retrieval", "检索", "查找"],
  },
  // Pushing (git).
  {
    cn: ["推送"],
    en: ["git push"],
    expansions: ["push", "推送", "git push", "commit"],
  },
  // Logs.
  {
    cn: ["日志"],
    en: ["logfile", "logging"],
    expansions: ["日志", "log", "logging", "输出", "打印"],
  },
  // Permissions / authorization.
  {
    cn: ["权限"],
    en: ["permission", "authorization"],
    expansions: ["权限", "permission", "access", "授权", "认证"],
  },
];

/**
 * Compiles a case-insensitive pattern that matches `term` literally,
 * anchored by `\b` word boundaries on both sides.
 *
 * @param term - Literal text to look for; regex metacharacters are escaped.
 * @returns A non-global `RegExp` with the `i` flag.
 */
function buildWordBoundaryRegex(term: string): RegExp {
  // Prefix every regex metacharacter with a backslash so it matches itself.
  const literal = term.replace(/[.*+?^${}()|[\]\\]/g, (meta) => "\\" + meta);
  return new RegExp("\\b" + literal + "\\b", "i");
}

/**
 * Appends synonym terms to `query` for every synonym group it triggers.
 *
 * A group is triggered when the lower-cased query contains one of its Chinese
 * terms as a substring, or when one of its English terms matches the query as
 * a whole word (case-insensitive). Expansions whose lower-cased form already
 * occurs in the query are skipped, duplicates are collapsed, and collection
 * stops once MAX_EXPANSION_TERMS terms have been gathered.
 *
 * @param query - The raw user query.
 * @returns `query` unchanged when it is empty, shorter than two characters
 *   after trimming, or triggers nothing new; otherwise the query followed by
 *   the collected terms, separated by single spaces.
 */
export function expandQuery(query: string): string {
  if (!query || query.trim().length < 2) return query;

  const haystack = query.toLowerCase();
  const extraTerms = new Set<string>();

  collect: for (const entry of SYNONYM_MAP) {
    const triggered =
      entry.cn.some((term) => haystack.includes(term.toLowerCase())) ||
      entry.en.some((term) => buildWordBoundaryRegex(term).test(query));
    if (!triggered) continue;

    for (const candidate of entry.expansions) {
      // Do not repeat something the user already typed.
      if (!haystack.includes(candidate.toLowerCase())) {
        extraTerms.add(candidate);
      }
      // Once the cap is reached, stop scanning this and all later groups.
      if (extraTerms.size >= MAX_EXPANSION_TERMS) break collect;
    }
  }

  if (extraTerms.size === 0) return query;
  return [query, ...extraTerms].join(" ");
}
Loading
Loading