Skip to content

Commit 5f466bf

Browse files
committed
Fix hybrid search returning empty results
Two bugs affected all 5 search modules (docs, code, knowledge, tasks, skills): (1) RRF/BM25 scores were not normalized to 0–1, so minScore (default 0.5) filtered out all results (RRF scores are always < 0.033); (2) zero-score vector results were included in RRF fusion, producing false positives for unknown queries. Fix: filter out zero-score vector results before fusion, normalize RRF/BM25 scores to 0–1, and fall back to pure BM25 when the vector search returns no positive-score results.
1 parent 3726dac commit 5f466bf

5 files changed

Lines changed: 67 additions & 31 deletions

File tree

src/lib/search/code.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,29 @@ export function searchCode(
5151

5252
if (useBm25) {
5353
const bm25Scores = bm25Index!.score(queryText!);
54-
if (useVector && scored.length > 0) {
55-
const vectorMap = new Map(scored.map(s => [s.id, s.score]));
54+
const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
55+
if (positiveScored.length > 0) {
56+
const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
5657
const fused = rrfFuse(vectorMap, bm25Scores, rrfK);
5758
scored.length = 0;
5859
for (const [id, score] of fused) scored.push({ id, score });
59-
} else if (!useVector) {
60+
} else {
61+
scored.length = 0;
6062
for (const [id, score] of bm25Scores) scored.push({ id, score });
6163
}
64+
// Normalize scores to 0–1 so minScore threshold works uniformly
65+
const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
66+
if (maxScore > 0) {
67+
for (const s of scored) s.score /= maxScore;
68+
}
6269
}
6370

6471
if (scored.length === 0) return [];
6572

6673
scored.sort((a, b) => b.score - a.score);
6774

6875
// --- 2. Filter seeds ---
69-
const minS = useBm25 && !useVector ? 0 : minScore;
76+
const minS = minScore;
7077
const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
7178
if (seeds.length === 0) return [];
7279

@@ -88,7 +95,7 @@ export function searchCode(
8895
if (item.score > prev) scoreMap.set(item.id, item.score);
8996

9097
if (item.depth >= bfsDepth) continue;
91-
if (item.score * bfsDecay < minScore) continue;
98+
if (item.score * bfsDecay < minS) continue;
9299

93100
const nextScore = item.score * bfsDecay;
94101
graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
@@ -102,7 +109,7 @@ export function searchCode(
102109

103110
// --- 4. Build results ---
104111
return [...scoreMap.entries()]
105-
.filter(([, score]) => score >= minScore)
112+
.filter(([, score]) => score >= minS)
106113
.map(([id, score]) => {
107114
const attrs = graph.getNodeAttributes(id);
108115
return {

src/lib/search/docs.ts

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,24 +51,32 @@ export function search(
5151

5252
if (useBm25) {
5353
const bm25Scores = bm25Index!.score(queryText!);
54-
if (useVector && scored.length > 0) {
54+
// Only include vector results with positive scores for fusion
55+
const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
56+
if (positiveScored.length > 0) {
5557
// RRF fusion
56-
const vectorMap = new Map(scored.map(s => [s.id, s.score]));
58+
const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
5759
const fused = rrfFuse(vectorMap, bm25Scores, rrfK);
5860
scored.length = 0;
5961
for (const [id, score] of fused) scored.push({ id, score });
60-
} else if (!useVector) {
61-
// BM25-only mode
62+
} else {
63+
// BM25-only or vector returned nothing — use BM25 as fallback
64+
scored.length = 0;
6265
for (const [id, score] of bm25Scores) scored.push({ id, score });
6366
}
67+
// Normalize scores to 0–1 so minScore threshold works uniformly
68+
const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
69+
if (maxScore > 0) {
70+
for (const s of scored) s.score /= maxScore;
71+
}
6472
}
6573

6674
if (scored.length === 0) return [];
6775

6876
scored.sort((a, b) => b.score - a.score);
6977

7078
// --- 2. Filter seeds by minScore, then take topK ---
71-
const minS = useBm25 && !useVector ? 0 : minScore; // keyword-only: no minScore (BM25 scores are unbounded)
79+
const minS = minScore;
7280
const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
7381
if (seeds.length === 0) return [];
7482

@@ -92,7 +100,7 @@ export function search(
92100
if (item.score > prev) scoreMap.set(item.id, item.score);
93101

94102
if (item.depth >= bfsDepth) continue;
95-
if (item.score * bfsDecay < minScore) continue; // prune: deeper hops won't pass minScore
103+
if (item.score * bfsDecay < minS) continue; // prune: deeper hops won't pass threshold
96104

97105
const nextScore = item.score * bfsDecay;
98106
graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
@@ -106,7 +114,7 @@ export function search(
106114

107115
// --- 4. Build results from scoreMap, apply minScore filter, sort, cap ---
108116
return [...scoreMap.entries()]
109-
.filter(([, score]) => score >= minScore)
117+
.filter(([, score]) => score >= minS)
110118
.map(([id, score]) => {
111119
const attrs = graph.getNodeAttributes(id);
112120
return {

src/lib/search/knowledge.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,22 +48,29 @@ export function searchKnowledge(
4848

4949
if (useBm25) {
5050
const bm25Scores = bm25Index!.score(queryText!);
51-
if (useVector && scored.length > 0) {
52-
const vectorMap = new Map(scored.map(s => [s.id, s.score]));
51+
const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
52+
if (positiveScored.length > 0) {
53+
const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
5354
const fused = rrfFuse(vectorMap, bm25Scores, rrfK);
5455
scored.length = 0;
5556
for (const [id, score] of fused) scored.push({ id, score });
56-
} else if (!useVector) {
57+
} else {
58+
scored.length = 0;
5759
for (const [id, score] of bm25Scores) scored.push({ id, score });
5860
}
61+
// Normalize scores to 0–1 so minScore threshold works uniformly
62+
const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
63+
if (maxScore > 0) {
64+
for (const s of scored) s.score /= maxScore;
65+
}
5966
}
6067

6168
if (scored.length === 0) return [];
6269

6370
scored.sort((a, b) => b.score - a.score);
6471

6572
// --- 2. Filter seeds ---
66-
const minS = useBm25 && !useVector ? 0 : minScore;
73+
const minS = minScore;
6774
const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
6875
if (seeds.length === 0) return [];
6976

@@ -85,7 +92,7 @@ export function searchKnowledge(
8592
if (item.score > prev) scoreMap.set(item.id, item.score);
8693

8794
if (item.depth >= bfsDepth) continue;
88-
if (item.score * bfsDecay < minScore) continue;
95+
if (item.score * bfsDecay < minS) continue;
8996

9097
const nextScore = item.score * bfsDecay;
9198
graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
@@ -99,7 +106,7 @@ export function searchKnowledge(
99106

100107
// --- 4. Build results (exclude proxy nodes) ---
101108
return [...scoreMap.entries()]
102-
.filter(([id, score]) => score >= minScore && !graph.getNodeAttribute(id, 'proxyFor'))
109+
.filter(([id, score]) => score >= minS && !graph.getNodeAttribute(id, 'proxyFor'))
103110
.map(([id, score]) => {
104111
const attrs = graph.getNodeAttributes(id);
105112
return {

src/lib/search/skills.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,22 +51,29 @@ export function searchSkills(
5151

5252
if (useBm25) {
5353
const bm25Scores = bm25Index!.score(queryText!);
54-
if (useVector && scored.length > 0) {
55-
const vectorMap = new Map(scored.map(s => [s.id, s.score]));
54+
const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
55+
if (positiveScored.length > 0) {
56+
const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
5657
const fused = rrfFuse(vectorMap, bm25Scores, rrfK);
5758
scored.length = 0;
5859
for (const [id, score] of fused) scored.push({ id, score });
59-
} else if (!useVector) {
60+
} else {
61+
scored.length = 0;
6062
for (const [id, score] of bm25Scores) scored.push({ id, score });
6163
}
64+
// Normalize scores to 0–1 so minScore threshold works uniformly
65+
const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
66+
if (maxScore > 0) {
67+
for (const s of scored) s.score /= maxScore;
68+
}
6269
}
6370

6471
if (scored.length === 0) return [];
6572

6673
scored.sort((a, b) => b.score - a.score);
6774

6875
// --- 2. Filter seeds ---
69-
const minS = useBm25 && !useVector ? 0 : minScore;
76+
const minS = minScore;
7077
const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
7178
if (seeds.length === 0) return [];
7279

@@ -88,7 +95,7 @@ export function searchSkills(
8895
if (item.score > prev) scoreMap.set(item.id, item.score);
8996

9097
if (item.depth >= bfsDepth) continue;
91-
if (item.score * bfsDecay < minScore) continue;
98+
if (item.score * bfsDecay < minS) continue;
9299

93100
const nextScore = item.score * bfsDecay;
94101
graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
@@ -102,7 +109,7 @@ export function searchSkills(
102109

103110
// --- 4. Build results (exclude proxy nodes) ---
104111
return [...scoreMap.entries()]
105-
.filter(([id, score]) => score >= minScore && !graph.getNodeAttribute(id, 'proxyFor'))
112+
.filter(([id, score]) => score >= minS && !graph.getNodeAttribute(id, 'proxyFor'))
106113
.map(([id, score]) => {
107114
const attrs = graph.getNodeAttributes(id);
108115
return {

src/lib/search/tasks.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,29 @@ export function searchTasks(
5050

5151
if (useBm25) {
5252
const bm25Scores = bm25Index!.score(queryText!);
53-
if (useVector && scored.length > 0) {
54-
const vectorMap = new Map(scored.map(s => [s.id, s.score]));
53+
const positiveScored = useVector ? scored.filter(s => s.score > 0) : [];
54+
if (positiveScored.length > 0) {
55+
const vectorMap = new Map(positiveScored.map(s => [s.id, s.score]));
5556
const fused = rrfFuse(vectorMap, bm25Scores, rrfK);
5657
scored.length = 0;
5758
for (const [id, score] of fused) scored.push({ id, score });
58-
} else if (!useVector) {
59+
} else {
60+
scored.length = 0;
5961
for (const [id, score] of bm25Scores) scored.push({ id, score });
6062
}
63+
// Normalize scores to 0–1 so minScore threshold works uniformly
64+
const maxScore = scored.reduce((m, s) => Math.max(m, s.score), 0);
65+
if (maxScore > 0) {
66+
for (const s of scored) s.score /= maxScore;
67+
}
6168
}
6269

6370
if (scored.length === 0) return [];
6471

6572
scored.sort((a, b) => b.score - a.score);
6673

6774
// --- 2. Filter seeds ---
68-
const minS = useBm25 && !useVector ? 0 : minScore;
75+
const minS = minScore;
6976
const seeds = scored.filter(s => s.score >= minS).slice(0, topK);
7077
if (seeds.length === 0) return [];
7178

@@ -87,7 +94,7 @@ export function searchTasks(
8794
if (item.score > prev) scoreMap.set(item.id, item.score);
8895

8996
if (item.depth >= bfsDepth) continue;
90-
if (item.score * bfsDecay < minScore) continue;
97+
if (item.score * bfsDecay < minS) continue;
9198

9299
const nextScore = item.score * bfsDecay;
93100
graph.outNeighbors(item.id).forEach(n => queue.push({ id: n, depth: item.depth + 1, score: nextScore }));
@@ -101,7 +108,7 @@ export function searchTasks(
101108

102109
// --- 4. Build results (exclude proxy nodes) ---
103110
return [...scoreMap.entries()]
104-
.filter(([id, score]) => score >= minScore && !graph.getNodeAttribute(id, 'proxyFor'))
111+
.filter(([id, score]) => score >= minS && !graph.getNodeAttribute(id, 'proxyFor'))
105112
.map(([id, score]) => {
106113
const attrs = graph.getNodeAttributes(id);
107114
return {

0 commit comments

Comments
 (0)