Skip to content

Commit bbf89ef

Browse files
authored
Merge pull request #2 from coe0718/memory-ranking-reinforcement-decay
Improve episodic memory ranking with reinforcement and decay
2 parents b76e539 + beb9041 commit bbf89ef

7 files changed

Lines changed: 326 additions & 30 deletions

File tree

docs/memory.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ Session transcripts stored as embeddings. Each episode contains:
1717

1818
Search: "What happened last time I worked on the auth service?"
1919

20+
Episode ranking does not rely on the raw vector score alone. Retrieval blends the semantic match with each episode's importance, reinforcement from repeated access, and decay over time, so durable memories stay available while stale, one-off memories fade.
21+
2022
### Tier 2: Semantic Memory
2123

2224
Accumulated facts with contradiction detection and temporal validity:
@@ -50,7 +52,8 @@ Before each agent invocation, the context builder:
5052
3. Searches semantic memory (top 20 facts)
5153
4. Searches procedural memory (top 5 procedures)
5254
5. Budgets results to fit within the token limit (default: 50,000 tokens)
53-
6. Formats results into the memory section of the system prompt
55+
6. Filters out stale, low-signal episodic memories before prompt injection
56+
7. Formats results into the memory section of the system prompt
5457

5558
## Consolidation
5659

src/memory/__tests__/context-builder.test.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,61 @@ describe("MemoryContextBuilder", () => {
124124
expect(result).toContain("success");
125125
});
126126

127+
test("filters stale low-signal episodes from prompt context", async () => {
128+
const memory = createMockMemorySystem({
129+
episodes: Promise.resolve([
130+
{
131+
id: "stale-ep",
132+
type: "task" as const,
133+
summary: "One-off stale note",
134+
detail: "No longer important",
135+
parent_id: null,
136+
session_id: "s1",
137+
user_id: "u1",
138+
tools_used: [],
139+
files_touched: [],
140+
outcome: "success" as const,
141+
outcome_detail: "",
142+
lessons: [],
143+
started_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
144+
ended_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
145+
duration_seconds: 300,
146+
importance: 0.2,
147+
access_count: 0,
148+
last_accessed_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
149+
decay_rate: 1.0,
150+
},
151+
{
152+
id: "durable-ep",
153+
type: "task" as const,
154+
summary: "Repeated deployment pattern",
155+
detail: "Still referenced often",
156+
parent_id: null,
157+
session_id: "s2",
158+
user_id: "u1",
159+
tools_used: ["Bash"],
160+
files_touched: [],
161+
outcome: "success" as const,
162+
outcome_detail: "",
163+
lessons: [],
164+
started_at: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
165+
ended_at: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
166+
duration_seconds: 300,
167+
importance: 0.8,
168+
access_count: 4,
169+
last_accessed_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
170+
decay_rate: 1.0,
171+
},
172+
]),
173+
});
174+
175+
const builder = new MemoryContextBuilder(memory, TEST_CONFIG);
176+
const result = await builder.build("deployment");
177+
178+
expect(result).toContain("Repeated deployment pattern");
179+
expect(result).not.toContain("One-off stale note");
180+
});
181+
127182
test("formats procedure section correctly", async () => {
128183
const memory = createMockMemorySystem({
129184
procedure: Promise.resolve({

src/memory/__tests__/episodic.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,66 @@ describe("EpisodicStore", () => {
234234
expect(episodes[0].id).toBe("new-ep");
235235
expect(episodes[1].id).toBe("old-ep");
236236
});
237+
238+
test("recall() metadata strategy favors reinforced memories", async () => {
239+
const vec = make768dVector();
240+
const now = Date.now();
241+
242+
globalThis.fetch = mock((url: string | Request) => {
243+
const urlStr = typeof url === "string" ? url : url.url;
244+
245+
if (urlStr.includes("/api/embed")) {
246+
return Promise.resolve(new Response(JSON.stringify({ embeddings: [vec] }), { status: 200 }));
247+
}
248+
249+
if (urlStr.includes("/points/query")) {
250+
return Promise.resolve(
251+
new Response(
252+
JSON.stringify({
253+
result: {
254+
points: [
255+
{
256+
id: "stale-ep",
257+
score: 0.82,
258+
payload: {
259+
type: "task",
260+
summary: "Stale one-off episode",
261+
importance: 0.3,
262+
access_count: 0,
263+
last_accessed_at: new Date(now - 45 * 24 * 3600 * 1000).toISOString(),
264+
started_at: now - 45 * 24 * 3600 * 1000,
265+
},
266+
},
267+
{
268+
id: "durable-ep",
269+
score: 0.7,
270+
payload: {
271+
type: "task",
272+
summary: "Frequently reused deployment memory",
273+
importance: 0.8,
274+
access_count: 6,
275+
last_accessed_at: new Date(now - 2 * 24 * 3600 * 1000).toISOString(),
276+
started_at: now - 45 * 24 * 3600 * 1000,
277+
},
278+
},
279+
],
280+
},
281+
}),
282+
{ status: 200, headers: { "Content-Type": "application/json" } },
283+
),
284+
);
285+
}
286+
287+
return Promise.resolve(new Response(JSON.stringify({ status: "ok" }), { status: 200 }));
288+
}) as unknown as typeof fetch;
289+
290+
const qdrant = new QdrantClient(TEST_CONFIG);
291+
const embedder = new EmbeddingClient(TEST_CONFIG);
292+
const store = new EpisodicStore(qdrant, embedder, TEST_CONFIG);
293+
294+
const episodes = await store.recall("deployment", { strategy: "metadata" });
295+
296+
expect(episodes[0].id).toBe("durable-ep");
297+
expect(episodes[1].id).toBe("stale-ep");
298+
});
237299
});
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import { describe, expect, test } from "bun:test";
2+
import { calculateEpisodeRecallScore, shouldIncludeEpisodeInContext } from "../ranking.ts";
3+
import type { Episode } from "../types.ts";
4+
5+
function makeEpisode(overrides?: Partial<Episode>): Episode {
6+
return {
7+
id: "ep-1",
8+
type: "task",
9+
summary: "Memory summary",
10+
detail: "Memory detail",
11+
parent_id: null,
12+
session_id: "session-1",
13+
user_id: "user-1",
14+
tools_used: [],
15+
files_touched: [],
16+
outcome: "success",
17+
outcome_detail: "Completed successfully",
18+
lessons: [],
19+
started_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
20+
ended_at: new Date().toISOString(),
21+
duration_seconds: 60,
22+
importance: 0.6,
23+
access_count: 0,
24+
last_accessed_at: new Date().toISOString(),
25+
decay_rate: 1,
26+
...overrides,
27+
};
28+
}
29+
30+
describe("memory ranking", () => {
31+
test("metadata strategy rewards reinforced memories", () => {
32+
const staleWeak = calculateEpisodeRecallScore(
33+
0.82,
34+
{
35+
importance: 0.3,
36+
accessCount: 0,
37+
startedAt: Date.now() - 45 * 24 * 3600 * 1000,
38+
lastAccessedAt: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
39+
decayRate: 1,
40+
},
41+
"metadata",
42+
);
43+
44+
const durableRepeat = calculateEpisodeRecallScore(
45+
0.7,
46+
{
47+
importance: 0.8,
48+
accessCount: 6,
49+
startedAt: Date.now() - 45 * 24 * 3600 * 1000,
50+
lastAccessedAt: new Date(Date.now() - 2 * 24 * 3600 * 1000).toISOString(),
51+
decayRate: 1,
52+
},
53+
"metadata",
54+
);
55+
56+
expect(durableRepeat).toBeGreaterThan(staleWeak);
57+
});
58+
59+
test("context filtering drops stale low-signal memories", () => {
60+
const staleWeak = makeEpisode({
61+
importance: 0.2,
62+
access_count: 0,
63+
started_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
64+
last_accessed_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
65+
});
66+
67+
const durableRepeat = makeEpisode({
68+
id: "ep-2",
69+
importance: 0.85,
70+
access_count: 5,
71+
started_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
72+
last_accessed_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
73+
});
74+
75+
expect(shouldIncludeEpisodeInContext(staleWeak)).toBe(false);
76+
expect(shouldIncludeEpisodeInContext(durableRepeat)).toBe(true);
77+
});
78+
});

src/memory/context-builder.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { MemoryConfig } from "../config/types.ts";
2+
import { shouldIncludeEpisodeInContext } from "./ranking.ts";
23
import type { MemorySystem } from "./system.ts";
34
import type { Episode, Procedure, SemanticFact } from "./types.ts";
45

@@ -44,10 +45,13 @@ export class MemoryContextBuilder {
4445

4546
// Recent memories provide episode context
4647
if (episodes.length > 0 && tokenBudget > 500) {
47-
const episodeSection = this.formatEpisodes(episodes, tokenBudget);
48+
const durableEpisodes = episodes.filter(shouldIncludeEpisodeInContext);
49+
const episodeSection = this.formatEpisodes(durableEpisodes, tokenBudget);
4850
const episodeTokens = this.estimateTokens(episodeSection);
49-
sections.push(episodeSection);
50-
tokenBudget -= episodeTokens;
51+
if (episodeSection) {
52+
sections.push(episodeSection);
53+
tokenBudget -= episodeTokens;
54+
}
5155
}
5256

5357
// Relevant procedures
@@ -70,6 +74,8 @@ export class MemoryContextBuilder {
7074
}
7175

7276
private formatEpisodes(episodes: Episode[], tokenBudget: number): string {
77+
if (episodes.length === 0) return "";
78+
7379
const header = "## Recent Memories\n";
7480
let content = header;
7581
const maxChars = tokenBudget * CHARS_PER_TOKEN;

src/memory/episodic.ts

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type { MemoryConfig } from "../config/types.ts";
22
import { type EmbeddingClient, textToSparseVector } from "./embeddings.ts";
33
import type { QdrantClient } from "./qdrant-client.ts";
4+
import { calculateEpisodeRecallScore } from "./ranking.ts";
45
import type { Episode, QdrantSearchResult, RecallOptions } from "./types.ts";
56

67
const COLLECTION_SCHEMA = {
@@ -128,6 +129,7 @@ export class EpisodicStore {
128129
for (const id of ids) {
129130
try {
130131
await this.qdrant.updatePayload(this.collectionName, id, {
132+
access_count: { $inc: 1 },
131133
last_accessed_at: new Date().toISOString(),
132134
});
133135
} catch {
@@ -165,34 +167,23 @@ export class EpisodicStore {
165167
return { must };
166168
}
167169

168-
private applyStrategy(results: QdrantSearchResult[], strategy: string): QdrantSearchResult[] {
169-
const now = Date.now();
170-
170+
private applyStrategy(results: QdrantSearchResult[], strategy: RecallOptions["strategy"]): QdrantSearchResult[] {
171171
return results
172172
.map((r) => {
173-
const startedAt = (r.payload.started_at as number) ?? 0;
174-
const importance = (r.payload.importance as number) ?? 0.5;
175-
const hoursSince = (now - startedAt) / (1000 * 60 * 60);
176-
const recencyScore = Math.exp(-0.01 * hoursSince);
177-
178-
let finalScore: number;
179-
switch (strategy) {
180-
case "similarity":
181-
finalScore = r.score * 0.7 + importance * 0.2 + recencyScore * 0.1;
182-
break;
183-
case "temporal":
184-
finalScore = recencyScore * 0.7 + r.score * 0.2 + importance * 0.1;
185-
break;
186-
case "metadata":
187-
finalScore = r.score * 0.5 + recencyScore * 0.3 + importance * 0.2;
188-
break;
189-
default:
190-
// recency-biased (default)
191-
finalScore = r.score * 0.4 + recencyScore * 0.4 + importance * 0.2;
192-
break;
193-
}
194-
195-
return { ...r, score: finalScore };
173+
return {
174+
...r,
175+
score: calculateEpisodeRecallScore(
176+
r.score,
177+
{
178+
importance: (r.payload.importance as number) ?? 0.5,
179+
accessCount: (r.payload.access_count as number) ?? 0,
180+
startedAt: (r.payload.started_at as number) ?? 0,
181+
lastAccessedAt: (r.payload.last_accessed_at as string | undefined) ?? undefined,
182+
decayRate: (r.payload.decay_rate as number) ?? 1,
183+
},
184+
strategy,
185+
),
186+
};
196187
})
197188
.sort((a, b) => b.score - a.score);
198189
}

0 commit comments

Comments
 (0)