Merge pull request #2 from coe0718/memory-ranking-reinforcement-decay

mcheemaa · web-flow · commit bbf89ef3c446 · 2026-03-30T21:52:11.000-07:00
Improve episodic memory ranking with reinforcement and decay
diff --git a/docs/memory.md b/docs/memory.md
@@ -17,6 +17,8 @@ Session transcripts stored as embeddings. Each episode contains:
 
 Search: "What happened last time I worked on the auth service?"
 
+Episodes are not ranked by raw vector score alone. Retrieval blends the semantic match with importance, reinforcement from repeated access, and decay over time, so durable memories stay available while stale one-off memories fade.
+
 ### Tier 2: Semantic Memory
 
 Accumulated facts with contradiction detection and temporal validity:
@@ -50,7 +52,8 @@ Before each agent invocation, the context builder:
 3. Searches semantic memory (top 20 facts)
 4. Searches procedural memory (top 5 procedures)
 5. Budgets results to fit within the token limit (default: 50,000 tokens)
-6. Formats results into the memory section of the system prompt
+6. Filters out stale, low-signal episodic memories before prompt injection
+7. Formats results into the memory section of the system prompt
 
 ## Consolidation
 
diff --git a/src/memory/__tests__/context-builder.test.ts b/src/memory/__tests__/context-builder.test.ts
@@ -124,6 +124,61 @@ describe("MemoryContextBuilder", () => {
 		expect(result).toContain("success");
 	});
 
+	test("filters stale low-signal episodes from prompt context", async () => {
+		const memory = createMockMemorySystem({
+			episodes: Promise.resolve([
+				{
+					id: "stale-ep",
+					type: "task" as const,
+					summary: "One-off stale note",
+					detail: "No longer important",
+					parent_id: null,
+					session_id: "s1",
+					user_id: "u1",
+					tools_used: [],
+					files_touched: [],
+					outcome: "success" as const,
+					outcome_detail: "",
+					lessons: [],
+					started_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
+					ended_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
+					duration_seconds: 300,
+					importance: 0.2,
+					access_count: 0,
+					last_accessed_at: new Date(Date.now() - 90 * 24 * 3600 * 1000).toISOString(),
+					decay_rate: 1.0,
+				},
+				{
+					id: "durable-ep",
+					type: "task" as const,
+					summary: "Repeated deployment pattern",
+					detail: "Still referenced often",
+					parent_id: null,
+					session_id: "s2",
+					user_id: "u1",
+					tools_used: ["Bash"],
+					files_touched: [],
+					outcome: "success" as const,
+					outcome_detail: "",
+					lessons: [],
+					started_at: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
+					ended_at: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
+					duration_seconds: 300,
+					importance: 0.8,
+					access_count: 4,
+					last_accessed_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
+					decay_rate: 1.0,
+				},
+			]),
+		});
+
+		const builder = new MemoryContextBuilder(memory, TEST_CONFIG);
+		const result = await builder.build("deployment");
+
+		expect(result).toContain("Repeated deployment pattern");
+		expect(result).not.toContain("One-off stale note");
+	});
+
 	test("formats procedure section correctly", async () => {
 		const memory = createMockMemorySystem({
 			procedure: Promise.resolve({
diff --git a/src/memory/__tests__/episodic.test.ts b/src/memory/__tests__/episodic.test.ts
@@ -234,4 +234,66 @@ describe("EpisodicStore", () => {
 		expect(episodes[0].id).toBe("new-ep");
 		expect(episodes[1].id).toBe("old-ep");
 	});
+
+	test("recall() metadata strategy favors reinforced memories", async () => {
+		const vec = make768dVector();
+		const now = Date.now();
+
+		globalThis.fetch = mock((url: string | Request) => {
+			const urlStr = typeof url === "string" ? url : url.url;
+
+			if (urlStr.includes("/api/embed")) {
+				return Promise.resolve(new Response(JSON.stringify({ embeddings: [vec] }), { status: 200 }));
+			}
+
+			if (urlStr.includes("/points/query")) {
+				return Promise.resolve(
+					new Response(
+						JSON.stringify({
+							result: {
+								points: [
+									{
+										id: "stale-ep",
+										score: 0.82,
+										payload: {
+											type: "task",
+											summary: "Stale one-off episode",
+											importance: 0.3,
+											access_count: 0,
+											last_accessed_at: new Date(now - 45 * 24 * 3600 * 1000).toISOString(),
+											started_at: now - 45 * 24 * 3600 * 1000,
+										},
+									},
+									{
+										id: "durable-ep",
+										score: 0.7,
+										payload: {
+											type: "task",
+											summary: "Frequently reused deployment memory",
+											importance: 0.8,
+											access_count: 6,
+											last_accessed_at: new Date(now - 2 * 24 * 3600 * 1000).toISOString(),
+											started_at: now - 45 * 24 * 3600 * 1000,
+										},
+									},
+								],
+							},
+						}),
+						{ status: 200, headers: { "Content-Type": "application/json" } },
+					),
+				);
+			}
+
+			return Promise.resolve(new Response(JSON.stringify({ status: "ok" }), { status: 200 }));
+		}) as unknown as typeof fetch;
+
+		const qdrant = new QdrantClient(TEST_CONFIG);
+		const embedder = new EmbeddingClient(TEST_CONFIG);
+		const store = new EpisodicStore(qdrant, embedder, TEST_CONFIG);
+
+		const episodes = await store.recall("deployment", { strategy: "metadata" });
+
+		expect(episodes[0].id).toBe("durable-ep");
+		expect(episodes[1].id).toBe("stale-ep");
+	});
 });
diff --git a/src/memory/__tests__/ranking.test.ts b/src/memory/__tests__/ranking.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, test } from "bun:test";
+import { calculateEpisodeRecallScore, shouldIncludeEpisodeInContext } from "../ranking.ts";
+import type { Episode } from "../types.ts";
+
+function makeEpisode(overrides?: Partial<Episode>): Episode {
+	return {
+		id: "ep-1",
+		type: "task",
+		summary: "Memory summary",
+		detail: "Memory detail",
+		parent_id: null,
+		session_id: "session-1",
+		user_id: "user-1",
+		tools_used: [],
+		files_touched: [],
+		outcome: "success",
+		outcome_detail: "Completed successfully",
+		lessons: [],
+		started_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
+		ended_at: new Date().toISOString(),
+		duration_seconds: 60,
+		importance: 0.6,
+		access_count: 0,
+		last_accessed_at: new Date().toISOString(),
+		decay_rate: 1,
+		...overrides,
+	};
+}
+
+describe("memory ranking", () => {
+	test("metadata strategy rewards reinforced memories", () => {
+		const staleWeak = calculateEpisodeRecallScore(
+			0.82,
+			{
+				importance: 0.3,
+				accessCount: 0,
+				startedAt: Date.now() - 45 * 24 * 3600 * 1000,
+				lastAccessedAt: new Date(Date.now() - 45 * 24 * 3600 * 1000).toISOString(),
+				decayRate: 1,
+			},
+			"metadata",
+		);
+
+		const durableRepeat = calculateEpisodeRecallScore(
+			0.7,
+			{
+				importance: 0.8,
+				accessCount: 6,
+				startedAt: Date.now() - 45 * 24 * 3600 * 1000,
+				lastAccessedAt: new Date(Date.now() - 2 * 24 * 3600 * 1000).toISOString(),
+				decayRate: 1,
+			},
+			"metadata",
+		);
+
+		expect(durableRepeat).toBeGreaterThan(staleWeak);
+	});
+
+	test("context filtering drops stale low-signal memories", () => {
+		const staleWeak = makeEpisode({
+			importance: 0.2,
+			access_count: 0,
+			started_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
+			last_accessed_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
+		});
+
+		const durableRepeat = makeEpisode({
+			id: "ep-2",
+			importance: 0.85,
+			access_count: 5,
+			started_at: new Date(Date.now() - 60 * 24 * 3600 * 1000).toISOString(),
+			last_accessed_at: new Date(Date.now() - 24 * 3600 * 1000).toISOString(),
+		});
+
+		expect(shouldIncludeEpisodeInContext(staleWeak)).toBe(false);
+		expect(shouldIncludeEpisodeInContext(durableRepeat)).toBe(true);
+	});
+});
diff --git a/src/memory/context-builder.ts b/src/memory/context-builder.ts
@@ -1,4 +1,5 @@
 import type { MemoryConfig } from "../config/types.ts";
+import { shouldIncludeEpisodeInContext } from "./ranking.ts";
 import type { MemorySystem } from "./system.ts";
 import type { Episode, Procedure, SemanticFact } from "./types.ts";
 
@@ -44,10 +45,13 @@ export class MemoryContextBuilder {
 
 		// Recent memories provide episode context
 		if (episodes.length > 0 && tokenBudget > 500) {
-			const episodeSection = this.formatEpisodes(episodes, tokenBudget);
+			const durableEpisodes = episodes.filter(shouldIncludeEpisodeInContext);
+			const episodeSection = this.formatEpisodes(durableEpisodes, tokenBudget);
 			const episodeTokens = this.estimateTokens(episodeSection);
-			sections.push(episodeSection);
-			tokenBudget -= episodeTokens;
+			if (episodeSection) {
+				sections.push(episodeSection);
+				tokenBudget -= episodeTokens;
+			}
 		}
 
 		// Relevant procedures
@@ -70,6 +74,8 @@ export class MemoryContextBuilder {
 	}
 
 	private formatEpisodes(episodes: Episode[], tokenBudget: number): string {
+		if (episodes.length === 0) return "";
+
 		const header = "## Recent Memories\n";
 		let content = header;
 		const maxChars = tokenBudget * CHARS_PER_TOKEN;
diff --git a/src/memory/episodic.ts b/src/memory/episodic.ts
@@ -1,6 +1,7 @@
 import type { MemoryConfig } from "../config/types.ts";
 import { type EmbeddingClient, textToSparseVector } from "./embeddings.ts";
 import type { QdrantClient } from "./qdrant-client.ts";
+import { calculateEpisodeRecallScore } from "./ranking.ts";
 import type { Episode, QdrantSearchResult, RecallOptions } from "./types.ts";
 
 const COLLECTION_SCHEMA = {
@@ -128,6 +129,7 @@ export class EpisodicStore {
 		for (const id of ids) {
 			try {
 				await this.qdrant.updatePayload(this.collectionName, id, {
+					access_count: { $inc: 1 },
 					last_accessed_at: new Date().toISOString(),
 				});
 			} catch {
@@ -165,34 +167,23 @@ export class EpisodicStore {
 		return { must };
 	}
 
-	private applyStrategy(results: QdrantSearchResult[], strategy: string): QdrantSearchResult[] {
-		const now = Date.now();
-
+	private applyStrategy(results: QdrantSearchResult[], strategy: RecallOptions["strategy"]): QdrantSearchResult[] {
 		return results
 			.map((r) => {
-				const startedAt = (r.payload.started_at as number) ?? 0;
-				const importance = (r.payload.importance as number) ?? 0.5;
-				const hoursSince = (now - startedAt) / (1000 * 60 * 60);
-				const recencyScore = Math.exp(-0.01 * hoursSince);
-
-				let finalScore: number;
-				switch (strategy) {
-					case "similarity":
-						finalScore = r.score * 0.7 + importance * 0.2 + recencyScore * 0.1;
-						break;
-					case "temporal":
-						finalScore = recencyScore * 0.7 + r.score * 0.2 + importance * 0.1;
-						break;
-					case "metadata":
-						finalScore = r.score * 0.5 + recencyScore * 0.3 + importance * 0.2;
-						break;
-					default:
-						// recency-biased (default)
-						finalScore = r.score * 0.4 + recencyScore * 0.4 + importance * 0.2;
-						break;
-				}
-
-				return { ...r, score: finalScore };
+				return {
+					...r,
+					score: calculateEpisodeRecallScore(
+						r.score,
+						{
+							importance: (r.payload.importance as number) ?? 0.5,
+							accessCount: (r.payload.access_count as number) ?? 0,
+							startedAt: (r.payload.started_at as number) ?? 0,
+							lastAccessedAt: (r.payload.last_accessed_at as string | undefined) ?? undefined,
+							decayRate: (r.payload.decay_rate as number) ?? 1,
+						},
+						strategy,
+					),
+				};
 			})
 			.sort((a, b) => b.score - a.score);
 	}
diff --git a/src/memory/ranking.ts b/src/memory/ranking.ts