diff --git a/config.schema.json b/config.schema.json index 8688cd3..a9c502c 100644 --- a/config.schema.json +++ b/config.schema.json @@ -13,7 +13,7 @@ "type": "number", "minimum": 0.01, "maximum": 0.5, - "default": 0.10, + "default": 0.1, "description": "Angular distance threshold for cluster assignment" }, "minClusterSize": { diff --git a/docker-compose.yml b/docker-compose.yml index 6f27595..e14721f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,10 +18,16 @@ services: # - CAUSANTIC_ANTHROPIC_KEY=${CAUSANTIC_ANTHROPIC_KEY} ports: # HTTP port for web dashboard (when implemented) - - "3000:3000" + - '3000:3000' restart: unless-stopped healthcheck: - test: ["CMD", "node", "-e", "require('./dist/storage/db.js').getDatabase().prepare('SELECT 1').get()"] + test: + [ + 'CMD', + 'node', + '-e', + "require('./dist/storage/db.js').getDatabase().prepare('SELECT 1').get()", + ] interval: 30s timeout: 10s retries: 3 @@ -39,7 +45,7 @@ services: environment: - CAUSANTIC_STORAGE_DB_PATH=/data/causantic/memory.db - CAUSANTIC_STORAGE_VECTOR_PATH=/data/causantic/vectors - command: ["node", "dist/cli/index.js", "maintenance", "daemon"] + command: ['node', 'dist/cli/index.js', 'maintenance', 'daemon'] restart: unless-stopped depends_on: - causantic diff --git a/package.json b/package.json index 2c1a7e7..ac18155 100644 --- a/package.json +++ b/package.json @@ -55,8 +55,8 @@ "test:watch": "vitest", "lint": "eslint src/ test/", "lint:fix": "eslint src/ test/ --fix", - "format": "prettier --write 'src/**/*.ts' 'test/**/*.ts'", - "format:check": "prettier --check 'src/**/*.ts' 'test/**/*.ts'", + "format": "prettier --write 'src/**/*.{ts,tsx,css}' 'test/**/*.ts' 'scripts/**/*.ts' config.schema.json docker-compose.yml", + "format:check": "prettier --check 'src/**/*.{ts,tsx,css}' 'test/**/*.ts' 'scripts/**/*.ts' config.schema.json docker-compose.yml", "ingest": "tsx scripts/ops/ingest.ts", "batch-ingest": "tsx scripts/ops/batch-ingest.ts", "recall": "tsx scripts/ops/recall.ts", diff --git a/scripts/benchmarks/hdbscan-benchmark.ts b/scripts/benchmarks/hdbscan-benchmark.ts index 489dedb..2a44d57 100644 --- a/scripts/benchmarks/hdbscan-benchmark.ts +++ b/scripts/benchmarks/hdbscan-benchmark.ts @@ -108,7 +108,7 @@ async function runBenchmarks() { } console.log( - `| ${size.toString().padEnd(4)} | ${String(nativeParallel.ms).padEnd(17)}ms | ${String(nativeSingle.ms).padEnd(15)}ms | ${String(oldLib.ms).padEnd(10)}ms |` + `| ${size.toString().padEnd(4)} | ${String(nativeParallel.ms).padEnd(17)}ms | ${String(nativeSingle.ms).padEnd(15)}ms | ${String(oldLib.ms).padEnd(10)}ms |`, ); } @@ -155,7 +155,9 @@ async function runBenchmarks() { console.log(`Approximate k-NN: ${approxResult.ms}ms`); console.log('\n--- Cluster Quality Comparison ---'); - console.log(`Native clusters: ${defaultResult.value.numClusters}, noise: ${defaultResult.value.noiseCount}`); + console.log( + `Native clusters: ${defaultResult.value.numClusters}, noise: ${defaultResult.value.noiseCount}`, + ); try { const oldH = new OldHDBSCAN({ minClusterSize: 4, minSamples: 4 }); diff --git a/scripts/debug/compare-retrieval.ts b/scripts/debug/compare-retrieval.ts index c01f220..7a5246b 100644 --- a/scripts/debug/compare-retrieval.ts +++ b/scripts/debug/compare-retrieval.ts @@ -29,14 +29,14 @@ async function main() { const chunk = getChunkById(r.id); const preview = chunk?.content.slice(0, 100).replace(/\n/g, ' ') || '(no content)'; const similarity = (1 - r.distance).toFixed(3); - console.log((i + 1) + '. 
[sim=' + similarity + '] ' + (chunk?.sessionSlug || 'unknown')); + console.log(i + 1 + '. [sim=' + similarity + '] ' + (chunk?.sessionSlug || 'unknown')); console.log(' ' + preview + '...'); } console.log('\n' + '='.repeat(80)); console.log('\n## GRAPH TRAVERSAL ADDITIONS\n'); - const startIds = vectorResults.map(r => r.id); + const startIds = vectorResults.map((r) => r.id); // Uses config defaults: maxDepth=50, minWeight=0.01 const backwardResult = traverseMultiple(startIds, { @@ -47,11 +47,23 @@ async function main() { direction: 'forward', }); - const backwardAdditions = backwardResult.chunks.filter(c => !vectorChunkIds.has(c.chunkId)); - const forwardAdditions = forwardResult.chunks.filter(c => !vectorChunkIds.has(c.chunkId)); - - console.log('Backward traversal: ' + backwardResult.chunks.length + ' total, ' + backwardAdditions.length + ' NEW'); - console.log('Forward traversal: ' + forwardResult.chunks.length + ' total, ' + forwardAdditions.length + ' NEW'); + const backwardAdditions = backwardResult.chunks.filter((c) => !vectorChunkIds.has(c.chunkId)); + const forwardAdditions = forwardResult.chunks.filter((c) => !vectorChunkIds.has(c.chunkId)); + + console.log( + 'Backward traversal: ' + + backwardResult.chunks.length + + ' total, ' + + backwardAdditions.length + + ' NEW', + ); + console.log( + 'Forward traversal: ' + + forwardResult.chunks.length + + ' total, ' + + forwardAdditions.length + + ' NEW', + ); if (backwardAdditions.length > 0) { console.log('\n### Top 5 Backward Additions (context that LED TO matches):\n'); @@ -59,7 +71,9 @@ async function main() { const c = backwardAdditions[i]; const chunk = getChunkById(c.chunkId); const preview = chunk?.content.slice(0, 120).replace(/\n/g, ' ') || ''; - console.log((i + 1) + '. [w=' + c.weight.toFixed(3) + ' d=' + c.depth + '] ' + (chunk?.sessionSlug || '')); + console.log( + i + 1 + '. [w=' + c.weight.toFixed(3) + ' d=' + c.depth + '] ' + (chunk?.sessionSlug || ''), + ); console.log(' ' + preview + '...'); } } @@ -70,7 +84,9 @@ async function main() { const c = forwardAdditions[i]; const chunk = getChunkById(c.chunkId); const preview = chunk?.content.slice(0, 120).replace(/\n/g, ' ') || ''; - console.log((i + 1) + '. [w=' + c.weight.toFixed(3) + ' d=' + c.depth + '] ' + (chunk?.sessionSlug || '')); + console.log( + i + 1 + '. 
[w=' + c.weight.toFixed(3) + ' d=' + c.depth + '] ' + (chunk?.sessionSlug || ''), + ); console.log(' ' + preview + '...'); } } @@ -81,7 +97,13 @@ async function main() { console.log('Vector search alone: ' + vectorResults.length + ' chunks'); console.log('+ Backward traversal: ' + backwardAdditions.length + ' additional'); console.log('+ Forward traversal: ' + forwardAdditions.length + ' additional'); - console.log('Graph added: ' + totalAdded + ' chunks (' + Math.round(totalAdded / vectorResults.length * 100) + '% increase)'); + console.log( + 'Graph added: ' + + totalAdded + + ' chunks (' + + Math.round((totalAdded / vectorResults.length) * 100) + + '% increase)', + ); await embedder.dispose(); closeDb(); diff --git a/scripts/debug/debug-forward.ts b/scripts/debug/debug-forward.ts index df0e21e..86d16f2 100644 --- a/scripts/debug/debug-forward.ts +++ b/scripts/debug/debug-forward.ts @@ -14,7 +14,7 @@ async function debug(query: string) { // Vector search const vectorResults = await vectorStore.search(embedding, 20); - const vectorIds = new Set(vectorResults.map(r => r.id)); + const vectorIds = new Set(vectorResults.map((r) => r.id)); console.log(`\n=== Query: "${query}" ===\n`); console.log(`Vector search found: ${vectorResults.length} chunks\n`); @@ -35,15 +35,17 @@ async function debug(query: string) { totalForwardTargets++; if (vectorIds.has(edge.targetChunkId)) { targetsAlreadyInVectorSearch++; - console.log(` -> Target ${edge.targetChunkId.slice(0,30)}... ALREADY in vector search`); + console.log(` -> Target ${edge.targetChunkId.slice(0, 30)}... ALREADY in vector search`); } else { // Check if it was filtered by decay - const weighted = weightedForward.find(w => w.targetChunkId === edge.targetChunkId); + const weighted = weightedForward.find((w) => w.targetChunkId === edge.targetChunkId); if (!weighted) { targetsWithZeroWeight++; - console.log(` -> Target ${edge.targetChunkId.slice(0,30)}... FILTERED (zero weight)`); + console.log(` -> Target ${edge.targetChunkId.slice(0, 30)}... FILTERED (zero weight)`); } else { - console.log(` -> Target ${edge.targetChunkId.slice(0,30)}... weight=${weighted.weight.toFixed(3)} SHOULD BE ADDED`); + console.log( + ` -> Target ${edge.targetChunkId.slice(0, 30)}... 
weight=${weighted.weight.toFixed(3)} SHOULD BE ADDED`, + ); } } } @@ -54,7 +56,9 @@ async function debug(query: string) { console.log(`Total forward edge targets: ${totalForwardTargets}`); console.log(`Already in vector search: ${targetsAlreadyInVectorSearch}`); console.log(`Filtered by decay: ${targetsWithZeroWeight}`); - console.log(`Should be added: ${totalForwardTargets - targetsAlreadyInVectorSearch - targetsWithZeroWeight}`); + console.log( + `Should be added: ${totalForwardTargets - targetsAlreadyInVectorSearch - targetsWithZeroWeight}`, + ); await embedder.dispose(); } diff --git a/scripts/debug/debug-traversal.ts b/scripts/debug/debug-traversal.ts index 46cc921..fea87ca 100644 --- a/scripts/debug/debug-traversal.ts +++ b/scripts/debug/debug-traversal.ts @@ -9,50 +9,66 @@ async function debug() { const query = process.argv[2] || 'git commit workflow'; const embedder = new Embedder(); await embedder.load(getModel('jina-small')); - + const { embedding } = await embedder.embed(query, true); const results = await vectorStore.search(embedding, 10); - + console.log('Query: "' + query + '"\n'); console.log('Vector search results and their edges:\n'); - + const allTargets = new Set(); - const vectorIds = new Set(results.map(r => r.id)); - + const vectorIds = new Set(results.map((r) => r.id)); + for (const r of results) { const chunk = getChunkById(r.id); const backEdges = getOutgoingEdges(r.id, 'backward'); const fwdEdges = getOutgoingEdges(r.id, 'forward'); - + console.log('Chunk: ' + r.id.slice(-25)); console.log(' Session: ' + chunk?.sessionSlug); console.log(' Sim: ' + (1 - r.distance).toFixed(3)); console.log(' Backward edges: ' + backEdges.length); console.log(' Forward edges: ' + fwdEdges.length); - + if (backEdges.length > 0 || fwdEdges.length > 0) { for (const e of backEdges) { allTargets.add(e.targetChunkId); const inVector = vectorIds.has(e.targetChunkId) ? ' (OVERLAP)' : ''; - console.log(' BACK -> ' + e.targetChunkId.slice(-20) + ' w=' + e.initialWeight.toFixed(2) + ' links=' + e.linkCount + inVector); + console.log( + ' BACK -> ' + + e.targetChunkId.slice(-20) + + ' w=' + + e.initialWeight.toFixed(2) + + ' links=' + + e.linkCount + + inVector, + ); } for (const e of fwdEdges) { allTargets.add(e.targetChunkId); const inVector = vectorIds.has(e.targetChunkId) ? 
' (OVERLAP)' : ''; - console.log(' FWD -> ' + e.targetChunkId.slice(-20) + ' w=' + e.initialWeight.toFixed(2) + ' links=' + e.linkCount + inVector); + console.log( + ' FWD -> ' + + e.targetChunkId.slice(-20) + + ' w=' + + e.initialWeight.toFixed(2) + + ' links=' + + e.linkCount + + inVector, + ); } } console.log(''); } - - const overlapping = [...allTargets].filter(t => vectorIds.has(t)).length; + + const overlapping = [...allTargets].filter((t) => vectorIds.has(t)).length; const newTargets = allTargets.size - overlapping; console.log('='.repeat(60)); console.log('Total edge targets: ' + allTargets.size); console.log(' Already in vector results: ' + overlapping); console.log(' NEW (added by traversal): ' + newTargets); - + await embedder.dispose(); closeDb(); } diff --git a/scripts/debug/investigate-traversal.ts b/scripts/debug/investigate-traversal.ts index d91b4f3..79abb65 100644 --- a/scripts/debug/investigate-traversal.ts +++ b/scripts/debug/investigate-traversal.ts @@ -37,9 +37,9 @@ interface QueryResult { async function analyzeQuery(embedder: Embedder, query: string): Promise { const { embedding } = await embedder.embed(query, true); const vectorResults = await vectorStore.search(embedding, 10); - - const vectorChunkIds = new Set(vectorResults.map(r => r.id)); - + + const vectorChunkIds = new Set(vectorResults.map((r) => r.id)); + // Analyze edge density on vector results let totalBackEdges = 0; let totalFwdEdges = 0; @@ -50,14 +50,14 @@ async function analyzeQuery(embedder: Embedder, query: string): Promise 0) chunksWithBack++; if (fwdEdges.length > 0) chunksWithFwd++; } - - const startIds = vectorResults.map(r => r.id); + + const startIds = vectorResults.map((r) => r.id); // Traverse (uses config defaults: maxDepth=50, minWeight=0.01) const backwardResult = traverseMultiple(startIds, { @@ -67,10 +67,10 @@ async function analyzeQuery(embedder: Embedder, query: string): Promise !vectorChunkIds.has(c.chunkId)).length; - const forwardAdded = forwardResult.chunks.filter(c => !vectorChunkIds.has(c.chunkId)).length; - + + const backwardAdded = backwardResult.chunks.filter((c) => !vectorChunkIds.has(c.chunkId)).length; + const forwardAdded = forwardResult.chunks.filter((c) => !vectorChunkIds.has(c.chunkId)).length; + return { query, vectorCount: vectorResults.length, @@ -85,23 +85,29 @@ async function analyzeQuery(embedder: Embedder, query: string): Promise; - + `, + ) + .all() as Array<{ back_count: number; chunks: number }>; + console.log('\nBackward edge distribution (edges per chunk):'); for (const d of chunkEdgeDist) { console.log(' ' + d.back_count + ' edges: ' + d.chunks + ' chunks'); } - + // Reference type distribution - const refTypes = db.prepare(` + const refTypes = db + .prepare( + ` SELECT reference_type, COUNT(*) as count FROM edges WHERE reference_type IS NOT NULL GROUP BY reference_type ORDER BY count DESC - `).all() as Array<{reference_type: string, count: number}>; - + `, + ) + .all() as Array<{ reference_type: string; count: number }>; + console.log('\nEdge reference types:'); for (const r of refTypes) { console.log(' ' + r.reference_type + ': ' + r.count); } - } async function main() { console.log('='.repeat(80)); console.log('GRAPH TRAVERSAL INVESTIGATION'); console.log('='.repeat(80)); - + const embedder = new Embedder(); await embedder.load(getModel('jina-small')); - + console.log('\nRunning ' + TEST_QUERIES.length + ' test queries...\n'); - + const results: QueryResult[] = []; - + for (const query of TEST_QUERIES) { process.stdout.write('Testing: ' + query.slice(0, 
40) + '... '); const result = await analyzeQuery(embedder, query); results.push(result); console.log('done'); } - + // Print results table console.log('\n' + '='.repeat(80)); console.log('QUERY RESULTS'); console.log('='.repeat(80)); console.log('\nQuery | Vec | +Back | +Fwd | Back/Ch | Fwd/Ch'); console.log('-'.repeat(80)); - + for (const r of results) { const queryShort = r.query.slice(0, 34).padEnd(34); console.log( - queryShort + ' | ' + - String(r.vectorCount).padStart(3) + ' | ' + - String(r.backwardAdded).padStart(5) + ' | ' + - String(r.forwardAdded).padStart(4) + ' | ' + - r.avgBackwardEdgesPerChunk.toFixed(1).padStart(7) + ' | ' + - r.avgForwardEdgesPerChunk.toFixed(1).padStart(6) + queryShort + + ' | ' + + String(r.vectorCount).padStart(3) + + ' | ' + + String(r.backwardAdded).padStart(5) + + ' | ' + + String(r.forwardAdded).padStart(4) + + ' | ' + + r.avgBackwardEdgesPerChunk.toFixed(1).padStart(7) + + ' | ' + + r.avgForwardEdgesPerChunk.toFixed(1).padStart(6), ); } - + // Summary statistics console.log('\n' + '='.repeat(80)); console.log('SUMMARY STATISTICS'); console.log('='.repeat(80)); - + const totalBackward = results.reduce((s, r) => s + r.backwardAdded, 0); const totalForward = results.reduce((s, r) => s + r.forwardAdded, 0); const totalVector = results.reduce((s, r) => s + r.vectorCount, 0); const avgBackEdges = results.reduce((s, r) => s + r.avgBackwardEdgesPerChunk, 0) / results.length; const avgFwdEdges = results.reduce((s, r) => s + r.avgForwardEdgesPerChunk, 0) / results.length; - const queriesWithBackward = results.filter(r => r.backwardAdded > 0).length; - const queriesWithForward = results.filter(r => r.forwardAdded > 0).length; - + const queriesWithBackward = results.filter((r) => r.backwardAdded > 0).length; + const queriesWithForward = results.filter((r) => r.forwardAdded > 0).length; + console.log('\nAcross ' + results.length + ' queries:'); console.log(' Total chunks from vector search: ' + totalVector); - console.log(' Total chunks from backward: ' + totalBackward + ' (' + (totalBackward/totalVector*100).toFixed(1) + '% increase)'); - console.log(' Total chunks from forward: ' + totalForward + ' (' + (totalForward/totalVector*100).toFixed(1) + '% increase)'); - console.log(' Combined graph contribution: ' + (totalBackward + totalForward) + ' (' + ((totalBackward+totalForward)/totalVector*100).toFixed(1) + '% increase)'); + console.log( + ' Total chunks from backward: ' + + totalBackward + + ' (' + + ((totalBackward / totalVector) * 100).toFixed(1) + + '% increase)', + ); + console.log( + ' Total chunks from forward: ' + + totalForward + + ' (' + + ((totalForward / totalVector) * 100).toFixed(1) + + '% increase)', + ); + console.log( + ' Combined graph contribution: ' + + (totalBackward + totalForward) + + ' (' + + (((totalBackward + totalForward) / totalVector) * 100).toFixed(1) + + '% increase)', + ); console.log(''); console.log(' Queries with backward additions: ' + queriesWithBackward + '/' + results.length); console.log(' Queries with forward additions: ' + queriesWithForward + '/' + results.length); console.log(''); console.log(' Avg backward edges per chunk: ' + avgBackEdges.toFixed(2)); console.log(' Avg forward edges per chunk: ' + avgFwdEdges.toFixed(2)); - + // Global analysis await globalEdgeAnalysis(); - + await embedder.dispose(); closeDb(); } diff --git a/scripts/experiments/cross-project-experiment.ts b/scripts/experiments/cross-project-experiment.ts index 7ff1879..3a7228c 100644 --- a/scripts/experiments/cross-project-experiment.ts +++ 
b/scripts/experiments/cross-project-experiment.ts @@ -41,12 +41,16 @@ const GENERIC_QUERIES = [ async function extractProjectQueries(projectSlug: string, limit: number = 5): Promise { const db = getDb(); - const chunks = db.prepare(` + const chunks = db + .prepare( + ` SELECT content FROM chunks WHERE session_slug LIKE ? ORDER BY RANDOM() LIMIT 20 - `).all(`%${projectSlug}%`) as Array<{content: string}>; + `, + ) + .all(`%${projectSlug}%`) as Array<{ content: string }>; const queries: string[] = []; @@ -58,7 +62,9 @@ async function extractProjectQueries(projectSlug: string, limit: number = 5): Pr const codeMatches = chunk.content.match(/(?:function|class|const|def)\s+(\w+)/g); if (codeMatches) { - queries.push(...codeMatches.slice(0, 2).map(m => m.replace(/^(function|class|const|def)\s+/, ''))); + queries.push( + ...codeMatches.slice(0, 2).map((m) => m.replace(/^(function|class|const|def)\s+/, '')), + ); } if (queries.length >= limit) break; @@ -88,17 +94,21 @@ interface ProjectResult { async function analyzeProject( embedder: Embedder, projectSlug: string, - queries: string[] + queries: string[], ): Promise { const db = getDb(); - const stats = db.prepare(` + const stats = db + .prepare( + ` SELECT COUNT(DISTINCT session_slug) as sessions, COUNT(*) as chunks FROM chunks WHERE session_slug LIKE ? - `).get(`%${projectSlug}%`) as {sessions: number, chunks: number}; + `, + ) + .get(`%${projectSlug}%`) as { sessions: number; chunks: number }; const queryResults: ProjectResult['queryResults'] = []; let totalVectorResults = 0; @@ -114,8 +124,8 @@ async function analyzeProject( if (vectorResults.length === 0) continue; - const vectorChunkIds = new Set(vectorResults.map(r => r.id)); - const startIds = vectorResults.map(r => r.id); + const vectorChunkIds = new Set(vectorResults.map((r) => r.id)); + const startIds = vectorResults.map((r) => r.id); // Walk chains backward + forward from seeds const backwardChains = await walkChains(startIds, { @@ -142,7 +152,7 @@ async function analyzeProject( } } - const chainAdditions = [...allChainChunkIds].filter(id => !vectorChunkIds.has(id)).length; + const chainAdditions = [...allChainChunkIds].filter((id) => !vectorChunkIds.has(id)).length; queryResults.push({ query: query.slice(0, 50), @@ -185,12 +195,16 @@ async function main() { console.log('='.repeat(100)); console.log('CROSS-PROJECT CHAIN WALKING AUGMENTATION EXPERIMENT (v0.3)'); console.log('='.repeat(100)); - console.log('\nMethodology: vector search → chain walk (backward + forward) → count additional chunks'); + console.log( + '\nMethodology: vector search → chain walk (backward + forward) → count additional chunks', + ); console.log('Comparable to v0.2 experiment (traverseMultiple → sum-product graph traversal)'); const db = getDb(); - const projects = db.prepare(` + const projects = db + .prepare( + ` SELECT session_slug, COUNT(*) as chunk_count @@ -198,7 +212,9 @@ async function main() { GROUP BY session_slug HAVING chunk_count >= 20 ORDER BY chunk_count DESC - `).all() as Array<{session_slug: string, chunk_count: number}>; + `, + ) + .all() as Array<{ session_slug: string; chunk_count: number }>; const projectMap = new Map(); for (const p of projects) { @@ -230,7 +246,9 @@ async function main() { const result = await analyzeProject(embedder, project, allQueries); results.push(result); - console.log(`${result.augmentationRatio.toFixed(2)}x (${result.queryCount} queries, avg chain: ${result.avgChainLength.toFixed(1)})`); + console.log( + `${result.augmentationRatio.toFixed(2)}x 
(${result.queryCount} queries, avg chain: ${result.avgChainLength.toFixed(1)})`, + ); } // Print detailed results @@ -238,21 +256,32 @@ async function main() { console.log('RESULTS BY PROJECT'); console.log('='.repeat(120)); - console.log('\nProject'.padEnd(45) + ' | Sessions | Chunks | Queries | Vector | +Chain | Augment | Avg Chain | % w/ Chain'); + console.log( + '\nProject'.padEnd(45) + + ' | Sessions | Chunks | Queries | Vector | +Chain | Augment | Avg Chain | % w/ Chain', + ); console.log('-'.repeat(120)); for (const r of results) { - const chainPct = r.queryCount > 0 ? (r.queriesProducingChains / r.queryCount * 100) : 0; + const chainPct = r.queryCount > 0 ? (r.queriesProducingChains / r.queryCount) * 100 : 0; console.log( - r.project.slice(0, 43).padEnd(45) + ' | ' + - String(r.sessionCount).padStart(8) + ' | ' + - String(r.chunkCount).padStart(6) + ' | ' + - String(r.queryCount).padStart(7) + ' | ' + - r.avgVectorResults.toFixed(1).padStart(6) + ' | ' + - ('+' + r.avgChainAdditions.toFixed(1)).padStart(6) + ' | ' + - (r.augmentationRatio.toFixed(2) + 'x').padStart(7) + ' | ' + - r.avgChainLength.toFixed(1).padStart(9) + ' | ' + - (chainPct.toFixed(0) + '%').padStart(9) + r.project.slice(0, 43).padEnd(45) + + ' | ' + + String(r.sessionCount).padStart(8) + + ' | ' + + String(r.chunkCount).padStart(6) + + ' | ' + + String(r.queryCount).padStart(7) + + ' | ' + + r.avgVectorResults.toFixed(1).padStart(6) + + ' | ' + + ('+' + r.avgChainAdditions.toFixed(1)).padStart(6) + + ' | ' + + (r.augmentationRatio.toFixed(2) + 'x').padStart(7) + + ' | ' + + r.avgChainLength.toFixed(1).padStart(9) + + ' | ' + + (chainPct.toFixed(0) + '%').padStart(9), ); } @@ -266,11 +295,13 @@ async function main() { const totalQueries = results.reduce((s, r) => s + r.queryCount, 0); const totalQueriesWithChains = results.reduce((s, r) => s + r.queriesProducingChains, 0); - const weightedAugmentation = results.reduce((s, r) => s + r.augmentationRatio * r.queryCount, 0) / totalQueries; - const simpleAvgAugmentation = results.reduce((s, r) => s + r.augmentationRatio, 0) / results.length; + const weightedAugmentation = + results.reduce((s, r) => s + r.augmentationRatio * r.queryCount, 0) / totalQueries; + const simpleAvgAugmentation = + results.reduce((s, r) => s + r.augmentationRatio, 0) / results.length; - const minAug = Math.min(...results.map(r => r.augmentationRatio)); - const maxAug = Math.max(...results.map(r => r.augmentationRatio)); + const minAug = Math.min(...results.map((r) => r.augmentationRatio)); + const maxAug = Math.max(...results.map((r) => r.augmentationRatio)); const avgChainLength = results.reduce((s, r) => s + r.avgChainLength, 0) / results.length; @@ -285,7 +316,7 @@ async function main() { console.log(` Simple average: ${simpleAvgAugmentation.toFixed(2)}x`); console.log(` Range: ${minAug.toFixed(2)}x - ${maxAug.toFixed(2)}x`); - const augmentations = results.map(r => r.augmentationRatio).sort((a, b) => a - b); + const augmentations = results.map((r) => r.augmentationRatio).sort((a, b) => a - b); const median = augmentations[Math.floor(augmentations.length / 2)]; const q1 = augmentations[Math.floor(augmentations.length * 0.25)]; const q3 = augmentations[Math.floor(augmentations.length * 0.75)]; @@ -295,18 +326,26 @@ async function main() { console.log('\nChain-Specific Metrics:'); console.log(` Mean chain length: ${avgChainLength.toFixed(1)} chunks`); - console.log(` Queries producing chains: ${totalQueriesWithChains}/${totalQueries} (${(totalQueriesWithChains / totalQueries * 
100).toFixed(0)}%)`); + console.log( + ` Queries producing chains: ${totalQueriesWithChains}/${totalQueries} (${((totalQueriesWithChains / totalQueries) * 100).toFixed(0)}%)`, + ); console.log('\nComparison to v0.2 (sum-product traversal, m×n edges):'); console.log(` v0.2 weighted average: 4.65x (492 queries, 25 projects)`); - console.log(` v0.3 weighted average: ${weightedAugmentation.toFixed(2)}x (${totalQueries} queries, ${results.length} projects)`); + console.log( + ` v0.3 weighted average: ${weightedAugmentation.toFixed(2)}x (${totalQueries} queries, ${results.length} projects)`, + ); console.log('\n' + '='.repeat(120)); console.log('CONCLUSION'); console.log('='.repeat(120)); console.log(`\nAcross ${results.length} independent projects and ${totalQueries} queries:`); - console.log(`Chain walking provides ${weightedAugmentation.toFixed(2)}× augmentation vs vector search alone.`); - console.log(`${(totalQueriesWithChains / totalQueries * 100).toFixed(0)}% of queries produce episodic chains (avg ${avgChainLength.toFixed(1)} chunks).`); + console.log( + `Chain walking provides ${weightedAugmentation.toFixed(2)}× augmentation vs vector search alone.`, + ); + console.log( + `${((totalQueriesWithChains / totalQueries) * 100).toFixed(0)}% of queries produce episodic chains (avg ${avgChainLength.toFixed(1)} chunks).`, + ); await embedder.dispose(); closeDb(); diff --git a/scripts/experiments/run-cluster-threshold.ts b/scripts/experiments/run-cluster-threshold.ts index ee093ed..44e030e 100644 --- a/scripts/experiments/run-cluster-threshold.ts +++ b/scripts/experiments/run-cluster-threshold.ts @@ -59,19 +59,30 @@ Example: console.log(`Clusters: ${result.numClusters}`); console.log(`Noise ratio: ${(result.noiseRatio * 100).toFixed(1)}%`); console.log(`Silhouette: ${result.silhouetteScore.toFixed(3)}`); - console.log(`\nWithin-cluster distance: ${result.withinClusterDistances.mean.toFixed(3)} ± ${result.withinClusterDistances.std.toFixed(3)}`); - console.log(`Cross-cluster distance: ${result.crossClusterDistances.mean.toFixed(3)} ± ${result.crossClusterDistances.std.toFixed(3)}`); + console.log( + `\nWithin-cluster distance: ${result.withinClusterDistances.mean.toFixed(3)} ± ${result.withinClusterDistances.std.toFixed(3)}`, + ); + console.log( + `Cross-cluster distance: ${result.crossClusterDistances.mean.toFixed(3)} ± ${result.crossClusterDistances.std.toFixed(3)}`, + ); console.log(`\n*** Recommended threshold: ${result.recommendedThreshold} ***`); // Find best by precision if different - const bestPrecision = result.thresholds.reduce((a, b) => a.precision > b.precision ? a : b); - const bestRecall = result.thresholds.reduce((a, b) => a.recall > b.recall ? a : b); + const bestPrecision = result.thresholds.reduce((a, b) => (a.precision > b.precision ? a : b)); + const bestRecall = result.thresholds.reduce((a, b) => (a.recall > b.recall ? 
a : b)); if (bestPrecision.threshold !== result.recommendedThreshold) { - console.log(` (Best precision: ${bestPrecision.threshold} with ${(bestPrecision.precision * 100).toFixed(1)}%)`); + console.log( + ` (Best precision: ${bestPrecision.threshold} with ${(bestPrecision.precision * 100).toFixed(1)}%)`, + ); } - if (bestRecall.threshold !== result.recommendedThreshold && bestRecall.threshold !== bestPrecision.threshold) { - console.log(` (Best recall: ${bestRecall.threshold} with ${(bestRecall.recall * 100).toFixed(1)}%)`); + if ( + bestRecall.threshold !== result.recommendedThreshold && + bestRecall.threshold !== bestPrecision.threshold + ) { + console.log( + ` (Best recall: ${bestRecall.threshold} with ${(bestRecall.recall * 100).toFixed(1)}%)`, + ); } } diff --git a/scripts/experiments/run-edge-decay-experiments.ts b/scripts/experiments/run-edge-decay-experiments.ts index da3a255..1ece7f8 100644 --- a/scripts/experiments/run-edge-decay-experiments.ts +++ b/scripts/experiments/run-edge-decay-experiments.ts @@ -112,7 +112,7 @@ async function main(): Promise { console.log(`Projects directory: ${projectsDir}`); console.log(`Max sessions: ${maxSessions}`); console.log(`Output directory: ${outputDir}`); - console.log(`Decay models: ${PRESET_MODELS.map(m => m.id).join(', ')}\n`); + console.log(`Decay models: ${PRESET_MODELS.map((m) => m.id).join(', ')}\n`); await mkdir(outputDir, { recursive: true }); diff --git a/scripts/experiments/run-edge-decay-sim.ts b/scripts/experiments/run-edge-decay-sim.ts index a29c1fd..805db42 100644 --- a/scripts/experiments/run-edge-decay-sim.ts +++ b/scripts/experiments/run-edge-decay-sim.ts @@ -163,7 +163,7 @@ function printAsciiChart(comparison: ReturnType): void { // Print grid with Y axis for (let y = 0; y < height; y++) { - const weightLabel = ((height - 1 - y) / (height - 1) * maxWeight).toFixed(1); + const weightLabel = (((height - 1 - y) / (height - 1)) * maxWeight).toFixed(1); console.log(`${weightLabel.padStart(5)} |${grid[y].join('')}|`); } diff --git a/scripts/experiments/run-experiments.ts b/scripts/experiments/run-experiments.ts index e3dd058..6144a2a 100644 --- a/scripts/experiments/run-experiments.ts +++ b/scripts/experiments/run-experiments.ts @@ -15,7 +15,10 @@ import { readFile, writeFile, mkdir } from 'node:fs/promises'; import { join } from 'node:path'; import type { Corpus } from '../src/eval/corpus-builder.js'; import type { AnnotationSet } from '../src/eval/annotation-schema.js'; -import { singleModelRun, type SingleModelResult } from '../src/eval/experiments/single-model-run.js'; +import { + singleModelRun, + type SingleModelResult, +} from '../src/eval/experiments/single-model-run.js'; import { runTruncationExperiment } from '../src/eval/experiments/truncation.js'; import { runHdbscanSweep } from '../src/eval/experiments/hdbscan-sweep.js'; import { runBoilerplateExperiment } from '../src/eval/experiments/boilerplate-filter.js'; @@ -103,9 +106,7 @@ async function main(): Promise { const pairsJson = await readFile(join(corpusDir, 'labeled-pairs.json'), 'utf-8'); const annotations: AnnotationSet = JSON.parse(pairsJson); - console.log( - `Corpus: ${corpus.chunks.length} chunks, ${annotations.pairs.length} pairs`, - ); + console.log(`Corpus: ${corpus.chunks.length} chunks, ${annotations.pairs.length} pairs`); console.log(`Experiments to run: ${experiments.join(', ')}\n`); // Experiments 1-3 share baseline embeddings — compute once @@ -117,8 +118,8 @@ async function main(): Promise { baselineResult = await singleModelRun(MODEL_ID, 
corpus.chunks, annotations.pairs); console.log( ` Baseline: ROC AUC=${baselineResult.rocAuc.toFixed(3)}, ` + - `Silhouette=${baselineResult.silhouetteScore.toFixed(3)}, ` + - `Clusters=${baselineResult.clusterCount}`, + `Silhouette=${baselineResult.silhouetteScore.toFixed(3)}, ` + + `Clusters=${baselineResult.clusterCount}`, ); } @@ -127,30 +128,18 @@ async function main(): Promise { // Experiment 1: Truncation if (experiments.includes(1)) { - const result = await runTruncationExperiment( - corpus.chunks, - annotations.pairs, - baselineResult, - ); + const result = await runTruncationExperiment(corpus.chunks, annotations.pairs, baselineResult); experimentResults.push(result); } // Experiment 2: HDBSCAN sweep (different return type) if (experiments.includes(2)) { - sweepResult = await runHdbscanSweep( - corpus.chunks, - annotations.pairs, - baselineResult, - ); + sweepResult = await runHdbscanSweep(corpus.chunks, annotations.pairs, baselineResult); } // Experiment 3: Boilerplate filter if (experiments.includes(3)) { - const result = await runBoilerplateExperiment( - corpus.chunks, - annotations.pairs, - baselineResult, - ); + const result = await runBoilerplateExperiment(corpus.chunks, annotations.pairs, baselineResult); experimentResults.push(result); } @@ -159,12 +148,8 @@ async function main(): Promise { const needsRebuild = experiments.some((e) => [4, 5].includes(e)); if (needsRebuild && corpus.config.sessionPaths.length === 0) { - console.warn( - '\nWARNING: Experiments 4 and 5 require session file paths in the corpus config.', - ); - console.warn( - 'The loaded corpus may not have accessible sessionPaths. Attempting anyway...\n', - ); + console.warn('\nWARNING: Experiments 4 and 5 require session file paths in the corpus config.'); + console.warn('The loaded corpus may not have accessible sessionPaths. Attempting anyway...\n'); } // For experiments 4 and 5, compute a fresh baseline from the original corpus @@ -180,21 +165,13 @@ async function main(): Promise { // Experiment 4: Thinking ablation if (experiments.includes(4)) { - const result = await runThinkingAblation( - corpus, - annotations.pairs, - rechunkBaseline, - ); + const result = await runThinkingAblation(corpus, annotations.pairs, rechunkBaseline); experimentResults.push(result); } // Experiment 5: Code-focused mode if (experiments.includes(5)) { - const result = await runCodeFocusedExperiment( - corpus, - annotations.pairs, - rechunkBaseline, - ); + const result = await runCodeFocusedExperiment(corpus, annotations.pairs, rechunkBaseline); experimentResults.push(result); } diff --git a/scripts/experiments/run-hold-period-sweep.ts b/scripts/experiments/run-hold-period-sweep.ts index a055c24..b4a2149 100644 --- a/scripts/experiments/run-hold-period-sweep.ts +++ b/scripts/experiments/run-hold-period-sweep.ts @@ -100,9 +100,7 @@ Example: console.log(`Found ${allSessionPaths.length} sessions`); // Sample and build session sources - const selectedPaths = allSessionPaths - .sort(() => Math.random() - 0.5) - .slice(0, maxSessions); + const selectedPaths = allSessionPaths.sort(() => Math.random() - 0.5).slice(0, maxSessions); const sessions: SessionSource[] = []; for (const path of selectedPaths) { @@ -145,7 +143,9 @@ Example: for (const r of sorted) { const holdMatch = r.modelName.match(/\((\d+)min\)/); const hold = holdMatch ? 
holdMatch[1] : '?'; - console.log(` ${hold}min hold: MRR=${r.mrr.toFixed(3)}, Rank@1=${r.rankDistribution.rank1} (${((r.rankDistribution.rank1 / r.queryCount) * 100).toFixed(0)}%)`); + console.log( + ` ${hold}min hold: MRR=${r.mrr.toFixed(3)}, Rank@1=${r.rankDistribution.rank1} (${((r.rankDistribution.rank1 / r.queryCount) * 100).toFixed(0)}%)`, + ); } const best = sorted[0]; @@ -153,7 +153,9 @@ Example: const bestHold = holdMatch ? holdMatch[1] : '?'; console.log(`\n*** Recommended hold period: ${bestHold} minutes ***`); - console.log(` (MRR=${best.mrr.toFixed(3)}, Rank@1=${((best.rankDistribution.rank1 / best.queryCount) * 100).toFixed(0)}%)`); + console.log( + ` (MRR=${best.mrr.toFixed(3)}, Rank@1=${((best.rankDistribution.rank1 / best.queryCount) * 100).toFixed(0)}%)`, + ); } main().catch((err) => { diff --git a/scripts/experiments/run-noncoding-validation.ts b/scripts/experiments/run-noncoding-validation.ts index 90d0837..3668396 100644 --- a/scripts/experiments/run-noncoding-validation.ts +++ b/scripts/experiments/run-noncoding-validation.ts @@ -207,28 +207,38 @@ Example: console.log('-'.repeat(70)); for (const ncResult of nonCodingResults.retrievalRanking) { - const codingResult = codingResults.retrievalRanking.find((r) => r.modelId === ncResult.modelId); + const codingResult = codingResults.retrievalRanking.find( + (r) => r.modelId === ncResult.modelId, + ); if (codingResult) { const diff = ncResult.mrr - codingResult.mrr; const sign = diff >= 0 ? '+' : ''; console.log( - `${ncResult.modelName.padEnd(25)} | ${ncResult.mrr.toFixed(3).padEnd(14)} | ${codingResult.mrr.toFixed(3).padEnd(10)} | ${sign}${(diff * 100).toFixed(1)}%` + `${ncResult.modelName.padEnd(25)} | ${ncResult.mrr.toFixed(3).padEnd(14)} | ${codingResult.mrr.toFixed(3).padEnd(10)} | ${sign}${(diff * 100).toFixed(1)}%`, ); } } // Conclusion - const avgNonCoding = nonCodingResults.retrievalRanking.reduce((sum, r) => sum + r.mrr, 0) / nonCodingResults.retrievalRanking.length; - const avgCoding = codingResults.retrievalRanking.reduce((sum, r) => sum + r.mrr, 0) / codingResults.retrievalRanking.length; + const avgNonCoding = + nonCodingResults.retrievalRanking.reduce((sum, r) => sum + r.mrr, 0) / + nonCodingResults.retrievalRanking.length; + const avgCoding = + codingResults.retrievalRanking.reduce((sum, r) => sum + r.mrr, 0) / + codingResults.retrievalRanking.length; const avgDiff = avgNonCoding - avgCoding; console.log('-'.repeat(70)); if (Math.abs(avgDiff) < 0.02) { console.log('\n✓ Models perform similarly on non-coding and coding sessions'); } else if (avgDiff > 0) { - console.log(`\n✓ Models perform ${(avgDiff * 100).toFixed(1)}% better on non-coding sessions`); + console.log( + `\n✓ Models perform ${(avgDiff * 100).toFixed(1)}% better on non-coding sessions`, + ); } else { - console.log(`\n⚠ Models perform ${(-avgDiff * 100).toFixed(1)}% worse on non-coding sessions`); + console.log( + `\n⚠ Models perform ${(-avgDiff * 100).toFixed(1)}% worse on non-coding sessions`, + ); console.log(' Consider parameter adjustments for non-coding use cases'); } } @@ -236,7 +246,7 @@ Example: function generateComparison( nonCoding: { retrievalRanking: Array<{ modelId: string; mrr: number }> }, - coding: { retrievalRanking: Array<{ modelId: string; mrr: number }> } + coding: { retrievalRanking: Array<{ modelId: string; mrr: number }> }, ): Record { const comparison: Record = {}; diff --git a/scripts/experiments/run-topic-continuity.ts b/scripts/experiments/run-topic-continuity.ts index d496d23..c19c205 100644 --- 
a/scripts/experiments/run-topic-continuity.ts +++ b/scripts/experiments/run-topic-continuity.ts @@ -85,10 +85,7 @@ async function main(): Promise { if (exportOnly) { // Just export the labeled dataset const exportPath = join(outputDir, 'topic-continuity-dataset.json'); - await exportTransitionsDataset( - { projectsDir, maxSessions, timeGapMinutes }, - exportPath, - ); + await exportTransitionsDataset({ projectsDir, maxSessions, timeGapMinutes }, exportPath); return; } diff --git a/scripts/experiments/sweep-depth.ts b/scripts/experiments/sweep-depth.ts index d8bb84f..84570cf 100644 --- a/scripts/experiments/sweep-depth.ts +++ b/scripts/experiments/sweep-depth.ts @@ -58,9 +58,9 @@ async function runSweep(embedder: Embedder): Promise { // Get vector search results const { embedding } = await embedder.embed(query, true); const vectorResults = await vectorStore.search(embedding, 10); - const vectorChunkIds = new Set(vectorResults.map(r => r.id)); + const vectorChunkIds = new Set(vectorResults.map((r) => r.id)); - const startIds = vectorResults.map(r => r.id); + const startIds = vectorResults.map((r) => r.id); // Traverse backward const backwardResult = traverseMultiple(startIds, { @@ -78,8 +78,8 @@ async function runSweep(embedder: Embedder): Promise { // Combine and dedupe const allChunks = [...backwardResult.chunks, ...forwardResult.chunks]; - const newChunks = allChunks.filter(c => !vectorChunkIds.has(c.chunkId)); - const weights = newChunks.map(c => c.weight).sort((a, b) => b - a); + const newChunks = allChunks.filter((c) => !vectorChunkIds.has(c.chunkId)); + const weights = newChunks.map((c) => c.weight).sort((a, b) => b - a); totalChunksAdded += newChunks.length; totalPathsExplored += backwardResult.visited + forwardResult.visited; @@ -90,7 +90,9 @@ async function runSweep(embedder: Embedder): Promise { totalMinWeight += weights[weights.length - 1]; } - console.log(`+${newChunks.length} chunks, ${backwardResult.visited + forwardResult.visited} paths`); + console.log( + `+${newChunks.length} chunks, ${backwardResult.visited + forwardResult.visited} paths`, + ); } const elapsed = Date.now() - startTime; @@ -115,7 +117,9 @@ function printResults(results: DepthResult[]) { console.log('MAX DEPTH SWEEP RESULTS (minWeight=' + MIN_WEIGHT + ')'); console.log('='.repeat(110)); - console.log('\nmaxDepth | Chunks Added | Paths Explored | Augmentation | Max Weight | Median Weight | Time (ms)'); + console.log( + '\nmaxDepth | Chunks Added | Paths Explored | Augmentation | Max Weight | Median Weight | Time (ms)', + ); console.log('-'.repeat(110)); const baselineChunks = 10; // vector search returns 10 seeds @@ -123,13 +127,19 @@ function printResults(results: DepthResult[]) { for (const r of results) { const augmentation = ((r.avgChunksAdded + baselineChunks) / baselineChunks).toFixed(2) + 'x'; console.log( - String(r.depth).padStart(8) + ' | ' + - r.avgChunksAdded.toFixed(1).padStart(12) + ' | ' + - r.avgPathsExplored.toFixed(0).padStart(14) + ' | ' + - augmentation.padStart(12) + ' | ' + - r.avgMaxWeight.toFixed(4).padStart(10) + ' | ' + - r.avgMedianWeight.toFixed(4).padStart(13) + ' | ' + - String(r.totalTimeMs).padStart(9) + String(r.depth).padStart(8) + + ' | ' + + r.avgChunksAdded.toFixed(1).padStart(12) + + ' | ' + + r.avgPathsExplored.toFixed(0).padStart(14) + + ' | ' + + augmentation.padStart(12) + + ' | ' + + r.avgMaxWeight.toFixed(4).padStart(10) + + ' | ' + + r.avgMedianWeight.toFixed(4).padStart(13) + + ' | ' + + String(r.totalTimeMs).padStart(9), ); } @@ -149,9 +159,9 @@ function 
printResults(results: DepthResult[]) { console.log( ` depth ${prev.depth} → ${curr.depth}: ` + - `+${chunkGain.toFixed(1)} chunks (+${(chunkGain / prev.avgChunksAdded * 100).toFixed(1)}%), ` + - `+${pathGain.toFixed(0)} paths, ` + - `${chunksPerDepth.toFixed(2)} chunks/depth` + `+${chunkGain.toFixed(1)} chunks (+${((chunkGain / prev.avgChunksAdded) * 100).toFixed(1)}%), ` + + `+${pathGain.toFixed(0)} paths, ` + + `${chunksPerDepth.toFixed(2)} chunks/depth`, ); } @@ -179,17 +189,21 @@ function printResults(results: DepthResult[]) { for (let i = 1; i < results.length; i++) { const prev = results[i - 1]; const curr = results[i]; - const percentGain = (curr.avgChunksAdded - prev.avgChunksAdded) / prev.avgChunksAdded * 100; + const percentGain = ((curr.avgChunksAdded - prev.avgChunksAdded) / prev.avgChunksAdded) * 100; const gainPerDepth = percentGain / (curr.depth - prev.depth); - if (gainPerDepth < 1.0) { // Less than 1% gain per depth unit + if (gainPerDepth < 1.0) { + // Less than 1% gain per depth unit recommendedDepth = prev.depth; - console.log(`\nDiminishing returns start at depth=${prev.depth} (${gainPerDepth.toFixed(2)}%/depth after)`); + console.log( + `\nDiminishing returns start at depth=${prev.depth} (${gainPerDepth.toFixed(2)}%/depth after)`, + ); break; } } - const finalResult = results.find(r => r.depth === recommendedDepth) || results[results.length - 1]; + const finalResult = + results.find((r) => r.depth === recommendedDepth) || results[results.length - 1]; console.log(`\nRecommended: maxDepth=${recommendedDepth}`); console.log(` - Chunks added: ${finalResult.avgChunksAdded.toFixed(1)}`); console.log(` - Augmentation: ${((finalResult.avgChunksAdded + 10) / 10).toFixed(2)}x`); diff --git a/scripts/experiments/sweep-min-weight.ts b/scripts/experiments/sweep-min-weight.ts index 82f0288..c53ee2a 100644 --- a/scripts/experiments/sweep-min-weight.ts +++ b/scripts/experiments/sweep-min-weight.ts @@ -58,9 +58,9 @@ async function runSweep(embedder: Embedder): Promise { // Get vector search results const { embedding } = await embedder.embed(query, true); const vectorResults = await vectorStore.search(embedding, 10); - const vectorChunkIds = new Set(vectorResults.map(r => r.id)); + const vectorChunkIds = new Set(vectorResults.map((r) => r.id)); - const startIds = vectorResults.map(r => r.id); + const startIds = vectorResults.map((r) => r.id); // Traverse backward const backwardResult = traverseMultiple(startIds, { @@ -78,8 +78,8 @@ async function runSweep(embedder: Embedder): Promise { // Combine and dedupe const allChunks = [...backwardResult.chunks, ...forwardResult.chunks]; - const newChunks = allChunks.filter(c => !vectorChunkIds.has(c.chunkId)); - const weights = newChunks.map(c => c.weight).sort((a, b) => b - a); + const newChunks = allChunks.filter((c) => !vectorChunkIds.has(c.chunkId)); + const weights = newChunks.map((c) => c.weight).sort((a, b) => b - a); totalChunksAdded += newChunks.length; totalPathsExplored += backwardResult.visited + forwardResult.visited; @@ -90,7 +90,9 @@ async function runSweep(embedder: Embedder): Promise { totalMinWeight += weights[weights.length - 1]; } - console.log(`+${newChunks.length} chunks, ${backwardResult.visited + forwardResult.visited} paths`); + console.log( + `+${newChunks.length} chunks, ${backwardResult.visited + forwardResult.visited} paths`, + ); } const elapsed = Date.now() - startTime; @@ -115,18 +117,26 @@ function printResults(results: SweepResult[]) { console.log('MIN WEIGHT SWEEP RESULTS'); console.log('='.repeat(100)); - 
console.log('\nminWeight | Chunks Added | Paths Explored | Max Weight | Median Weight | Min Weight | Time (ms)'); + console.log( + '\nminWeight | Chunks Added | Paths Explored | Max Weight | Median Weight | Min Weight | Time (ms)', + ); console.log('-'.repeat(100)); for (const r of results) { console.log( - String(r.minWeight).padEnd(11) + ' | ' + - r.avgChunksAdded.toFixed(1).padStart(12) + ' | ' + - r.avgPathsExplored.toFixed(0).padStart(14) + ' | ' + - r.avgMaxWeight.toFixed(4).padStart(10) + ' | ' + - r.avgMedianWeight.toFixed(4).padStart(13) + ' | ' + - r.avgMinWeight.toFixed(4).padStart(10) + ' | ' + - String(r.totalTimeMs).padStart(9) + String(r.minWeight).padEnd(11) + + ' | ' + + r.avgChunksAdded.toFixed(1).padStart(12) + + ' | ' + + r.avgPathsExplored.toFixed(0).padStart(14) + + ' | ' + + r.avgMaxWeight.toFixed(4).padStart(10) + + ' | ' + + r.avgMedianWeight.toFixed(4).padStart(13) + + ' | ' + + r.avgMinWeight.toFixed(4).padStart(10) + + ' | ' + + String(r.totalTimeMs).padStart(9), ); } @@ -139,15 +149,16 @@ function printResults(results: SweepResult[]) { for (let i = 1; i < results.length; i++) { const prev = results[i - 1]; const curr = results[i]; - const chunkIncrease = ((curr.avgChunksAdded - prev.avgChunksAdded) / prev.avgChunksAdded * 100); - const pathIncrease = ((curr.avgPathsExplored - prev.avgPathsExplored) / prev.avgPathsExplored * 100); + const chunkIncrease = ((curr.avgChunksAdded - prev.avgChunksAdded) / prev.avgChunksAdded) * 100; + const pathIncrease = + ((curr.avgPathsExplored - prev.avgPathsExplored) / prev.avgPathsExplored) * 100; const efficiency = chunkIncrease / pathIncrease; console.log( ` ${prev.minWeight} → ${curr.minWeight}: ` + - `+${chunkIncrease.toFixed(1)}% chunks, ` + - `+${pathIncrease.toFixed(1)}% paths, ` + - `efficiency: ${efficiency.toFixed(2)}` + `+${chunkIncrease.toFixed(1)}% chunks, ` + + `+${pathIncrease.toFixed(1)}% paths, ` + + `efficiency: ${efficiency.toFixed(2)}`, ); } @@ -158,8 +169,8 @@ function printResults(results: SweepResult[]) { for (let i = 0; i < results.length - 1; i++) { const curr = results[i]; const next = results[i + 1]; - const efficiency = (next.avgChunksAdded / next.avgPathsExplored) / - (curr.avgChunksAdded / curr.avgPathsExplored); + const efficiency = + next.avgChunksAdded / next.avgPathsExplored / (curr.avgChunksAdded / curr.avgPathsExplored); if (efficiency > bestEfficiency && next.avgChunksAdded > curr.avgChunksAdded) { bestEfficiency = efficiency; bestIdx = i + 1; @@ -167,7 +178,9 @@ function printResults(results: SweepResult[]) { } console.log(` Best efficiency at minWeight = ${results[bestIdx].minWeight}`); - console.log(` (${results[bestIdx].avgChunksAdded.toFixed(1)} chunks / ${results[bestIdx].avgPathsExplored.toFixed(0)} paths)`); + console.log( + ` (${results[bestIdx].avgChunksAdded.toFixed(1)} chunks / ${results[bestIdx].avgPathsExplored.toFixed(0)} paths)`, + ); } async function main() { diff --git a/scripts/ops/recluster-fast.ts b/scripts/ops/recluster-fast.ts index b6bd652..4a782e3 100644 --- a/scripts/ops/recluster-fast.ts +++ b/scripts/ops/recluster-fast.ts @@ -32,16 +32,20 @@ interface PythonHDBSCANResult { async function runPythonHDBSCAN( ids: string[], embeddings: number[][], - minClusterSize: number + minClusterSize: number, ): Promise { const scriptPath = join(__dirname, 'hdbscan-python.py'); const args = [ scriptPath, - '--min-cluster-size', String(minClusterSize), - '--min-samples', String(minClusterSize), - '--core-dist-n-jobs', '-1', // Use all cores - '--metric', 'euclidean', + 
'--min-cluster-size', + String(minClusterSize), + '--min-samples', + String(minClusterSize), + '--core-dist-n-jobs', + '-1', // Use all cores + '--metric', + 'euclidean', ]; return new Promise((resolve, reject) => { @@ -166,8 +170,8 @@ Requires: pip install hdbscan numpy // Run Python HDBSCAN console.log(`\nRunning HDBSCAN (Python, parallel)...`); - const ids = vectors.map(v => v.id); - const embeddings = vectors.map(v => v.embedding); + const ids = vectors.map((v) => v.id); + const embeddings = vectors.map((v) => v.embedding); const hdbscanResult = await runPythonHDBSCAN(ids, embeddings, minClusterSize); @@ -191,18 +195,18 @@ Requires: pip install hdbscan numpy for (const [label, members] of clusterMembers) { // Compute centroid - const centroid = computeCentroid(members.map(m => m.embedding)); + const centroid = computeCentroid(members.map((m) => m.embedding)); // Select exemplars (closest to centroid) - const withDistances = members.map(m => ({ + const withDistances = members.map((m) => ({ ...m, distance: angularDistance(m.embedding, centroid), })); withDistances.sort((a, b) => a.distance - b.distance); - const exemplarIds = withDistances.slice(0, 3).map(m => m.id); + const exemplarIds = withDistances.slice(0, 3).map((m) => m.id); // Compute membership hash - const memberIds = members.map(m => m.id); + const memberIds = members.map((m) => m.id); const membershipHash = computeMembershipHash(memberIds); // Create cluster @@ -238,7 +242,9 @@ Requires: pip install hdbscan numpy console.log(`Clusters found: ${clusterMembers.size}`); console.log(`Chunks assigned: ${assignments.length}`); console.log(`Noise chunks: ${hdbscanResult.n_noise}`); - console.log(`Noise ratio: ${(hdbscanResult.n_noise / vectors.length * 100).toFixed(1)}%`); + console.log( + `Noise ratio: ${((hdbscanResult.n_noise / vectors.length) * 100).toFixed(1)}%`, + ); console.log(`Duration: ${(durationMs / 1000).toFixed(1)}s`); if (clusterSizes.length > 0) { diff --git a/scripts/ops/refresh-clusters.ts b/scripts/ops/refresh-clusters.ts index 728b4ed..02d02f2 100644 --- a/scripts/ops/refresh-clusters.ts +++ b/scripts/ops/refresh-clusters.ts @@ -50,7 +50,9 @@ Example: const apiKey = await getApiKey('ANTHROPIC_API_KEY'); if (!apiKey) { console.error('Error: ANTHROPIC_API_KEY not found'); - console.error('Set via environment variable or run: npm run refresh-clusters -- --set-key '); + console.error( + 'Set via environment variable or run: npm run refresh-clusters -- --set-key ', + ); process.exit(1); } diff --git a/src/dashboard/client/src/components/clusters/ClusterBubbles.tsx b/src/dashboard/client/src/components/clusters/ClusterBubbles.tsx index 42da5bc..cc97c00 100644 --- a/src/dashboard/client/src/components/clusters/ClusterBubbles.tsx +++ b/src/dashboard/client/src/components/clusters/ClusterBubbles.tsx @@ -12,9 +12,21 @@ interface ClusterBubblesProps { } const COLORS = [ - '#10b981', '#06b6d4', '#8b5cf6', '#f59e0b', '#ef4444', - '#ec4899', '#14b8a6', '#f97316', '#6366f1', '#84cc16', - '#0ea5e9', '#d946ef', '#22c55e', '#eab308', '#a855f7', + '#10b981', + '#06b6d4', + '#8b5cf6', + '#f59e0b', + '#ef4444', + '#ec4899', + '#14b8a6', + '#f97316', + '#6366f1', + '#84cc16', + '#0ea5e9', + '#d946ef', + '#22c55e', + '#eab308', + '#a855f7', ]; export function ClusterBubbles({ clusters }: ClusterBubblesProps) { @@ -34,10 +46,7 @@ export function ClusterBubbles({ clusters }: ClusterBubblesProps) { svg.attr('width', width).attr('height', height); - const pack = d3 - .pack() - .size([width, height]) - .padding(6); + const pack = 
d3.pack().size([width, height]).padding(6); const root = d3 .hierarchy<{ children: ClusterData[] }>({ children: clusters }) @@ -71,7 +80,9 @@ export function ClusterBubbles({ clusters }: ClusterBubblesProps) { .attr('fill', fg) .attr('font-size', (d) => Math.min(14, d.r / 3)) .attr('font-weight', 600) - .text((d) => (d.data as ClusterData).name?.slice(0, 20) ?? (d.data as ClusterData).id.slice(0, 8)); + .text( + (d) => (d.data as ClusterData).name?.slice(0, 20) ?? (d.data as ClusterData).id.slice(0, 8), + ); node .filter((d) => d.r > 30) diff --git a/src/dashboard/client/src/components/clusters/ClusterCard.tsx b/src/dashboard/client/src/components/clusters/ClusterCard.tsx index 8930ee6..7ce91cf 100644 --- a/src/dashboard/client/src/components/clusters/ClusterCard.tsx +++ b/src/dashboard/client/src/components/clusters/ClusterCard.tsx @@ -22,9 +22,7 @@ export function ClusterCard({ cluster }: ClusterCardProps) { setExpanded(!expanded)}>
- - {cluster.name ?? cluster.id.slice(0, 12)} - + {cluster.name ?? cluster.id.slice(0, 12)}
{cluster.memberCount} chunks {expanded ? : } @@ -49,7 +47,10 @@ export function ClusterCard({ cluster }: ClusterCardProps) {
Exemplar Chunks
{cluster.exemplarPreviews.map((ex) => ( -
+
{ex.id.slice(0, 8)}
{ex.preview}
diff --git a/src/dashboard/client/src/components/graph/GraphControls.tsx b/src/dashboard/client/src/components/graph/GraphControls.tsx index 5e278e4..49b8072 100644 --- a/src/dashboard/client/src/components/graph/GraphControls.tsx +++ b/src/dashboard/client/src/components/graph/GraphControls.tsx @@ -12,7 +12,12 @@ interface GraphControlsProps { onLimitChange: (limit: number) => void; } -export function GraphControls({ project, onProjectChange, limit, onLimitChange }: GraphControlsProps) { +export function GraphControls({ + project, + onProjectChange, + limit, + onLimitChange, +}: GraphControlsProps) { const { data: projectsData } = useApi('/api/projects'); const projectOptions = (projectsData?.projects ?? []).map((p) => ({ diff --git a/src/dashboard/client/src/components/graph/NodeInspector.tsx b/src/dashboard/client/src/components/graph/NodeInspector.tsx index ea4fcfb..b083a8b 100644 --- a/src/dashboard/client/src/components/graph/NodeInspector.tsx +++ b/src/dashboard/client/src/components/graph/NodeInspector.tsx @@ -86,7 +86,10 @@ export function NodeInspector({ node, onClose, onExploreNeighborhood }: NodeInsp
{edgeData.edges.map((edge) => (
- + {edge.type} {edge.referenceType && ( diff --git a/src/dashboard/client/src/components/layout/Shell.tsx b/src/dashboard/client/src/components/layout/Shell.tsx index eb07095..4a466e3 100644 --- a/src/dashboard/client/src/components/layout/Shell.tsx +++ b/src/dashboard/client/src/components/layout/Shell.tsx @@ -1,15 +1,6 @@ import { useState } from 'react'; import { NavLink, Outlet } from 'react-router-dom'; -import { - LayoutDashboard, - Clock, - Boxes, - Search, - FolderOpen, - Menu, - X, - Brain, -} from 'lucide-react'; +import { LayoutDashboard, Clock, Boxes, Search, FolderOpen, Menu, X, Brain } from 'lucide-react'; import { ThemeToggle } from './ThemeToggle'; import { cn } from '../../lib/utils'; @@ -72,9 +63,7 @@ export function Shell() { {/* Footer */} -
- Causantic -
+
Causantic
{/* Main content */} diff --git a/src/dashboard/client/src/components/search/PipelineComparison.tsx b/src/dashboard/client/src/components/search/PipelineComparison.tsx index 014023e..2a7fa63 100644 --- a/src/dashboard/client/src/components/search/PipelineComparison.tsx +++ b/src/dashboard/client/src/components/search/PipelineComparison.tsx @@ -16,7 +16,12 @@ interface PipelineComparisonProps { }; } -function ResultColumn({ title, results, color, allIds }: { +function ResultColumn({ + title, + results, + color, + allIds, +}: { title: string; results: SearchResult[]; color: string; @@ -40,10 +45,7 @@ function ResultColumn({ title, results, color, allIds }: { {results.map((result, i) => { const sourceCount = getSourceCount(result.id); return ( -
+
#{i + 1} diff --git a/src/dashboard/client/src/components/stats/TimeSeries.tsx b/src/dashboard/client/src/components/stats/TimeSeries.tsx index 82dcca3..c1df671 100644 --- a/src/dashboard/client/src/components/stats/TimeSeries.tsx +++ b/src/dashboard/client/src/components/stats/TimeSeries.tsx @@ -56,7 +56,12 @@ export function TimeSeries({ data }: TimeSeriesProps) { // X axis g.append('g') .attr('transform', `translate(0,${innerHeight})`) - .call(d3.axisBottom(x).ticks(6).tickFormat(d3.timeFormat('%b %y') as (d: Date | d3.NumberValue) => string)) + .call( + d3 + .axisBottom(x) + .ticks(6) + .tickFormat(d3.timeFormat('%b %y') as (d: Date | d3.NumberValue) => string), + ) .attr('color', mutedFg) .selectAll('line') .attr('stroke', borderColor); diff --git a/src/dashboard/client/src/components/timeline/ChainView.tsx b/src/dashboard/client/src/components/timeline/ChainView.tsx index 934c0a1..63b7e46 100644 --- a/src/dashboard/client/src/components/timeline/ChainView.tsx +++ b/src/dashboard/client/src/components/timeline/ChainView.tsx @@ -69,12 +69,7 @@ export function ChainView({ chunkId, direction, onDirectionChange }: ChainViewPr {data && (
{/* Seed chunk */} - + {/* Chain chunks */} {data.chain.map((chunk, i) => ( diff --git a/src/dashboard/client/src/components/timeline/ChunkInspector.tsx b/src/dashboard/client/src/components/timeline/ChunkInspector.tsx index 37189f4..8813fad 100644 --- a/src/dashboard/client/src/components/timeline/ChunkInspector.tsx +++ b/src/dashboard/client/src/components/timeline/ChunkInspector.tsx @@ -31,12 +31,8 @@ interface ChunkInspectorProps { } export function ChunkInspector({ chunkId, onClose, onWalkChain }: ChunkInspectorProps) { - const { data: chunkData } = useApi<{ chunks: ChunkDetail[] }>( - `/api/chunks?chunkId=${chunkId}`, - ); - const { data: edgeData } = useApi( - `/api/edges?chunkId=${chunkId}&limit=20`, - ); + const { data: chunkData } = useApi<{ chunks: ChunkDetail[] }>(`/api/chunks?chunkId=${chunkId}`); + const { data: edgeData } = useApi(`/api/edges?chunkId=${chunkId}&limit=20`); const chunk = chunkData?.chunks?.[0]; diff --git a/src/dashboard/client/src/components/timeline/ClusterLegend.tsx b/src/dashboard/client/src/components/timeline/ClusterLegend.tsx index 664f307..390d3dc 100644 --- a/src/dashboard/client/src/components/timeline/ClusterLegend.tsx +++ b/src/dashboard/client/src/components/timeline/ClusterLegend.tsx @@ -15,9 +15,17 @@ interface TooltipState { anchorRect: DOMRect | null; } -export function ClusterLegend({ clusters, activeClusterIds, unclusteredCount }: ClusterLegendProps) { +export function ClusterLegend({ + clusters, + activeClusterIds, + unclusteredCount, +}: ClusterLegendProps) { const containerRef = useRef(null); - const [tooltip, setTooltip] = useState({ visible: false, cluster: null, anchorRect: null }); + const [tooltip, setTooltip] = useState({ + visible: false, + cluster: null, + anchorRect: null, + }); const clusterMap = new Map(clusters.map((c) => [c.id, c])); @@ -35,7 +43,15 @@ export function ClusterLegend({ clusters, activeClusterIds, unclusteredCount }: info: c.info ?? null, })), ...(unclusteredCount > 0 - ? [{ key: '_unclustered', color: '#64748b', name: 'Unclustered', count: unclusteredCount, info: null }] + ? [ + { + key: '_unclustered', + color: '#64748b', + name: 'Unclustered', + count: unclusteredCount, + info: null, + }, + ] : []), ]; @@ -50,18 +66,18 @@ export function ClusterLegend({ clusters, activeClusterIds, unclusteredCount }: // Compute tooltip position relative to container const containerRect = containerRef.current?.getBoundingClientRect(); - const tipLeft = tooltip.anchorRect && containerRect - ? Math.min( - tooltip.anchorRect.left - containerRect.left, - containerRect.width - 296, - ) - : 0; - const tipTop = tooltip.anchorRect && containerRect - ? tooltip.anchorRect.bottom - containerRect.top + 6 - : 0; + const tipLeft = + tooltip.anchorRect && containerRect + ? Math.min(tooltip.anchorRect.left - containerRect.left, containerRect.width - 296) + : 0; + const tipTop = + tooltip.anchorRect && containerRect ? tooltip.anchorRect.bottom - containerRect.top + 6 : 0; return ( -
+
Topic Clusters
@@ -82,7 +98,9 @@ export function ClusterLegend({ clusters, activeClusterIds, unclusteredCount }: style={{ backgroundColor: item.color }} /> {item.name} - {item.count} + + {item.count} +
))}
@@ -96,12 +114,19 @@ export function ClusterLegend({ clusters, activeClusterIds, unclusteredCount }: top: tipTop, }} > -
{tooltip.cluster.name ?? 'Unnamed'}
+
+ {tooltip.cluster.name ?? 'Unnamed'} +
{tooltip.cluster.description && ( -

{tooltip.cluster.description}

+

+ {tooltip.cluster.description} +

)}
-
Members: {tooltip.cluster.memberCount}
+
+ Members:{' '} + {tooltip.cluster.memberCount} +
)} diff --git a/src/dashboard/client/src/components/timeline/TimelineView.tsx b/src/dashboard/client/src/components/timeline/TimelineView.tsx index a7676db..030d1c1 100644 --- a/src/dashboard/client/src/components/timeline/TimelineView.tsx +++ b/src/dashboard/client/src/components/timeline/TimelineView.tsx @@ -31,7 +31,14 @@ interface TimelineViewProps { clusters?: ClusterInfo[]; } -export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, selectedChunkId, clusters }: TimelineViewProps) { +export function TimelineView({ + chunks, + edges: _edges, + timeRange, + onChunkClick, + selectedChunkId, + clusters, +}: TimelineViewProps) { const svgRef = useRef(null); // Build cluster lookup: id → sorted index + metadata @@ -69,34 +76,35 @@ export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, s const sessionSlugs = [...new Set(chunks.map((c) => c.sessionSlug))]; // Scales - const xScale = d3.scaleTime() + const xScale = d3 + .scaleTime() .domain([new Date(timeRange.earliest!), new Date(timeRange.latest!)]) .range([0, innerWidth]); - const yScale = d3.scaleBand() - .domain(sessionSlugs) - .range([0, innerHeight]) - .padding(0.3); + const yScale = d3.scaleBand().domain(sessionSlugs).range([0, innerHeight]).padding(0.3); // Container group with margins svg.attr('width', width).attr('height', height); // Clip path to constrain chunks/edges within chart area - svg.append('defs') + svg + .append('defs') .append('clipPath') .attr('id', 'timeline-clip') .append('rect') .attr('width', innerWidth) .attr('height', innerHeight); - const g = svg - .append('g') - .attr('transform', `translate(${margin.left},${margin.top})`); + const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`); // Zoom on X axis - const zoom = d3.zoom() + const zoom = d3 + .zoom() .scaleExtent([0.5, 20]) - .translateExtent([[-100, 0], [innerWidth + 100, height]]) + .translateExtent([ + [-100, 0], + [innerWidth + 100, height], + ]) .on('zoom', (event) => { const newXScale = event.transform.rescaleX(xScale); xAxisG.call(d3.axisBottom(newXScale).ticks(8)); @@ -113,7 +121,8 @@ export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, s svg.call(zoom); // Draw axes - const xAxisG = g.append('g') + const xAxisG = g + .append('g') .attr('transform', `translate(0,${innerHeight})`) .call(d3.axisBottom(xScale).ticks(8)) .attr('class', 'timeline-axis'); @@ -125,16 +134,18 @@ export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, s .style('font-size', '11px'); // Style axis lines - g.selectAll('.timeline-axis line, .timeline-axis path') - .attr('stroke', 'var(--border-color, #334155)'); - g.selectAll('.timeline-axis text') - .attr('fill', 'var(--muted-foreground, #94a3b8)'); + g.selectAll('.timeline-axis line, .timeline-axis path').attr( + 'stroke', + 'var(--border-color, #334155)', + ); + g.selectAll('.timeline-axis text').attr('fill', 'var(--muted-foreground, #94a3b8)'); // Clipped group for chart content const chartArea = g.append('g').attr('clip-path', 'url(#timeline-clip)'); // Draw chunks - const chunkRects = chartArea.append('g') + const chunkRects = chartArea + .append('g') .selectAll('rect') .data(chunks) .join('rect') @@ -147,8 +158,8 @@ export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, s .attr('height', yScale.bandwidth()) .attr('rx', 2) .attr('fill', (d) => getColor(d.clusterId)) - .attr('fill-opacity', (d) => d.id === selectedChunkId ? 
1.0 : 0.7) - .attr('stroke', (d) => d.id === selectedChunkId ? '#ffffff' : 'none') + .attr('fill-opacity', (d) => (d.id === selectedChunkId ? 1.0 : 0.7)) + .attr('stroke', (d) => (d.id === selectedChunkId ? '#ffffff' : 'none')) .attr('stroke-width', 2) .attr('cursor', 'pointer') .on('click', (_event, d) => onChunkClick(d.id)) @@ -160,14 +171,21 @@ export function TimelineView({ chunks, edges: _edges, timeRange, onChunkClick, s }); // Chunk tooltips — include cluster name - chunkRects.append('title') - .text((d) => { - const info = d.clusterId ? clusterMap.get(d.clusterId) : null; - const clusterLabel = info?.name ?? (d.clusterId ? 'Unknown cluster' : 'Unclustered'); - return `${d.sessionSlug}\n${clusterLabel}\n${d.preview.slice(0, 100)}...\n${new Date(d.startTime).toLocaleString()}`; - }); - - }, [chunks, timeRange, onChunkClick, selectedChunkId, clusters, sortedClusters, colorIndex, clusterMap]); + chunkRects.append('title').text((d) => { + const info = d.clusterId ? clusterMap.get(d.clusterId) : null; + const clusterLabel = info?.name ?? (d.clusterId ? 'Unknown cluster' : 'Unclustered'); + return `${d.sessionSlug}\n${clusterLabel}\n${d.preview.slice(0, 100)}...\n${new Date(d.startTime).toLocaleString()}`; + }); + }, [ + chunks, + timeRange, + onChunkClick, + selectedChunkId, + clusters, + sortedClusters, + colorIndex, + clusterMap, + ]); return (
diff --git a/src/dashboard/client/src/components/timeline/TopicFlowView.tsx b/src/dashboard/client/src/components/timeline/TopicFlowView.tsx index 8e2f7ec..e6238bd 100644 --- a/src/dashboard/client/src/components/timeline/TopicFlowView.tsx +++ b/src/dashboard/client/src/components/timeline/TopicFlowView.tsx @@ -46,10 +46,16 @@ interface TopicStats { focusScore: number; } -function computeStats(sorted: TimelineChunk[], clusterLabel: (id: string | null) => string): TopicStats { +function computeStats( + sorted: TimelineChunk[], + clusterLabel: (id: string | null) => string, +): TopicStats { if (sorted.length <= 1) { return { - longestStreak: sorted.length === 1 ? { cluster: clusterLabel(sorted[0].clusterId), count: 1, durationMs: 0 } : null, + longestStreak: + sorted.length === 1 + ? { cluster: clusterLabel(sorted[0].clusterId), count: 1, durationMs: 0 } + : null, mostSwitches: null, focusScore: 1, }; @@ -73,7 +79,9 @@ function computeStats(sorted: TimelineChunk[], clusterLabel: (id: string | null) if (curStreak.count > bestStreak.count) { bestStreak = { ...curStreak, endIdx: sorted.length - 1 }; } - const streakDuration = new Date(sorted[bestStreak.endIdx].endTime).getTime() - new Date(sorted[bestStreak.startIdx].startTime).getTime(); + const streakDuration = + new Date(sorted[bestStreak.endIdx].endTime).getTime() - + new Date(sorted[bestStreak.startIdx].startTime).getTime(); // Switch counts between pairs const pairCounts = new Map(); @@ -96,12 +104,20 @@ function computeStats(sorted: TimelineChunk[], clusterLabel: (id: string | null) let mostSwitches: TopicStats['mostSwitches'] = null; for (const pair of pairCounts.values()) { if (!mostSwitches || pair.count > mostSwitches.count) { - mostSwitches = { from: clusterLabel(pair.from === '__unclustered' ? null : pair.from), to: clusterLabel(pair.to === '__unclustered' ? null : pair.to), count: pair.count }; + mostSwitches = { + from: clusterLabel(pair.from === '__unclustered' ? null : pair.from), + to: clusterLabel(pair.to === '__unclustered' ? null : pair.to), + count: pair.count, + }; } } return { - longestStreak: { cluster: clusterLabel(bestStreak.cluster), count: bestStreak.count, durationMs: streakDuration }, + longestStreak: { + cluster: clusterLabel(bestStreak.cluster), + count: bestStreak.count, + durationMs: streakDuration, + }, mostSwitches, focusScore: 1 - switchCount / (sorted.length - 1), }; @@ -116,14 +132,22 @@ function formatDuration(ms: number): string { return rem > 0 ? `${hrs}h ${rem}m` : `${hrs}h`; } -export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, selectedChunkId, clusters }: TopicFlowViewProps) { +export function TopicFlowView({ + chunks, + edges: _edges, + timeRange, + onChunkClick, + selectedChunkId, + clusters, +}: TopicFlowViewProps) { const svgRef = useRef(null); const clusterMap = useMemo(() => new Map((clusters ?? 
[]).map((c) => [c.id, c])), [clusters]); // Sort chunks chronologically - const sorted = useMemo(() => - [...chunks].sort((a, b) => new Date(a.startTime).getTime() - new Date(b.startTime).getTime()), + const sorted = useMemo( + () => + [...chunks].sort((a, b) => new Date(a.startTime).getTime() - new Date(b.startTime).getTime()), [chunks], ); @@ -217,11 +241,13 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, const innerHeight = height - margin.top - margin.bottom; // Scales - const xScale = d3.scaleTime() + const xScale = d3 + .scaleTime() .domain([new Date(timeRange.earliest!), new Date(timeRange.latest!)]) .range([0, innerWidth]); - const yScale = d3.scaleBand() + const yScale = d3 + .scaleBand() .domain(clusterOrder) .range([0, innerHeight]) .padding(0.25); @@ -232,7 +258,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, // Defs: clip path + gradients const defs = svg.append('defs'); - defs.append('clipPath') + defs + .append('clipPath') .attr('id', 'topicflow-clip') .append('rect') .attr('width', innerWidth) @@ -250,18 +277,30 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, for (const pair of gradientPairs) { const [fromId, toId] = pair.split('||'); const gradId = `grad-${fromId}-${toId}`.replace(/[^a-zA-Z0-9-]/g, '_'); - const grad = defs.append('linearGradient') + const grad = defs + .append('linearGradient') .attr('id', gradId) - .attr('x1', '0%').attr('y1', '0%') - .attr('x2', '100%').attr('y2', '0%'); - grad.append('stop').attr('offset', '0%').attr('stop-color', getColor(fromId === '__unclustered' ? null : fromId)).attr('stop-opacity', 0.3); - grad.append('stop').attr('offset', '100%').attr('stop-color', getColor(toId === '__unclustered' ? null : toId)).attr('stop-opacity', 0.3); + .attr('x1', '0%') + .attr('y1', '0%') + .attr('x2', '100%') + .attr('y2', '0%'); + grad + .append('stop') + .attr('offset', '0%') + .attr('stop-color', getColor(fromId === '__unclustered' ? null : fromId)) + .attr('stop-opacity', 0.3); + grad + .append('stop') + .attr('offset', '100%') + .attr('stop-color', getColor(toId === '__unclustered' ? null : toId)) + .attr('stop-opacity', 0.3); } const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`); // X axis - const xAxisG = g.append('g') + const xAxisG = g + .append('g') .attr('transform', `translate(0,${innerHeight})`) .call(d3.axisBottom(xScale).ticks(8)) .attr('class', 'topicflow-axis'); @@ -272,10 +311,15 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, const yPos = (yScale(cid) ?? 0) + bandwidth / 2; const label = clusterLabel(cid === '__unclustered' ? null : cid); const cs = clusterStats.get(cid); - const tokenStr = cs ? (cs.tokens >= 1000 ? `${Math.round(cs.tokens / 1000)}k` : `${cs.tokens}`) : '0'; + const tokenStr = cs + ? cs.tokens >= 1000 + ? `${Math.round(cs.tokens / 1000)}k` + : `${cs.tokens}` + : '0'; const fullLabel = cs ? `${label} (${cs.count}, ${tokenStr} tok)` : label; const truncated = fullLabel.length > 30 ? 
fullLabel.slice(0, 28) + '…' : fullLabel; - yAxisG.append('text') + yAxisG + .append('text') .attr('x', -8) .attr('y', yPos) .attr('text-anchor', 'end') @@ -283,19 +327,24 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, .attr('fill', 'var(--muted-foreground, #94a3b8)') .attr('font-size', '11px') .text(truncated) - .append('title').text(fullLabel); + .append('title') + .text(fullLabel); } // Y axis line - yAxisG.append('line') - .attr('x1', 0).attr('y1', 0) - .attr('x2', 0).attr('y2', innerHeight) + yAxisG + .append('line') + .attr('x1', 0) + .attr('y1', 0) + .attr('x2', 0) + .attr('y2', innerHeight) .attr('stroke', 'var(--border-color, #334155)'); // Style axis - g.selectAll('.topicflow-axis line, .topicflow-axis path') - .attr('stroke', 'var(--border-color, #334155)'); - g.selectAll('.topicflow-axis text') - .attr('fill', 'var(--muted-foreground, #94a3b8)'); + g.selectAll('.topicflow-axis line, .topicflow-axis path').attr( + 'stroke', + 'var(--border-color, #334155)', + ); + g.selectAll('.topicflow-axis text').attr('fill', 'var(--muted-foreground, #94a3b8)'); const chartArea = g.append('g').attr('clip-path', 'url(#topicflow-clip)'); @@ -307,14 +356,18 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, const sessionG = chartArea.append('g').attr('class', 'session-boundaries'); for (const sb of sessionBoundaries) { const x = xS(new Date(sb.time)); - sessionG.append('line') - .attr('x1', x).attr('y1', 0) - .attr('x2', x).attr('y2', innerHeight) + sessionG + .append('line') + .attr('x1', x) + .attr('y1', 0) + .attr('x2', x) + .attr('y2', innerHeight) .attr('stroke', '#475569') .attr('stroke-width', 1) .attr('stroke-dasharray', '4,4') .attr('stroke-opacity', 0.3); - sessionG.append('text') + sessionG + .append('text') .attr('x', x + 4) .attr('y', 10) .attr('fill', 'var(--muted-foreground, #94a3b8)') @@ -336,7 +389,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, // Gap indicator const midX = (xS(new Date(cur.endTime)) + xS(new Date(next.startTime))) / 2; const midY = innerHeight / 2; - ribbonG.append('text') + ribbonG + .append('text') .attr('class', 'gap-indicator') .attr('x', midX) .attr('y', midY) @@ -359,7 +413,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, if (fromCid === toCid) { // Same-cluster: flat horizontal band const yTop = (yScale(fromCid) ?? 
0) + bandwidth * 0.25; - ribbonG.append('rect') + ribbonG + .append('rect') .attr('class', `ribbon ribbon-${i}`) .attr('x', Math.min(x1, x2)) .attr('y', yTop) @@ -377,15 +432,19 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, const gradId = `grad-${fromCid}-${toCid}`.replace(/[^a-zA-Z0-9-]/g, '_'); - ribbonG.append('path') + ribbonG + .append('path') .attr('class', `ribbon ribbon-${i}`) - .attr('d', [ - `M${x1},${y1Top}`, - `C${mx},${y1Top} ${mx},${y2Top} ${x2},${y2Top}`, - `L${x2},${y2Bot}`, - `C${mx},${y2Bot} ${mx},${y1Bot} ${x1},${y1Bot}`, - 'Z', - ].join(' ')) + .attr( + 'd', + [ + `M${x1},${y1Top}`, + `C${mx},${y1Top} ${mx},${y2Top} ${x2},${y2Top}`, + `L${x2},${y2Bot}`, + `C${mx},${y2Bot} ${mx},${y1Bot} ${x1},${y1Bot}`, + 'Z', + ].join(' '), + ) .attr('fill', `url(#${gradId})`) .attr('stroke', 'none'); } @@ -393,7 +452,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, // Chunks const chunkG = chartArea.append('g').attr('class', 'chunks'); - chunkG.selectAll('rect') + chunkG + .selectAll('rect') .data(sorted) .join('rect') .attr('class', (_, i) => `chunk chunk-${i}`) @@ -409,8 +469,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, .attr('height', bandwidth * 0.7) .attr('rx', 2) .attr('fill', (d) => getColor(d.clusterId)) - .attr('fill-opacity', (d) => d.id === selectedChunkId ? 1.0 : 0.7) - .attr('stroke', (d) => d.id === selectedChunkId ? '#ffffff' : 'none') + .attr('fill-opacity', (d) => (d.id === selectedChunkId ? 1.0 : 0.7)) + .attr('stroke', (d) => (d.id === selectedChunkId ? '#ffffff' : 'none')) .attr('stroke-width', 2) .attr('cursor', 'pointer') .on('click', (_event, d) => onChunkClick(d.id)) @@ -423,7 +483,8 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, }); // Chunk tooltips - chunkG.selectAll('rect') + chunkG + .selectAll('rect') .append('title') .text((_: unknown, i: number) => { const d = sorted[i]; @@ -438,13 +499,15 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, const highlighted = new Set(); highlighted.add(centerIdx); for (let i = centerIdx - 1; i >= 0 && highlighted.size < CHAIN_MAX; i--) { - const gap = new Date(sorted[i + 1].startTime).getTime() - new Date(sorted[i].endTime).getTime(); + const gap = + new Date(sorted[i + 1].startTime).getTime() - new Date(sorted[i].endTime).getTime(); if (gap > GAP_THRESHOLD_MS) break; highlighted.add(i); } // Walk forward for (let i = centerIdx + 1; i < sorted.length && highlighted.size < CHAIN_MAX; i++) { - const gap = new Date(sorted[i].startTime).getTime() - new Date(sorted[i - 1].endTime).getTime(); + const gap = + new Date(sorted[i].startTime).getTime() - new Date(sorted[i - 1].endTime).getTime(); if (gap > GAP_THRESHOLD_MS) break; highlighted.add(i); } @@ -467,8 +530,11 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, } function clearHighlight() { - chartArea.selectAll('.chunk') - .attr('fill-opacity', (d: unknown) => (d as TimelineChunk).id === selectedChunkId ? 1.0 : 0.7); + chartArea + .selectAll('.chunk') + .attr('fill-opacity', (d: unknown) => + (d as TimelineChunk).id === selectedChunkId ? 
1.0 : 0.7, + ); chartArea.selectAll('.ribbon').attr('opacity', 1.0); chartArea.selectAll('.gap-indicator').attr('fill-opacity', 0.5); } @@ -477,9 +543,13 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, renderContent(xScale); // Zoom - const zoom = d3.zoom() + const zoom = d3 + .zoom() .scaleExtent([0.5, 20]) - .translateExtent([[-100, 0], [innerWidth + 100, height]]) + .translateExtent([ + [-100, 0], + [innerWidth + 100, height], + ]) .on('zoom', (event) => { const newXScale = event.transform.rescaleX(xScale); xAxisG.call(d3.axisBottom(newXScale).ticks(8)); @@ -487,8 +557,17 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick, }); svg.call(zoom); - - }, [sorted, timeRange, onChunkClick, selectedChunkId, clusterOrder, clusterMap, colorIndex, clusterStats, sessionBoundaries]); + }, [ + sorted, + timeRange, + onChunkClick, + selectedChunkId, + clusterOrder, + clusterMap, + colorIndex, + clusterStats, + sessionBoundaries, + ]); return (
@@ -499,14 +578,17 @@ export function TopicFlowView({ chunks, edges: _edges, timeRange, onChunkClick,
Longest streak: - {stats.longestStreak.cluster} ({stats.longestStreak.count} chunks, {formatDuration(stats.longestStreak.durationMs)}) + {stats.longestStreak.cluster} ({stats.longestStreak.count} chunks,{' '} + {formatDuration(stats.longestStreak.durationMs)})
)}
Most switches: - {stats.mostSwitches ? `${stats.mostSwitches.from} ↔ ${stats.mostSwitches.to} (${stats.mostSwitches.count}×)` : '—'} + {stats.mostSwitches + ? `${stats.mostSwitches.from} ↔ ${stats.mostSwitches.to} (${stats.mostSwitches.count}×)` + : '—'}
diff --git a/src/dashboard/client/src/components/ui/badge.tsx b/src/dashboard/client/src/components/ui/badge.tsx
index ecbb528..779390f 100644
--- a/src/dashboard/client/src/components/ui/badge.tsx
+++ b/src/dashboard/client/src/components/ui/badge.tsx
@@ -17,7 +17,8 @@ const badgeVariants = cva(
   },
 );

-interface BadgeProps extends React.HTMLAttributes, VariantProps {}
+interface BadgeProps
+  extends React.HTMLAttributes, VariantProps {}

 export function Badge({ className, variant, ...props }: BadgeProps) {
   return
; diff --git a/src/dashboard/client/src/components/ui/card.tsx b/src/dashboard/client/src/components/ui/card.tsx index 5ac903f..eea88f7 100644 --- a/src/dashboard/client/src/components/ui/card.tsx +++ b/src/dashboard/client/src/components/ui/card.tsx @@ -3,7 +3,10 @@ import { cn } from '../../lib/utils'; export function Card({ className, ...props }: React.HTMLAttributes) { return (
); @@ -14,10 +17,15 @@ export function CardHeader({ className, ...props }: React.HTMLAttributes) { - return

; + return ( +

+ ); } -export function CardDescription({ className, ...props }: React.HTMLAttributes) { +export function CardDescription({ + className, + ...props +}: React.HTMLAttributes) { return

; } diff --git a/src/dashboard/client/src/index.css b/src/dashboard/client/src/index.css index 630b705..a36db5e 100644 --- a/src/dashboard/client/src/index.css +++ b/src/dashboard/client/src/index.css @@ -1,4 +1,4 @@ -@import "tailwindcss"; +@import 'tailwindcss'; @theme { --color-background: var(--bg); @@ -12,7 +12,7 @@ --color-accent-foreground: var(--accent-fg); --color-ring: var(--ring-color); - --font-sans: "Inter", ui-sans-serif, system-ui, -apple-system, sans-serif; + --font-sans: 'Inter', ui-sans-serif, system-ui, -apple-system, sans-serif; --radius-lg: 0.75rem; --radius-md: 0.5rem; --radius-sm: 0.25rem; @@ -47,7 +47,12 @@ body { background-color: var(--bg); color: var(--fg); - font-family: "Inter", ui-sans-serif, system-ui, -apple-system, sans-serif; + font-family: + 'Inter', + ui-sans-serif, + system-ui, + -apple-system, + sans-serif; } /* Animated count-up */ diff --git a/src/dashboard/client/src/pages/Overview.tsx b/src/dashboard/client/src/pages/Overview.tsx index 8ed0a93..4a2a1a4 100644 --- a/src/dashboard/client/src/pages/Overview.tsx +++ b/src/dashboard/client/src/pages/Overview.tsx @@ -64,7 +64,10 @@ export function Overview() {

{recent.chunks.map((chunk) => ( -
+
{chunk.sessionSlug} diff --git a/src/dashboard/client/src/pages/Projects.tsx b/src/dashboard/client/src/pages/Projects.tsx index 46b75ee..be70a15 100644 --- a/src/dashboard/client/src/pages/Projects.tsx +++ b/src/dashboard/client/src/pages/Projects.tsx @@ -33,15 +33,26 @@ export function Projects() { - - - - + + + + {data.projects.map((project) => ( - +
ProjectChunksFirst SeenLast Seen + Project + + Chunks + + First Seen + + Last Seen +
{project.slug} diff --git a/src/dashboard/client/src/pages/SearchPage.tsx b/src/dashboard/client/src/pages/SearchPage.tsx index da1ac4e..270e500 100644 --- a/src/dashboard/client/src/pages/SearchPage.tsx +++ b/src/dashboard/client/src/pages/SearchPage.tsx @@ -49,9 +49,7 @@ export function SearchPage() { {data && } - {!data && !loading && query && ( -
No results found.
- )} + {!data && !loading && query &&
No results found.
}
); } diff --git a/src/dashboard/client/src/pages/Timeline.tsx b/src/dashboard/client/src/pages/Timeline.tsx index e141396..f34e4e2 100644 --- a/src/dashboard/client/src/pages/Timeline.tsx +++ b/src/dashboard/client/src/pages/Timeline.tsx @@ -88,7 +88,9 @@ export function Timeline() { {clustersData && timelineData && timelineData.chunks.length > 0 && ( c.clusterId).filter(Boolean))] as string[]} + activeClusterIds={ + [...new Set(timelineData.chunks.map((c) => c.clusterId).filter(Boolean))] as string[] + } unclusteredCount={timelineData.chunks.filter((c) => !c.clusterId).length} /> )}
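Note on the settings behind these hunks: the changes are formatting-only (quoting, trailing commas, and line wrapping; no behavior changes), but the Prettier configuration itself is not part of the diff. The sketch below is only an inference from the output style seen here, single quotes (e.g. `@import "tailwindcss"` becoming `@import 'tailwindcss'`), trailing commas after wrapped argument lists, and wrapping near 100 columns, written as a typed options object; the project's real config file may use different keys or values.

```ts
// Hypothetical Prettier options inferred from this diff's formatting, not the project's
// actual config. The key names are real Prettier options; the values are assumptions.
import type { Options } from 'prettier';

const inferredConfig: Options = {
  singleQuote: true, // double quotes rewritten to single quotes in .ts/.tsx/.css
  trailingComma: 'all', // trailing commas added to wrapped argument lists and arrays
  printWidth: 100, // assumed: lines are wrapped once they pass roughly 100 characters
};

export default inferredConfig;
```

If these guesses match the repository's settings, running `npx prettier --check src/` against the post-diff files should report them as already formatted.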