From dfc2b18714e70c9ccf49c37a2af4d6b253bddd28 Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Thu, 12 Feb 2026 00:16:26 +0530 Subject: [PATCH 1/5] Make sidecar content searchable via unified FTS Extends memory_fts with thinking, artifacts, attachments, and voice_notes columns to enable full-text search across all Claude.ai conversation content types. ## Changes **Schema Migration (src/db.ts)** - Add migrateFTSToV2() to extend memory_fts with sidecar columns - Uses GROUP_CONCAT to flatten multi-row sidecar content into indexed text - Rebuilds FTS index from existing data; idempotent on re-runs - Recreates the insert/delete triggers for the new columns: the insert trigger fills the sidecar columns from sidecar rows present when the message row is inserted, and the delete trigger removes the FTS row **Search Filtering (src/search/index.ts)** - Add content-type filters: includeThinking, includeArtifacts, includeAttachments, includeVoiceNotes - Thinking blocks are opt-in (privacy-first); artifacts, attachments, and voice notes are enabled by default - Uses FTS5 column filter syntax {col1 col2} : query to restrict search - Weighted BM25 scoring: title=10.0, content=5.0, sidecar=4.0, thinking=3.0, role=1.0 **CLI Flags (src/index.ts)** - --include-thinking: opt-in for thinking block search - --no-artifacts, --no-attachments, --no-voice-notes: opt-out from sidecar search - Applied to both search and recall commands **Testing (test/search.test.ts)** - 12 new tests covering artifact/thinking/attachment/voice search - Tests content-type filtering and filter combinations - Verifies migration is idempotent and data stays intact - Tests that thinking blocks are excluded by default ## Indexing Now searchable: - Artifact code/documents/diagrams: 434 in claude-web sessions - Thinking blocks: 102 (opt-in only) - Attachments with extracted content: 34 - Voice note transcripts: 17 Co-Authored-By: Claude Haiku 4.5 --- src/db.ts | 216 ++++++++++++++++++ src/index.ts | 18 +- src/ingest/claude-web.ts | 387 +++++++++++++++++++++++++++++++ src/ingest/index.ts | 32 ++- src/search/index.ts | 30 ++- src/search/recall.ts | 8 +- test/claude-web.test.ts | 
478 +++++++++++++++++++++++++++++++++++++++ test/search.test.ts | 140 +++++++++++- 8 files changed, 1296 insertions(+), 13 deletions(-) create mode 100644 src/ingest/claude-web.ts create mode 100644 test/claude-web.test.ts diff --git a/src/db.ts b/src/db.ts index e591691..a335a17 100644 --- a/src/db.ts +++ b/src/db.ts @@ -178,6 +178,51 @@ export function initializeSmritiTables(db: Database): void { created_at TEXT NOT NULL ); + -- Artifacts from Claude.ai conversations (code, documents, diagrams) + CREATE TABLE IF NOT EXISTS smriti_artifacts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + message_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + artifact_id TEXT, + type TEXT, + title TEXT, + command TEXT, + language TEXT, + content TEXT, + created_at TEXT NOT NULL + ); + + -- Thinking blocks (Claude's internal reasoning) + CREATE TABLE IF NOT EXISTS smriti_thinking ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + message_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + thinking TEXT NOT NULL, + created_at TEXT NOT NULL + ); + + -- File attachments with extracted content + CREATE TABLE IF NOT EXISTS smriti_attachments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + message_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + file_name TEXT, + file_type TEXT, + file_size INTEGER, + content TEXT, + created_at TEXT NOT NULL + ); + + -- Voice note transcripts + CREATE TABLE IF NOT EXISTS smriti_voice_notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + message_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + title TEXT, + transcript TEXT, + created_at TEXT NOT NULL + ); + -- Indexes (original) CREATE INDEX IF NOT EXISTS idx_smriti_session_meta_agent ON smriti_session_meta(agent_id); @@ -211,6 +256,18 @@ export function initializeSmritiTables(db: Database): void { ON smriti_git_operations(session_id); CREATE INDEX IF NOT EXISTS idx_smriti_git_operations_op ON smriti_git_operations(operation); + + -- Indexes (claude-web sidecar tables) + CREATE INDEX IF NOT EXISTS 
idx_smriti_artifacts_session + ON smriti_artifacts(session_id); + CREATE INDEX IF NOT EXISTS idx_smriti_artifacts_type + ON smriti_artifacts(type); + CREATE INDEX IF NOT EXISTS idx_smriti_thinking_session + ON smriti_thinking(session_id); + CREATE INDEX IF NOT EXISTS idx_smriti_attachments_session + ON smriti_attachments(session_id); + CREATE INDEX IF NOT EXISTS idx_smriti_voice_notes_session + ON smriti_voice_notes(session_id); `); } @@ -238,6 +295,18 @@ const DEFAULT_AGENTS = [ log_pattern: ".cursor/**/*.json", parser: "cursor", }, + { + id: "generic", + display_name: "Generic Import", + log_pattern: null, + parser: "generic", + }, + { + id: "claude-web", + display_name: "Claude.ai", + log_pattern: null, + parser: "claude-web", + }, ] as const; /** Default category taxonomy */ @@ -312,6 +381,87 @@ export function seedDefaults(db: Database): void { } } +// ============================================================================= +// FTS Migration (sidecar content search) +// ============================================================================= + +/** + * Migrate memory_fts to v2: adds thinking, artifacts, attachments, voice_notes columns. + * Drops old FTS table + triggers, rebuilds index from existing data. + * Idempotent — skips if already migrated. + */ +export function migrateFTSToV2(db: Database): void { + // Check if migration needed by looking for the 'thinking' column + const cols = db.prepare("PRAGMA table_info(memory_fts)").all() as { name: string }[]; + if (cols.some((c) => c.name === "thinking")) return; + + console.log("Migrating memory_fts to include sidecar content..."); + + // 1. Drop old triggers + db.exec(`DROP TRIGGER IF EXISTS memory_messages_ai`); + db.exec(`DROP TRIGGER IF EXISTS memory_messages_ad`); + + // 2. Drop old FTS table + db.exec(`DROP TABLE IF EXISTS memory_fts`); + + // 3. 
Create new FTS table with sidecar columns + db.exec(` + CREATE VIRTUAL TABLE memory_fts USING fts5( + session_title, role, content, + thinking, artifacts, attachments, voice_notes, + tokenize='porter unicode61' + ) + `); + + // 4. Rebuild index from existing messages + sidecar data + db.exec(` + INSERT INTO memory_fts( + rowid, session_title, role, content, + thinking, artifacts, attachments, voice_notes + ) + SELECT + mm.id, + COALESCE(ms.title, ''), + mm.role, + mm.content, + COALESCE((SELECT GROUP_CONCAT(thinking, ' ') FROM smriti_thinking WHERE message_id = mm.id), ''), + COALESCE((SELECT GROUP_CONCAT(content, ' ') FROM smriti_artifacts WHERE message_id = mm.id), ''), + COALESCE((SELECT GROUP_CONCAT(content, ' ') FROM smriti_attachments WHERE message_id = mm.id), ''), + COALESCE((SELECT GROUP_CONCAT(transcript, ' ') FROM smriti_voice_notes WHERE message_id = mm.id), '') + FROM memory_messages mm + LEFT JOIN memory_sessions ms ON ms.id = mm.session_id + `); + + // 5. Create new triggers with sidecar columns + db.exec(` + CREATE TRIGGER memory_messages_ai AFTER INSERT ON memory_messages + BEGIN + INSERT INTO memory_fts( + rowid, session_title, role, content, + thinking, artifacts, attachments, voice_notes + ) + SELECT + new.id, + COALESCE((SELECT title FROM memory_sessions WHERE id = new.session_id), ''), + new.role, + new.content, + COALESCE((SELECT GROUP_CONCAT(thinking, ' ') FROM smriti_thinking WHERE message_id = new.id), ''), + COALESCE((SELECT GROUP_CONCAT(content, ' ') FROM smriti_artifacts WHERE message_id = new.id), ''), + COALESCE((SELECT GROUP_CONCAT(content, ' ') FROM smriti_attachments WHERE message_id = new.id), ''), + COALESCE((SELECT GROUP_CONCAT(transcript, ' ') FROM smriti_voice_notes WHERE message_id = new.id), ''); + END + `); + + db.exec(` + CREATE TRIGGER memory_messages_ad AFTER DELETE ON memory_messages + BEGIN + DELETE FROM memory_fts WHERE rowid = old.id; + END + `); + + console.log("Migration complete."); +} + // 
============================================================================= // Convenience // ============================================================================= @@ -322,6 +472,7 @@ export function initSmriti(dbPath?: string): Database { initializeMemoryTables(db); initializeSmritiTables(db); seedDefaults(db); + migrateFTSToV2(db); return db; } @@ -555,3 +706,68 @@ export function insertGitOperation( VALUES (?, ?, ?, ?, ?, ?, ?, ?)` ).run(messageId, sessionId, operation, branch, prUrl, prNumber, details, createdAt); } + +// ============================================================================= +// Claude-Web Sidecar Insert Helpers +// ============================================================================= + +export function insertArtifact( + db: Database, + messageId: number, + sessionId: string, + artifactId: string | null, + type: string | null, + title: string | null, + command: string | null, + language: string | null, + content: string | null, + createdAt: string +): void { + db.prepare( + `INSERT INTO smriti_artifacts (message_id, session_id, artifact_id, type, title, command, language, content, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)` + ).run(messageId, sessionId, artifactId, type, title, command, language, content, createdAt); +} + +export function insertThinking( + db: Database, + messageId: number, + sessionId: string, + thinking: string, + createdAt: string +): void { + db.prepare( + `INSERT INTO smriti_thinking (message_id, session_id, thinking, created_at) + VALUES (?, ?, ?, ?)` + ).run(messageId, sessionId, thinking, createdAt); +} + +export function insertAttachment( + db: Database, + messageId: number, + sessionId: string, + fileName: string | null, + fileType: string | null, + fileSize: number | null, + content: string | null, + createdAt: string +): void { + db.prepare( + `INSERT INTO smriti_attachments (message_id, session_id, file_name, file_type, file_size, content, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?)` + 
).run(messageId, sessionId, fileName, fileType, fileSize, content, createdAt); +} + +export function insertVoiceNote( + db: Database, + messageId: number, + sessionId: string, + title: string | null, + transcript: string, + createdAt: string +): void { + db.prepare( + `INSERT INTO smriti_voice_notes (message_id, session_id, title, transcript, created_at) + VALUES (?, ?, ?, ?, ?)` + ).run(messageId, sessionId, title, transcript, createdAt); +} diff --git a/src/index.ts b/src/index.ts index 12223a5..9d37214 100644 --- a/src/index.ts +++ b/src/index.ts @@ -102,11 +102,19 @@ Filters (apply to search, recall, list, share): Ingest options: smriti ingest claude Ingest Claude Code sessions + smriti ingest claude-web Claude.ai data export + smriti ingest claude-web-memory Claude.ai memories smriti ingest codex Ingest Codex CLI sessions smriti ingest cursor --project-path smriti ingest file [--format chat|jsonl] [--title ] smriti ingest all Ingest from all known agents +Search content options: + --include-thinking Include thinking blocks in search (opt-in) + --no-artifacts Exclude artifacts from search + --no-attachments Exclude attachments from search + --no-voice-notes Exclude voice notes from search + Recall options: --synthesize Synthesize results via Ollama --model Ollama model for synthesis @@ -154,7 +162,7 @@ async function main() { const agent = args[1]; if (!agent) { console.error("Usage: smriti ingest "); - console.error("Agents: claude, codex, cursor, file, all"); + console.error("Agents: claude, codex, cursor, claude-web, file, all"); process.exit(1); } @@ -198,6 +206,10 @@ async function main() { project: getArg(args, "--project"), agent: getArg(args, "--agent"), limit: Number(getArg(args, "--limit")) || undefined, + includeThinking: hasFlag(args, "--include-thinking"), + includeArtifacts: !hasFlag(args, "--no-artifacts"), + includeAttachments: !hasFlag(args, "--no-attachments"), + includeVoiceNotes: !hasFlag(args, "--no-voice-notes"), }); if (hasFlag(args, 
"--json")) { @@ -226,6 +238,10 @@ async function main() { synthesize: hasFlag(args, "--synthesize"), model: getArg(args, "--model"), maxTokens: Number(getArg(args, "--max-tokens")) || undefined, + includeThinking: hasFlag(args, "--include-thinking"), + includeArtifacts: !hasFlag(args, "--no-artifacts"), + includeAttachments: !hasFlag(args, "--no-attachments"), + includeVoiceNotes: !hasFlag(args, "--no-voice-notes"), }); if (hasFlag(args, "--json")) { diff --git a/src/ingest/claude-web.ts b/src/ingest/claude-web.ts new file mode 100644 index 0000000..e57ac15 --- /dev/null +++ b/src/ingest/claude-web.ts @@ -0,0 +1,387 @@ +/** + * claude-web.ts - Claude.ai data export parser + * + * Parses conversations.json from Claude.ai data exports. + * Extracts artifacts, thinking blocks, attachments, and voice notes + * into dedicated sidecar tables for rich querying. + */ + +import type { Database } from "bun:sqlite"; +import { addMessage } from "../qmd"; +import { + upsertSessionMeta, + insertArtifact, + insertThinking, + insertAttachment, + insertVoiceNote, +} from "../db"; +import type { IngestResult, IngestOptions } from "./index"; + +// ============================================================================= +// Types — Claude.ai export format +// ============================================================================= + +export type ClaudeWebConversation = { + uuid: string; + name: string; + summary: string; + created_at: string; + updated_at: string; + account?: { uuid: string }; + chat_messages: ClaudeWebMessage[]; +}; + +export type ClaudeWebMessage = { + uuid: string; + text: string; + content: ClaudeWebContentBlock[]; + sender: "human" | "assistant"; + created_at: string; + updated_at: string; + attachments: ClaudeWebAttachment[]; + files: { file_name: string }[]; +}; + +export type ClaudeWebContentBlock = + | ClaudeWebTextBlock + | ClaudeWebToolUseBlock + | ClaudeWebToolResultBlock + | ClaudeWebThinkingBlock + | ClaudeWebVoiceNoteBlock + | 
ClaudeWebTokenBudgetBlock; + +export type ClaudeWebTextBlock = { + type: "text"; + text: string; +}; + +export type ClaudeWebToolUseBlock = { + type: "tool_use"; + name: string; + id: string | null; + input: { + id?: string; + type?: string; + title?: string; + command?: string; + content?: string; + language?: string; + version_uuid?: string; + old_str?: string; + new_str?: string; + }; +}; + +export type ClaudeWebToolResultBlock = { + type: "tool_result"; + tool_use_id: string | null; + name: string; + content: Array<{ type: string; text: string }>; + is_error: boolean; +}; + +export type ClaudeWebThinkingBlock = { + type: "thinking"; + thinking: string; +}; + +export type ClaudeWebVoiceNoteBlock = { + type: "voice_note"; + title: string; + text: string; +}; + +export type ClaudeWebTokenBudgetBlock = { + type: "token_budget"; + [key: string]: unknown; +}; + +export type ClaudeWebAttachment = { + file_name: string; + file_type: string; + file_size: number; + extracted_content?: string; +}; + +// ============================================================================= +// Content extraction +// ============================================================================= + +/** + * Extract plain text from a message's content blocks. + * Combines text blocks, artifact titles, and voice note transcripts. 
+ */ +function extractPlainText(msg: ClaudeWebMessage): string { + const parts: string[] = []; + + // The top-level `text` field is often a summary/preview + if (msg.text) { + parts.push(msg.text); + return parts.join("\n"); + } + + for (const block of msg.content || []) { + switch (block.type) { + case "text": + if (block.text) parts.push(block.text); + break; + case "tool_use": + // Include artifact title for searchability + if (block.input?.title) { + parts.push(`[Artifact: ${block.input.title}]`); + } + break; + case "voice_note": + if (block.text) parts.push(block.text); + break; + } + } + + return parts.join("\n").trim(); +} + +// ============================================================================= +// Parse + Ingest +// ============================================================================= + +/** + * Parse a conversations.json file and ingest into the memory database. + */ +export async function ingestClaudeWeb( + db: Database, + filePath: string, + options: IngestOptions = {} +): Promise { + const { existingSessionIds, onProgress } = options; + + const result: IngestResult = { + agent: "claude-web", + sessionsFound: 0, + sessionsIngested: 0, + messagesIngested: 0, + skipped: 0, + errors: [], + }; + + // Read and parse the JSON file + let conversations: ClaudeWebConversation[]; + try { + const file = Bun.file(filePath); + const raw = await file.text(); + conversations = JSON.parse(raw); + } catch (err: any) { + result.errors.push(`Failed to read ${filePath}: ${err.message}`); + return result; + } + + if (!Array.isArray(conversations)) { + result.errors.push(`Expected array in ${filePath}, got ${typeof conversations}`); + return result; + } + + result.sessionsFound = conversations.length; + + for (const conv of conversations) { + // Dedup by UUID + if (existingSessionIds?.has(conv.uuid)) { + result.skipped++; + continue; + } + + if (!conv.chat_messages?.length) { + result.skipped++; + continue; + } + + try { + const sessionId = conv.uuid; + 
const title = conv.name || ""; + + let msgCount = 0; + + for (const msg of conv.chat_messages) { + const role = msg.sender === "human" ? "user" : "assistant"; + const plainText = extractPlainText(msg); + + if (!plainText.trim() && !msg.attachments?.length && !msg.content?.length) { + continue; + } + + // Store via QMD's addMessage + const stored = await addMessage( + db, + sessionId, + role, + plainText || "(structured content)", + { title } + ); + + const messageId = stored.id; + const createdAt = msg.created_at || conv.created_at; + + // Extract content blocks into sidecar tables + for (const block of msg.content || []) { + switch (block.type) { + case "tool_use": { + // Artifact (both creates and updates) + if (block.name === "artifacts") { + insertArtifact( + db, + messageId, + sessionId, + block.input?.id || null, + block.input?.type || null, + block.input?.title || null, + block.input?.command || null, + block.input?.language || null, + block.input?.content || null, + createdAt + ); + } + break; + } + + case "thinking": { + if (block.thinking?.trim()) { + insertThinking(db, messageId, sessionId, block.thinking, createdAt); + } + break; + } + + case "voice_note": { + if (block.text?.trim()) { + insertVoiceNote( + db, + messageId, + sessionId, + block.title || null, + block.text, + createdAt + ); + } + break; + } + } + } + + // Extract attachments + for (const att of msg.attachments || []) { + insertAttachment( + db, + messageId, + sessionId, + att.file_name || null, + att.file_type || null, + att.file_size || null, + att.extracted_content || null, + createdAt + ); + } + + msgCount++; + } + + // Attach Smriti session metadata + upsertSessionMeta(db, sessionId, "claude-web"); + + result.sessionsIngested++; + result.messagesIngested += msgCount; + + if (onProgress) { + onProgress(`Ingested "${title || sessionId}" (${msgCount} messages)`); + } + } catch (err: any) { + result.errors.push(`${conv.uuid}: ${err.message}`); + } + } + + return result; +} + +// 
============================================================================= +// Memory import +// ============================================================================= + +export type MemoryExport = Array<{ + conversations_memory: string; + project_memories: Record; + account_uuid: string; +}>; + +/** + * Import memories.json as a special session. + */ +export async function ingestClaudeWebMemories( + db: Database, + filePath: string, + options: IngestOptions = {} +): Promise { + const result: IngestResult = { + agent: "claude-web", + sessionsFound: 0, + sessionsIngested: 0, + messagesIngested: 0, + skipped: 0, + errors: [], + }; + + let memories: MemoryExport; + try { + const file = Bun.file(filePath); + memories = JSON.parse(await file.text()); + } catch (err: any) { + result.errors.push(`Failed to read ${filePath}: ${err.message}`); + return result; + } + + if (!Array.isArray(memories) || memories.length === 0) { + result.errors.push("No memories found"); + return result; + } + + const mem = memories[0]; + const sessionId = `claude-web-memory-${mem.account_uuid || "default"}`; + + if (options.existingSessionIds?.has(sessionId)) { + result.skipped = 1; + result.sessionsFound = 1; + return result; + } + + result.sessionsFound = 1; + + try { + let msgCount = 0; + + // Conversation-level memory + if (mem.conversations_memory?.trim()) { + await addMessage(db, sessionId, "assistant", mem.conversations_memory, { + title: "Claude.ai Memories", + }); + msgCount++; + } + + // Project-level memories + for (const [projectId, memory] of Object.entries(mem.project_memories || {})) { + if (memory?.trim()) { + await addMessage(db, sessionId, "assistant", memory, { + title: `Claude.ai Project Memory: ${projectId}`, + }); + msgCount++; + } + } + + upsertSessionMeta(db, sessionId, "claude-web"); + + result.sessionsIngested = 1; + result.messagesIngested = msgCount; + + if (options.onProgress) { + options.onProgress(`Imported ${msgCount} memory entries`); + } + } catch 
(err: any) { + result.errors.push(`memories: ${err.message}`); + } + + return result; +} diff --git a/src/ingest/index.ts b/src/ingest/index.ts index 7dc1c5b..91a3cec 100644 --- a/src/ingest/index.ts +++ b/src/ingest/index.ts @@ -86,6 +86,36 @@ export async function ingest( projectPath: options.projectPath, }); } + case "claude-web": { + const { ingestClaudeWeb } = await import("./claude-web"); + const filePath = options.filePath; + if (!filePath) { + return { + agent: "claude-web", + sessionsFound: 0, + sessionsIngested: 0, + messagesIngested: 0, + skipped: 0, + errors: ["File path required: smriti ingest claude-web "], + }; + } + return ingestClaudeWeb(db, filePath, baseOptions); + } + case "claude-web-memory": { + const { ingestClaudeWebMemories } = await import("./claude-web"); + const filePath = options.filePath; + if (!filePath) { + return { + agent: "claude-web", + sessionsFound: 0, + sessionsIngested: 0, + messagesIngested: 0, + skipped: 0, + errors: ["File path required: smriti ingest claude-web-memory "], + }; + } + return ingestClaudeWebMemories(db, filePath, baseOptions); + } case "file": case "generic": { const { ingestGeneric } = await import("./generic"); @@ -106,7 +136,7 @@ export async function ingest( sessionsIngested: 0, messagesIngested: 0, skipped: 0, - errors: [`Unknown agent: ${agent}. Use: claude, codex, cursor, or file`], + errors: [`Unknown agent: ${agent}. 
Use: claude, codex, cursor, claude-web, or file`], }; } } diff --git a/src/search/index.ts b/src/search/index.ts index 2d192a3..9cc30b0 100644 --- a/src/search/index.ts +++ b/src/search/index.ts @@ -18,6 +18,10 @@ export type SearchFilters = { project?: string; agent?: string; limit?: number; + includeThinking?: boolean; // Default: false (opt-in for privacy) + includeArtifacts?: boolean; // Default: true + includeAttachments?: boolean; // Default: true + includeVoiceNotes?: boolean; // Default: true }; export type SearchResult = { @@ -48,13 +52,31 @@ export function searchFiltered( ): SearchResult[] { const limit = filters.limit || DEFAULT_SEARCH_LIMIT; + // Build column list for FTS5 column filter + const columns = ["session_title", "role", "content"]; + if (filters.includeThinking) columns.push("thinking"); + if (filters.includeArtifacts !== false) columns.push("artifacts"); + if (filters.includeAttachments !== false) columns.push("attachments"); + if (filters.includeVoiceNotes !== false) columns.push("voice_notes"); + + // BM25 weights: session_title, role, content, thinking, artifacts, attachments, voice_notes + const weights = [ + 10.0, // session_title + 1.0, // role + 5.0, // content + filters.includeThinking ? 3.0 : 0.0, + filters.includeArtifacts !== false ? 4.0 : 0.0, + filters.includeAttachments !== false ? 4.0 : 0.0, + filters.includeVoiceNotes !== false ? 
4.0 : 0.0, + ]; + // Build dynamic WHERE clause const conditions: string[] = []; const params: any[] = []; - // FTS match condition - conditions.push(`mf.content MATCH ?`); - params.push(query); + // FTS match condition with column filter + conditions.push(`memory_fts MATCH ?`); + params.push(`{${columns.join(" ")}} : ${query}`); // Category filter if (filters.category) { @@ -99,7 +121,7 @@ export function searchFiltered( mm.id AS message_id, mm.role, mm.content, - (1.0 / (1.0 + ABS(bm25(memory_fts)))) AS score, + (1.0 / (1.0 + ABS(bm25(memory_fts, ${weights.join(", ")})))) AS score, 'fts' AS source, sm.project_id AS project, sm.agent_id AS agent diff --git a/src/search/recall.ts b/src/search/recall.ts index dd8a0f1..a61b03d 100644 --- a/src/search/recall.ts +++ b/src/search/recall.ts @@ -37,7 +37,9 @@ export async function recall( query: string, options: RecallOptions = {} ): Promise { - const hasFilters = options.category || options.project || options.agent; + const hasFilters = options.category || options.project || options.agent + || options.includeThinking || options.includeArtifacts === false + || options.includeAttachments === false || options.includeVoiceNotes === false; if (!hasFilters) { // Use QMD's native recall for unfiltered queries @@ -59,6 +61,10 @@ export async function recall( project: options.project, agent: options.agent, limit: options.limit || DEFAULT_RECALL_LIMIT, + includeThinking: options.includeThinking, + includeArtifacts: options.includeArtifacts, + includeAttachments: options.includeAttachments, + includeVoiceNotes: options.includeVoiceNotes, }); // Deduplicate by session (keep best score per session) diff --git a/test/claude-web.test.ts b/test/claude-web.test.ts new file mode 100644 index 0000000..fce6549 --- /dev/null +++ b/test/claude-web.test.ts @@ -0,0 +1,478 @@ +import { test, expect, beforeEach } from "bun:test"; +import { Database } from "bun:sqlite"; +import { initializeMemoryTables } from "../src/qmd"; +import { 
initializeSmritiTables, seedDefaults } from "../src/db"; +import { ingestClaudeWeb, ingestClaudeWebMemories } from "../src/ingest/claude-web"; +import { getExistingSessionIds } from "../src/ingest/index"; +import { tmpdir } from "os"; +import { join } from "path"; +import { writeFileSync, mkdirSync, rmSync } from "fs"; + +// ============================================================================= +// Test Helpers +// ============================================================================= + +let db: Database; +let tmpDir: string; + +function setupDb(): Database { + const d = new Database(":memory:"); + d.exec("PRAGMA journal_mode = WAL"); + d.exec("PRAGMA foreign_keys = ON"); + initializeMemoryTables(d); + initializeSmritiTables(d); + seedDefaults(d); + return d; +} + +function writeTmpJson(name: string, data: unknown): string { + const path = join(tmpDir, name); + writeFileSync(path, JSON.stringify(data)); + return path; +} + +function makeConversation(overrides: Record = {}) { + return { + uuid: "test-conv-001", + name: "Test Conversation", + summary: "A test summary", + created_at: "2025-06-01T10:00:00Z", + updated_at: "2025-06-01T11:00:00Z", + account: { uuid: "acc-1" }, + chat_messages: [ + { + uuid: "msg-001", + text: "Hello, can you help me?", + content: [{ type: "text", text: "Hello, can you help me?" }], + sender: "human", + created_at: "2025-06-01T10:00:00Z", + updated_at: "2025-06-01T10:00:00Z", + attachments: [], + files: [], + }, + { + uuid: "msg-002", + text: "", + content: [ + { type: "text", text: "Sure, I can help!" 
}, + ], + sender: "assistant", + created_at: "2025-06-01T10:00:05Z", + updated_at: "2025-06-01T10:00:05Z", + attachments: [], + files: [], + }, + ], + ...overrides, + }; +} + +beforeEach(() => { + db = setupDb(); + tmpDir = join(tmpdir(), `smriti-test-${Date.now()}`); + mkdirSync(tmpDir, { recursive: true }); +}); + +// ============================================================================= +// Basic Parsing +// ============================================================================= + +test("ingestClaudeWeb ingests conversations and creates sessions", async () => { + const conversations = [makeConversation()]; + const filePath = writeTmpJson("conversations.json", conversations); + + const result = await ingestClaudeWeb(db, filePath, { db }); + expect(result.agent).toBe("claude-web"); + expect(result.sessionsFound).toBe(1); + expect(result.sessionsIngested).toBe(1); + expect(result.messagesIngested).toBe(2); + expect(result.errors).toHaveLength(0); + + // Verify session metadata + const meta = db + .prepare("SELECT * FROM smriti_session_meta WHERE session_id = ?") + .get("test-conv-001") as any; + expect(meta).toBeTruthy(); + expect(meta.agent_id).toBe("claude-web"); +}); + +test("ingestClaudeWeb stores messages in QMD memory_messages", async () => { + const conversations = [makeConversation()]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const messages = db + .prepare("SELECT * FROM memory_messages WHERE session_id = ? 
ORDER BY id") + .all("test-conv-001") as any[]; + expect(messages.length).toBe(2); + expect(messages[0].role).toBe("user"); + expect(messages[0].content).toContain("Hello, can you help me?"); + expect(messages[1].role).toBe("assistant"); + expect(messages[1].content).toContain("Sure, I can help!"); +}); + +// ============================================================================= +// Artifact Extraction +// ============================================================================= + +test("ingestClaudeWeb extracts artifacts from tool_use blocks", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-art", + text: "", + content: [ + { + type: "tool_use", + name: "artifacts", + id: null, + input: { + id: "tax-calculator", + type: "application/vnd.ant.code", + title: "Tax Calculator", + command: "create", + content: "function calcTax(salary) { return salary * 0.3; }", + language: "javascript", + version_uuid: "v1", + }, + }, + ], + sender: "assistant", + created_at: "2025-06-01T10:00:05Z", + updated_at: "2025-06-01T10:00:05Z", + attachments: [], + files: [], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const artifacts = db + .prepare("SELECT * FROM smriti_artifacts WHERE session_id = ?") + .all("test-conv-001") as any[]; + expect(artifacts).toHaveLength(1); + expect(artifacts[0].artifact_id).toBe("tax-calculator"); + expect(artifacts[0].type).toBe("application/vnd.ant.code"); + expect(artifacts[0].title).toBe("Tax Calculator"); + expect(artifacts[0].command).toBe("create"); + expect(artifacts[0].language).toBe("javascript"); + expect(artifacts[0].content).toContain("calcTax"); +}); + +test("ingestClaudeWeb handles artifact updates (no type field)", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-update", + text: "", + content: [ + { + type: "tool_use", + name: 
"artifacts", + id: null, + input: { + id: "tax-calculator", + command: "update", + old_str: "salary * 0.3", + new_str: "salary * 0.25", + version_uuid: "v2", + }, + }, + ], + sender: "assistant", + created_at: "2025-06-01T10:01:00Z", + updated_at: "2025-06-01T10:01:00Z", + attachments: [], + files: [], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const artifacts = db + .prepare("SELECT * FROM smriti_artifacts WHERE session_id = ?") + .all("test-conv-001") as any[]; + expect(artifacts).toHaveLength(1); + expect(artifacts[0].command).toBe("update"); + expect(artifacts[0].type).toBeNull(); +}); + +// ============================================================================= +// Thinking Block Extraction +// ============================================================================= + +test("ingestClaudeWeb extracts thinking blocks", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-think", + text: "", + content: [ + { + type: "thinking", + thinking: "Let me analyze this step by step. First, the user wants...", + }, + { type: "text", text: "Here is my analysis." 
}, + ], + sender: "assistant", + created_at: "2025-06-01T10:00:05Z", + updated_at: "2025-06-01T10:00:05Z", + attachments: [], + files: [], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const thinking = db + .prepare("SELECT * FROM smriti_thinking WHERE session_id = ?") + .all("test-conv-001") as any[]; + expect(thinking).toHaveLength(1); + expect(thinking[0].thinking).toContain("step by step"); +}); + +// ============================================================================= +// Attachment Extraction +// ============================================================================= + +test("ingestClaudeWeb extracts attachments with content", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-att", + text: "Here is my config file", + content: [{ type: "text", text: "Here is my config file" }], + sender: "human", + created_at: "2025-06-01T10:00:00Z", + updated_at: "2025-06-01T10:00:00Z", + attachments: [ + { + file_name: "eslint.config.ts", + file_type: "txt", + file_size: 4531, + extracted_content: + "import { defineConfig } from 'eslint/config';", + }, + ], + files: [{ file_name: "eslint.config.ts" }], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const attachments = db + .prepare("SELECT * FROM smriti_attachments WHERE session_id = ?") + .all("test-conv-001") as any[]; + expect(attachments).toHaveLength(1); + expect(attachments[0].file_name).toBe("eslint.config.ts"); + expect(attachments[0].file_type).toBe("txt"); + expect(attachments[0].file_size).toBe(4531); + expect(attachments[0].content).toContain("defineConfig"); +}); + +// ============================================================================= +// Voice Note Extraction +// ============================================================================= + 
+test("ingestClaudeWeb extracts voice notes", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-voice", + text: "", + content: [ + { + type: "voice_note", + title: "Clarification Needed", + text: "\nNeed more context\n\nAbout a person?\n", + }, + ], + sender: "human", + created_at: "2025-06-01T10:00:00Z", + updated_at: "2025-06-01T10:00:00Z", + attachments: [], + files: [], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const voiceNotes = db + .prepare("SELECT * FROM smriti_voice_notes WHERE session_id = ?") + .all("test-conv-001") as any[]; + expect(voiceNotes).toHaveLength(1); + expect(voiceNotes[0].title).toBe("Clarification Needed"); + expect(voiceNotes[0].transcript).toContain("Need more context"); +}); + +// ============================================================================= +// Deduplication +// ============================================================================= + +test("ingestClaudeWeb skips already-ingested sessions by UUID", async () => { + const conversations = [makeConversation()]; + const filePath = writeTmpJson("conversations.json", conversations); + + // First ingest + const r1 = await ingestClaudeWeb(db, filePath, { db }); + expect(r1.sessionsIngested).toBe(1); + + // Second ingest — should skip + const existingSessionIds = getExistingSessionIds(db); + const r2 = await ingestClaudeWeb(db, filePath, { db, existingSessionIds }); + expect(r2.sessionsIngested).toBe(0); + expect(r2.skipped).toBe(1); +}); + +// ============================================================================= +// Edge Cases +// ============================================================================= + +test("ingestClaudeWeb skips empty conversations", async () => { + const conversations = [ + makeConversation({ chat_messages: [] }), + makeConversation({ uuid: "test-conv-002" }), + ]; + const filePath = 
writeTmpJson("conversations.json", conversations); + + const result = await ingestClaudeWeb(db, filePath, { db }); + expect(result.sessionsFound).toBe(2); + expect(result.sessionsIngested).toBe(1); + expect(result.skipped).toBe(1); +}); + +test("ingestClaudeWeb handles invalid JSON file", async () => { + const filePath = join(tmpDir, "bad.json"); + writeFileSync(filePath, "not json"); + + const result = await ingestClaudeWeb(db, filePath, { db }); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toContain("Failed to read"); +}); + +test("ingestClaudeWeb handles multiple content block types in one message", async () => { + const conversations = [ + makeConversation({ + chat_messages: [ + { + uuid: "msg-mixed", + text: "", + content: [ + { + type: "thinking", + thinking: "Let me think about this...", + }, + { type: "text", text: "Here is a calculator:" }, + { + type: "tool_use", + name: "artifacts", + id: null, + input: { + id: "calc", + type: "application/vnd.ant.code", + title: "Calculator", + command: "create", + content: "const add = (a, b) => a + b;", + language: "typescript", + }, + }, + ], + sender: "assistant", + created_at: "2025-06-01T10:00:05Z", + updated_at: "2025-06-01T10:00:05Z", + attachments: [], + files: [], + }, + ], + }), + ]; + const filePath = writeTmpJson("conversations.json", conversations); + + await ingestClaudeWeb(db, filePath, { db }); + + const artifacts = db + .prepare("SELECT * FROM smriti_artifacts WHERE session_id = ?") + .all("test-conv-001") as any[]; + const thinking = db + .prepare("SELECT * FROM smriti_thinking WHERE session_id = ?") + .all("test-conv-001") as any[]; + + expect(artifacts).toHaveLength(1); + expect(thinking).toHaveLength(1); + expect(artifacts[0].title).toBe("Calculator"); + expect(thinking[0].thinking).toContain("think about this"); +}); + +// ============================================================================= +// Memory Import +// 
============================================================================= + +test("ingestClaudeWebMemories imports conversation and project memories", async () => { + const memories = [ + { + conversations_memory: "User is a frontend engineer based in Goa.", + project_memories: { + "project-1": "Working on HRMS platform.", + "project-2": "Building AI memory layer.", + }, + account_uuid: "test-account", + }, + ]; + const filePath = writeTmpJson("memories.json", memories); + + const result = await ingestClaudeWebMemories(db, filePath, { db }); + expect(result.sessionsIngested).toBe(1); + expect(result.messagesIngested).toBe(3); // 1 conversation + 2 project + + const messages = db + .prepare( + "SELECT * FROM memory_messages WHERE session_id = ? ORDER BY id" + ) + .all("claude-web-memory-test-account") as any[]; + expect(messages).toHaveLength(3); + expect(messages[0].content).toContain("frontend engineer"); + expect(messages[1].content).toContain("HRMS"); + expect(messages[2].content).toContain("AI memory"); +}); + +test("ingestClaudeWebMemories skips if already imported", async () => { + const memories = [ + { + conversations_memory: "Test memory", + project_memories: {}, + account_uuid: "test-account", + }, + ]; + const filePath = writeTmpJson("memories.json", memories); + + await ingestClaudeWebMemories(db, filePath, { db }); + + const existingSessionIds = getExistingSessionIds(db); + const r2 = await ingestClaudeWebMemories(db, filePath, { + db, + existingSessionIds, + }); + expect(r2.skipped).toBe(1); + expect(r2.sessionsIngested).toBe(0); +}); diff --git a/test/search.test.ts b/test/search.test.ts index b00ded9..18d2334 100644 --- a/test/search.test.ts +++ b/test/search.test.ts @@ -1,7 +1,18 @@ import { test, expect, beforeAll, afterAll } from "bun:test"; import { Database } from "bun:sqlite"; -import { initializeSmritiTables, seedDefaults, upsertSessionMeta, upsertProject, tagSession } from "../src/db"; -import { listSessions } from "../src/search/index"; 
+import { + initializeSmritiTables, + seedDefaults, + upsertSessionMeta, + upsertProject, + tagSession, + migrateFTSToV2, + insertArtifact, + insertThinking, + insertAttachment, + insertVoiceNote, +} from "../src/db"; +import { searchFiltered, listSessions } from "../src/search/index"; let db: Database; @@ -9,7 +20,7 @@ beforeAll(() => { db = new Database(":memory:"); db.exec("PRAGMA foreign_keys = ON"); - // Create QMD tables + // Create QMD tables with old 3-column FTS (pre-migration) db.exec(` CREATE TABLE memory_sessions ( id TEXT PRIMARY KEY, @@ -53,7 +64,8 @@ beforeAll(() => { INSERT INTO memory_sessions (id, title, created_at, updated_at) VALUES ('s1', 'Auth Setup', '${now}', '${now}'), ('s2', 'Database Design', '${now}', '${now}'), - ('s3', 'Bug Fix Login', '${now}', '${now}'); + ('s3', 'Bug Fix Login', '${now}', '${now}'), + ('s4', 'Claude Web Chat', '${now}', '${now}'); `); db.exec(` @@ -63,19 +75,37 @@ beforeAll(() => { ('s2', 'user', 'Design the database schema for users', 'h3', '${now}'), ('s2', 'assistant', 'Here is the schema with users and roles tables', 'h4', '${now}'), ('s3', 'user', 'The login page has an error when submitting', 'h5', '${now}'), - ('s3', 'assistant', 'Fixed the login bug by validating input', 'h6', '${now}'); + ('s3', 'assistant', 'Fixed the login bug by validating input', 'h6', '${now}'), + ('s4', 'user', 'Help me build a tax calculator', 'h7', '${now}'), + ('s4', 'assistant', 'Here is a tax calculator implementation', 'h8', '${now}'); `); + // Insert sidecar content for s4 (claude-web session) + // message_id 8 = the assistant response in s4 + insertArtifact(db, 8, "s4", "art-1", "code", "Tax Calculator", "create", "typescript", + "function calculateTax(income: number): number { return income * 0.3; }", now); + insertThinking(db, 8, "s4", + "Let me think step by step about the marginal tax bracket calculation", now); + insertAttachment(db, 7, "s4", "tax-rules.pdf", "application/pdf", 1024, + "Withholding tables for employers: 
standard deduction amounts and exemption thresholds", now); + insertVoiceNote(db, 7, "s4", "Tax requirements", + "I need a calculator that handles progressive tax brackets for US federal income tax", now); + upsertProject(db, "myapp", "/path/to/myapp"); upsertProject(db, "other", "/path/to/other"); + upsertProject(db, "taxapp", "/path/to/taxapp"); upsertSessionMeta(db, "s1", "claude-code", "myapp"); upsertSessionMeta(db, "s2", "claude-code", "myapp"); upsertSessionMeta(db, "s3", "codex", "other"); + upsertSessionMeta(db, "s4", "claude-web", "taxapp"); tagSession(db, "s1", "decision", 0.8, "auto"); tagSession(db, "s2", "architecture", 0.8, "auto"); tagSession(db, "s3", "bug", 0.8, "auto"); + + // Run migration to v2 FTS (adds sidecar columns and rebuilds index) + migrateFTSToV2(db); }); afterAll(() => { @@ -84,7 +114,7 @@ afterAll(() => { test("listSessions returns all active sessions", () => { const sessions = listSessions(db); - expect(sessions.length).toBe(3); + expect(sessions.length).toBe(4); }); test("listSessions filters by project", () => { @@ -117,3 +147,101 @@ test("listSessions respects limit", () => { const sessions = listSessions(db, { limit: 1 }); expect(sessions.length).toBe(1); }); + +// ============================================================================= +// FTS v2: Sidecar Content Search +// ============================================================================= + +test("FTS v2 migration adds sidecar columns", () => { + const cols = db.prepare("PRAGMA table_info(memory_fts)").all() as { name: string }[]; + const colNames = cols.map((c) => c.name); + expect(colNames).toContain("thinking"); + expect(colNames).toContain("artifacts"); + expect(colNames).toContain("attachments"); + expect(colNames).toContain("voice_notes"); +}); + +test("searchFiltered finds artifact content", () => { + const results = searchFiltered(db, "calculateTax"); + expect(results.length).toBeGreaterThan(0); + expect(results[0].session_id).toBe("s4"); +}); + 
+test("searchFiltered finds attachment content", () => { + const results = searchFiltered(db, "withholding employers"); + expect(results.length).toBeGreaterThan(0); + expect(results.some((r) => r.session_id === "s4")).toBe(true); +}); + +test("searchFiltered finds voice note content", () => { + const results = searchFiltered(db, "progressive tax"); + expect(results.length).toBeGreaterThan(0); + expect(results.some((r) => r.session_id === "s4")).toBe(true); +}); + +test("searchFiltered excludes thinking by default", () => { + // "marginal tax bracket" only appears in thinking blocks + const results = searchFiltered(db, "marginal bracket"); + expect(results.length).toBe(0); +}); + +test("searchFiltered includes thinking when opted in", () => { + const results = searchFiltered(db, "marginal bracket", { + includeThinking: true, + }); + expect(results.length).toBeGreaterThan(0); + expect(results[0].session_id).toBe("s4"); +}); + +test("searchFiltered respects --no-artifacts", () => { + // "calculateTax" only appears in artifact content + const results = searchFiltered(db, "calculateTax", { + includeArtifacts: false, + }); + expect(results.length).toBe(0); +}); + +test("searchFiltered respects --no-attachments", () => { + // "withholding" only appears in attachment content + const results = searchFiltered(db, "withholding", { + includeAttachments: false, + }); + expect(results.length).toBe(0); +}); + +test("searchFiltered respects --no-voice-notes", () => { + // "progressive" only appears in voice note transcript + const results = searchFiltered(db, "progressive", { + includeVoiceNotes: false, + }); + expect(results.length).toBe(0); +}); + +test("searchFiltered still finds regular content", () => { + const results = searchFiltered(db, "JWT tokens"); + expect(results.length).toBeGreaterThan(0); + expect(results[0].session_id).toBe("s1"); +}); + +test("searchFiltered combines sidecar + metadata filters", () => { + const results = searchFiltered(db, "calculateTax", { + 
agent: "claude-web", + }); + expect(results.length).toBeGreaterThan(0); + + const noResults = searchFiltered(db, "calculateTax", { + agent: "claude-code", + }); + expect(noResults.length).toBe(0); +}); + +test("migrateFTSToV2 is idempotent", () => { + // Running migration again should be a no-op + migrateFTSToV2(db); + const cols = db.prepare("PRAGMA table_info(memory_fts)").all() as { name: string }[]; + expect(cols.some((c) => c.name === "thinking")).toBe(true); + + // Data should still be intact + const results = searchFiltered(db, "calculateTax"); + expect(results.length).toBeGreaterThan(0); +}); From 0781b1ebe24ad6bd85d1664b037bfceddb49813d Mon Sep 17 00:00:00 2001 From: Baseline User Date: Tue, 3 Mar 2026 15:15:07 +0530 Subject: [PATCH 2/5] feat(db): add model-aware cost estimation and sidecar cleanup Add MODEL_PRICING map for Claude model families, estimateCost() for per-turn USD estimation, wire estimated_cost_usd into upsertSessionCosts, and add deleteSidecarRows() for force re-ingest cleanup. 
Co-Authored-By: Claude Opus 4.6 --- src/db.ts | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/db.ts b/src/db.ts index d468696..8533d56 100644 --- a/src/db.ts +++ b/src/db.ts @@ -719,6 +719,31 @@ export function insertError( ).run(messageId, sessionId, errorType, message, createdAt); } +// Per-million-token pricing by model family +const MODEL_PRICING: Record = { + "claude-opus-4": { input: 15.0, output: 75.0, cacheRead: 1.5 }, + "claude-sonnet-4": { input: 3.0, output: 15.0, cacheRead: 0.3 }, + "claude-haiku-4": { input: 0.8, output: 4.0, cacheRead: 0.08 }, +}; +const DEFAULT_PRICING = { input: 3.0, output: 15.0, cacheRead: 0.3 }; + +export function estimateCost( + model: string, + inputTokens: number, + outputTokens: number, + cacheTokens: number +): number { + // Match model family: "claude-sonnet-4-20250514" → "claude-sonnet-4" + const family = Object.keys(MODEL_PRICING).find((k) => model.startsWith(k)); + const pricing = family ? MODEL_PRICING[family] : DEFAULT_PRICING; + return ( + (inputTokens * pricing.input + + outputTokens * pricing.output + + cacheTokens * pricing.cacheRead) / + 1_000_000 + ); +} + export function upsertSessionCosts( db: Database, sessionId: string, @@ -728,16 +753,28 @@ export function upsertSessionCosts( cacheTokens: number, durationMs: number ): void { + const modelName = model || "unknown"; + const cost = estimateCost(modelName, inputTokens, outputTokens, cacheTokens); db.prepare( - `INSERT INTO smriti_session_costs(session_id, model, total_input_tokens, total_output_tokens, total_cache_tokens, turn_count, total_duration_ms) - VALUES(?, ?, ?, ?, ?, 1, ?) + `INSERT INTO smriti_session_costs(session_id, model, total_input_tokens, total_output_tokens, total_cache_tokens, estimated_cost_usd, turn_count, total_duration_ms) + VALUES(?, ?, ?, ?, ?, ?, 1, ?) 
ON CONFLICT(session_id, model) DO UPDATE SET total_input_tokens = total_input_tokens + excluded.total_input_tokens, total_output_tokens = total_output_tokens + excluded.total_output_tokens, total_cache_tokens = total_cache_tokens + excluded.total_cache_tokens, + estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd, turn_count = turn_count + 1, total_duration_ms = total_duration_ms + excluded.total_duration_ms` - ).run(sessionId, model || "unknown", inputTokens, outputTokens, cacheTokens, durationMs); + ).run(sessionId, modelName, inputTokens, outputTokens, cacheTokens, cost, durationMs); +} + +export function deleteSidecarRows(db: Database, sessionId: string): void { + db.prepare(`DELETE FROM smriti_tool_usage WHERE session_id = ?`).run(sessionId); + db.prepare(`DELETE FROM smriti_file_operations WHERE session_id = ?`).run(sessionId); + db.prepare(`DELETE FROM smriti_commands WHERE session_id = ?`).run(sessionId); + db.prepare(`DELETE FROM smriti_errors WHERE session_id = ?`).run(sessionId); + db.prepare(`DELETE FROM smriti_git_operations WHERE session_id = ?`).run(sessionId); + db.prepare(`DELETE FROM smriti_session_costs WHERE session_id = ?`).run(sessionId); } export function insertGitOperation( From e7fd347922b0e9506f6c32f9c27fd2d2fba89d95 Mon Sep 17 00:00:00 2001 From: Baseline User Date: Tue, 3 Mar 2026 15:15:31 +0530 Subject: [PATCH 3/5] feat(ingest): add --force flag for re-ingesting sessions Thread force option through all agent ingest paths. When enabled, deletes existing sidecar rows before re-processing to refresh tool usage, costs, errors, and file operations. Adds tool correlation map for linking tool calls to their results. 
Co-Authored-By: Claude Opus 4.6 --- src/ingest/blocks.ts | 22 +++--- src/ingest/index.ts | 22 +++++- src/ingest/store-gateway.ts | 42 ++++++++++- src/ingest/types.ts | 1 + test/blocks.test.ts | 92 ++++++++++++++++++++++++ test/store-gateway.test.ts | 140 +++++++++++++++++++++++++++++++++++- 6 files changed, 306 insertions(+), 13 deletions(-) diff --git a/src/ingest/blocks.ts b/src/ingest/blocks.ts index 974cfc9..d794847 100644 --- a/src/ingest/blocks.ts +++ b/src/ingest/blocks.ts @@ -37,6 +37,7 @@ export type RawContentBlock = { input?: Record; tool_use_id?: string; content?: string | RawContentBlock[]; + is_error?: boolean; source?: { type: string; media_type: string; data: string }; }; @@ -86,15 +87,14 @@ export function parseGitCommand(command: string): GitBlock | null { operation, }; - // Parse commit message + // Parse commit message — check heredoc first (greedy), then simple quoted if (operation === "commit") { - const msgMatch = command.match(/-m\s+["']([^"']+)["']/); - if (!msgMatch) { - // Try heredoc style: -m "$(cat <<'EOF'\n...\nEOF\n)" - const heredocMatch = command.match(/-m\s+"\$\(cat\s+<<'?EOF'?\n([\s\S]*?)\nEOF/); - if (heredocMatch) block.message = heredocMatch[1].trim(); + const heredocMatch = command.match(/-m\s+"\$\(cat\s+<<'?EOF'?\n([\s\S]*?)\nEOF/); + if (heredocMatch) { + block.message = heredocMatch[1].trim(); } else { - block.message = msgMatch[1]; + const msgMatch = command.match(/-m\s+["']([^"']+)["']/); + if (msgMatch) block.message = msgMatch[1]; } } @@ -313,12 +313,18 @@ export function parseToolResult( .join("\n"); } + // Parse exit code from Bash tool output (e.g. "Exit code: 1" or "Exit code 1") + let exitCode: number | undefined; + const exitMatch = output.match(/^Exit code:?\s*(\d+)/m); + if (exitMatch) exitCode = parseInt(exitMatch[1], 10); + return { type: "tool_result", toolId: toolUseId, success: !isError, output: truncate(output, STORAGE_LIMITS.commandOutput), error: isError ? 
truncate(output, STORAGE_LIMITS.commandOutput) : undefined, + exitCode, }; } @@ -360,7 +366,7 @@ export function rawBlockToMessageBlocks(raw: RawContentBlock): MessageBlock[] { parseToolResult( raw.tool_use_id || "", raw.content, - false + raw.is_error ?? false ), ]; diff --git a/src/ingest/index.ts b/src/ingest/index.ts index 21baba1..f8541b6 100644 --- a/src/ingest/index.ts +++ b/src/ingest/index.ts @@ -9,6 +9,8 @@ import type { Database } from "bun:sqlite"; import type { ParsedMessage, StructuredMessage } from "./types"; import { resolveSession } from "./session-resolver"; import { storeBlocks, storeCosts, storeMessage, storeSession } from "./store-gateway"; +import type { ToolCorrelationMap } from "./store-gateway"; +import { deleteSidecarRows } from "../db"; // ============================================================================= // Types — re-export from types.ts @@ -51,6 +53,7 @@ async function ingestParsedSessions( explicitProjectId?: string; explicitProjectPath?: string; incremental?: boolean; + force?: boolean; } = { existingSessionIds: new Set(), } @@ -66,7 +69,7 @@ async function ingestParsedSessions( const useSessionTxn = process.env.SMRITI_INGEST_SESSION_TXN !== "0"; for (const session of sessions) { - if (!options.incremental && options.existingSessionIds.has(session.sessionId)) { + if (!options.force && !options.incremental && options.existingSessionIds.has(session.sessionId)) { result.skipped++; continue; } @@ -96,8 +99,13 @@ async function ingestParsedSessions( continue; } + const correlationMap: ToolCorrelationMap = new Map(); if (useSessionTxn) db.exec("BEGIN IMMEDIATE"); try { + // Force mode: delete existing sidecar rows before re-processing + if (options.force && options.existingSessionIds.has(session.sessionId)) { + deleteSidecarRows(db, session.sessionId); + } for (const msg of messagesToIngest) { const content = isStructuredMessage(msg) ? 
msg.plainText || "(structured content)" : msg.content; const messageOptions = isStructuredMessage(msg) @@ -122,7 +130,8 @@ async function ingestParsedSessions( session.sessionId, resolved.projectId, msg.blocks, - msg.timestamp || new Date().toISOString() + msg.timestamp || new Date().toISOString(), + correlationMap ); if (msg.metadata.tokenUsage) { @@ -210,6 +219,7 @@ export async function ingest( title?: string; sessionId?: string; projectId?: string; + force?: boolean; } = {} ): Promise { const existingSessionIds = getExistingSessionIds(db); @@ -235,7 +245,8 @@ export async function ingest( existingSessionIds, onProgress: options.onProgress, explicitProjectId: options.projectId, - incremental: true, + incremental: !options.force, + force: options.force, }); } case "codex": { @@ -250,6 +261,7 @@ export async function ingest( existingSessionIds, onProgress: options.onProgress, explicitProjectId: options.projectId, + force: options.force, }); } case "cursor": { @@ -275,6 +287,7 @@ export async function ingest( existingSessionIds, onProgress: options.onProgress, explicitProjectId: options.projectId, + force: options.force, }); } case "cline": { @@ -290,6 +303,7 @@ export async function ingest( existingSessionIds, onProgress: options.onProgress, explicitProjectId: options.projectId, + force: options.force, }); } case "copilot": { @@ -308,6 +322,7 @@ export async function ingest( existingSessionIds, onProgress: options.onProgress, explicitProjectId: options.projectId, + force: options.force, }); } case "file": @@ -338,6 +353,7 @@ export async function ingest( onProgress: options.onProgress, explicitProjectId: options.projectId, explicitProjectPath: options.projectPath, + force: options.force, } ); return result; diff --git a/src/ingest/store-gateway.ts b/src/ingest/store-gateway.ts index 195199d..8caa735 100644 --- a/src/ingest/store-gateway.ts +++ b/src/ingest/store-gateway.ts @@ -18,6 +18,9 @@ export type StoreMessageResult = { error?: string; }; +export type 
ToolCorrelation = { messageId: number; toolName: string }; +export type ToolCorrelationMap = Map; + export async function storeMessage( db: Database, sessionId: string, @@ -39,7 +42,8 @@ export function storeBlocks( sessionId: string, projectId: string | null, blocks: MessageBlock[], - createdAt: string + createdAt: string, + correlationMap?: ToolCorrelationMap ): void { for (const block of blocks) { switch (block.type) { @@ -54,6 +58,42 @@ export function storeBlocks( null, createdAt ); + // Register in correlation map for later result matching + if (correlationMap && block.toolId) { + correlationMap.set(block.toolId, { messageId, toolName: block.toolName }); + } + break; + case "tool_result": + if (correlationMap && block.toolId) { + const corr = correlationMap.get(block.toolId); + if (corr) { + // Update tool_usage success from actual result + db.prepare( + `UPDATE smriti_tool_usage SET success = ? WHERE message_id = ? AND session_id = ? AND tool_name = ?` + ).run(block.success ? 1 : 0, corr.messageId, sessionId, corr.toolName); + + // Backfill exit code for Bash commands + if (corr.toolName === "Bash" && block.exitCode !== undefined) { + db.prepare( + `UPDATE smriti_commands SET exit_code = ? WHERE message_id = ? 
AND session_id = ?` + ).run(block.exitCode, corr.messageId, sessionId); + } + + // Insert error row for failed tools + if (!block.success) { + insertError( + db, + corr.messageId, + sessionId, + "tool_failure", + `${corr.toolName}: ${block.error || block.output}`.slice(0, 2000), + createdAt + ); + } + + correlationMap.delete(block.toolId); + } + } break; case "file_op": insertFileOperation( diff --git a/src/ingest/types.ts b/src/ingest/types.ts index 233e5db..3506443 100644 --- a/src/ingest/types.ts +++ b/src/ingest/types.ts @@ -47,6 +47,7 @@ export type ToolResultBlock = { success: boolean; output: string; error?: string; + exitCode?: number; durationMs?: number; }; diff --git a/test/blocks.test.ts b/test/blocks.test.ts index 62ffc93..c2a14ff 100644 --- a/test/blocks.test.ts +++ b/test/blocks.test.ts @@ -449,3 +449,95 @@ test("systemEntryToBlock maps pr-link", () => { expect(block.eventType).toBe("pr_link"); expect(block.data.prNumber).toBe(42); }); + +// ============================================================================= +// is_error propagation +// ============================================================================= + +test("extractBlocks passes is_error from raw tool_result", () => { + const blocks = extractBlocks([ + { + type: "tool_result", + tool_use_id: "tool_err", + content: "Permission denied", + is_error: true, + }, + ]); + expect(blocks.length).toBe(1); + expect(blocks[0].type).toBe("tool_result"); + if (blocks[0].type === "tool_result") { + expect(blocks[0].success).toBe(false); + expect(blocks[0].error).toBe("Permission denied"); + } +}); + +test("extractBlocks defaults is_error to false when not present", () => { + const blocks = extractBlocks([ + { + type: "tool_result", + tool_use_id: "tool_ok", + content: "Success", + }, + ]); + expect(blocks.length).toBe(1); + if (blocks[0].type === "tool_result") { + expect(blocks[0].success).toBe(true); + expect(blocks[0].error).toBeUndefined(); + } +}); + +// 
============================================================================= +// HEREDOC commit message parsing +// ============================================================================= + +test("parseGitCommand extracts HEREDOC commit message", () => { + const command = `git commit -m "\$(cat <<'EOF' +Fix the authentication bug + +This resolves the login issue by updating the token validation. + +Co-Authored-By: Claude +EOF +)"`; + const block = parseGitCommand(command); + expect(block).not.toBeNull(); + expect(block!.operation).toBe("commit"); + expect(block!.message).toContain("Fix the authentication bug"); + expect(block!.message).toContain("Co-Authored-By"); +}); + +test("parseGitCommand prefers HEREDOC over simple quote match", () => { + // The -m "$(cat <<'EOF' ... pattern should NOT match as simple quoted + const command = `git commit -m "\$(cat <<'EOF' +Real message here +EOF +)"`; + const block = parseGitCommand(command); + expect(block).not.toBeNull(); + expect(block!.message).toBe("Real message here"); + expect(block!.message).not.toContain("$(cat"); +}); + +// ============================================================================= +// Exit code parsing +// ============================================================================= + +test("parseToolResult extracts exit code from output", () => { + const result = parseToolResult("t1", "some output\nExit code: 1\nmore output"); + expect(result.exitCode).toBe(1); +}); + +test("parseToolResult extracts exit code without colon", () => { + const result = parseToolResult("t1", "Exit code 127"); + expect(result.exitCode).toBe(127); +}); + +test("parseToolResult returns undefined exitCode when not present", () => { + const result = parseToolResult("t1", "normal output without exit code"); + expect(result.exitCode).toBeUndefined(); +}); + +test("parseToolResult extracts exit code 0", () => { + const result = parseToolResult("t1", "Exit code: 0"); + expect(result.exitCode).toBe(0); +}); diff --git 
a/test/store-gateway.test.ts b/test/store-gateway.test.ts index 088589c..1223550 100644 --- a/test/store-gateway.test.ts +++ b/test/store-gateway.test.ts @@ -1,8 +1,9 @@ import { test, expect, beforeEach, afterEach } from "bun:test"; import { Database } from "bun:sqlite"; import { initializeMemoryTables } from "../src/qmd"; -import { initializeSmritiTables, seedDefaults } from "../src/db"; +import { initializeSmritiTables, seedDefaults, estimateCost } from "../src/db"; import { storeMessage, storeBlocks, storeSession, storeCosts } from "../src/ingest/store-gateway"; +import type { ToolCorrelationMap } from "../src/ingest/store-gateway"; import type { MessageBlock } from "../src/ingest/types"; let db: Database; @@ -121,3 +122,140 @@ test("storeCosts accumulates into smriti_session_costs", () => { expect(row!.turn_count).toBe(2); expect(row!.total_duration_ms).toBe(1500); }); + +// ============================================================================= +// Correlation map — tool_result updates tool_call success +// ============================================================================= + +test("correlation map updates tool_usage success on tool_result", () => { + const now = new Date().toISOString(); + const sessionId = "s-corr"; + db.prepare(`INSERT INTO memory_sessions (id, title, created_at, updated_at) VALUES (?, ?, ?, ?)`).run( + sessionId, "corr session", now, now + ); + db.prepare( + `INSERT INTO memory_messages (id, session_id, role, content, hash, created_at) VALUES (?, ?, ?, ?, ?, ?)` + ).run(200, sessionId, "assistant", "call payload", "h-corr-call", now); + db.prepare( + `INSERT INTO memory_messages (id, session_id, role, content, hash, created_at) VALUES (?, ?, ?, ?, ?, ?)` + ).run(201, sessionId, "user", "result payload", "h-corr-result", now); + + const correlationMap: ToolCorrelationMap = new Map(); + + // Store tool_call blocks (assistant turn) + const callBlocks: MessageBlock[] = [ + { type: "tool_call", toolId: "tc-1", toolName: "Bash", 
input: { command: "ls" }, description: "list files" }, + { type: "command", command: "ls", isGit: false }, + ]; + storeBlocks(db, 200, sessionId, null, callBlocks, now, correlationMap); + + // Verify tool_usage initially has success=1 + const before = db.prepare( + `SELECT success FROM smriti_tool_usage WHERE message_id = 200 AND session_id = ?` + ).get(sessionId) as { success: number }; + expect(before.success).toBe(1); + + // Store tool_result blocks (user turn) — mark as failed + const resultBlocks: MessageBlock[] = [ + { type: "tool_result", toolId: "tc-1", success: false, output: "ls: error", error: "ls: error" }, + ]; + storeBlocks(db, 201, sessionId, null, resultBlocks, now, correlationMap); + + // Verify tool_usage success was updated to 0 + const after = db.prepare( + `SELECT success FROM smriti_tool_usage WHERE message_id = 200 AND session_id = ?` + ).get(sessionId) as { success: number }; + expect(after.success).toBe(0); + + // Verify error row was inserted + const errRow = db.prepare( + `SELECT error_type, message FROM smriti_errors WHERE session_id = ?` + ).get(sessionId) as { error_type: string; message: string } | null; + expect(errRow).not.toBeNull(); + expect(errRow!.error_type).toBe("tool_failure"); + expect(errRow!.message).toContain("Bash"); +}); + +test("correlation map backfills exit code on Bash commands", () => { + const now = new Date().toISOString(); + const sessionId = "s-exit"; + db.prepare(`INSERT INTO memory_sessions (id, title, created_at, updated_at) VALUES (?, ?, ?, ?)`).run( + sessionId, "exit session", now, now + ); + db.prepare( + `INSERT INTO memory_messages (id, session_id, role, content, hash, created_at) VALUES (?, ?, ?, ?, ?, ?)` + ).run(300, sessionId, "assistant", "bash call", "h-exit-call", now); + db.prepare( + `INSERT INTO memory_messages (id, session_id, role, content, hash, created_at) VALUES (?, ?, ?, ?, ?, ?)` + ).run(301, sessionId, "user", "bash result", "h-exit-result", now); + + const correlationMap: 
ToolCorrelationMap = new Map(); + + // Store tool_call + command + const callBlocks: MessageBlock[] = [ + { type: "tool_call", toolId: "tc-bash", toolName: "Bash", input: { command: "bun test" } }, + { type: "command", command: "bun test", isGit: false }, + ]; + storeBlocks(db, 300, sessionId, null, callBlocks, now, correlationMap); + + // Initially exit_code is NULL + const before = db.prepare( + `SELECT exit_code FROM smriti_commands WHERE message_id = 300 AND session_id = ?` + ).get(sessionId) as { exit_code: number | null }; + expect(before.exit_code).toBeNull(); + + // Store tool_result with exit code + const resultBlocks: MessageBlock[] = [ + { type: "tool_result", toolId: "tc-bash", success: false, output: "Exit code: 1", exitCode: 1 }, + ]; + storeBlocks(db, 301, sessionId, null, resultBlocks, now, correlationMap); + + // Exit code should be backfilled + const after = db.prepare( + `SELECT exit_code FROM smriti_commands WHERE message_id = 300 AND session_id = ?` + ).get(sessionId) as { exit_code: number | null }; + expect(after.exit_code).toBe(1); +}); + +// ============================================================================= +// Cost estimation +// ============================================================================= + +test("estimateCost calculates opus pricing", () => { + const cost = estimateCost("claude-opus-4-20250514", 1_000_000, 100_000, 500_000); + // 1M * 15/1M + 100K * 75/1M + 500K * 1.5/1M = 15 + 7.5 + 0.75 = 23.25 + expect(cost).toBeCloseTo(23.25, 2); +}); + +test("estimateCost calculates sonnet pricing", () => { + const cost = estimateCost("claude-sonnet-4-20250514", 1_000_000, 100_000, 0); + // 1M * 3/1M + 100K * 15/1M = 3 + 1.5 = 4.5 + expect(cost).toBeCloseTo(4.5, 2); +}); + +test("estimateCost calculates haiku pricing", () => { + const cost = estimateCost("claude-haiku-4.5-20251001", 1_000_000, 100_000, 0); + // 1M * 0.8/1M + 100K * 4/1M = 0.8 + 0.4 = 1.2 + expect(cost).toBeCloseTo(1.2, 2); +}); + +test("estimateCost falls 
back to default pricing for unknown models", () => { + const cost = estimateCost("unknown-model", 1_000_000, 100_000, 0); + // default = sonnet pricing: 1M * 3/1M + 100K * 15/1M = 3 + 1.5 = 4.5 + expect(cost).toBeCloseTo(4.5, 2); +}); + +test("storeCosts accumulates estimated_cost_usd", () => { + storeCosts(db, "s-cost-usd", "claude-sonnet-4-20250514", 100_000, 10_000, 0, 1000); + storeCosts(db, "s-cost-usd", "claude-sonnet-4-20250514", 100_000, 10_000, 0, 500); + + const row = db + .prepare( + `SELECT estimated_cost_usd FROM smriti_session_costs WHERE session_id = ? AND model = ?` + ) + .get("s-cost-usd", "claude-sonnet-4-20250514") as { estimated_cost_usd: number } | null; + + expect(row).not.toBeNull(); + // Each call: 100K * 3/1M + 10K * 15/1M = 0.3 + 0.15 = 0.45, x2 = 0.9 + expect(row!.estimated_cost_usd).toBeCloseTo(0.9, 4); +}); From f35b92e1faa4e5546e7e2cbb5e3c349c0d468ac8 Mon Sep 17 00:00:00 2001 From: Baseline User Date: Tue, 3 Mar 2026 15:16:03 +0530 Subject: [PATCH 4/5] feat(insights): add cost & usage analytics module with CLI commands New src/insights/ module with query functions for overview dashboard, session deep-dives, project analysis, cost breakdowns, error analysis, and tool reliability metrics. Wire CLI subcommands: smriti insights [session|project|costs|errors|tools]. 
Co-Authored-By: Claude Opus 4.6 --- src/index.ts | 85 ++++++++ src/insights/format.ts | 345 +++++++++++++++++++++++++++++ src/insights/index.ts | 478 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 908 insertions(+) create mode 100644 src/insights/format.ts create mode 100644 src/insights/index.ts diff --git a/src/index.ts b/src/index.ts index 57e9f8a..b520131 100644 --- a/src/index.ts +++ b/src/index.ts @@ -23,6 +23,23 @@ import { recentSessionIds, formatCompare, } from "./context"; +import { + getOverview, + getSessionInsights, + getProjectInsights, + getCostBreakdown, + getErrorAnalysis, + getToolStats, + getRecommendations, +} from "./insights/index"; +import { + formatOverview, + formatSessionInsights, + formatProjectInsights, + formatCostBreakdown, + formatErrorAnalysis, + formatToolStats, +} from "./insights/format"; import { formatSessionList, formatSearchResults, @@ -91,6 +108,7 @@ Commands: show Show session messages status Memory statistics projects List projects + insights [subcommand] Cost & usage analysis dashboard embed Embed new messages for vector search upgrade Update smriti to the latest version help Show this help @@ -109,6 +127,7 @@ Ingest options: smriti ingest cursor --project-path smriti ingest file [--format chat|jsonl] [--title ] smriti ingest all Ingest from all known agents (claude, codex, cline, copilot) + --force Re-ingest sessions (delete sidecar data, re-extract) Recall options: --synthesize Synthesize results via Ollama @@ -128,6 +147,14 @@ Share options: --segmented Use 3-stage segmentation pipeline (beta) --min-relevance Relevance threshold for segmented mode (default: 6) +Insights options: + smriti insights Full dashboard + smriti insights session Session deep dive + smriti insights project Project analysis + smriti insights costs [--days N] Cost breakdown + smriti insights errors [--project ] Error analysis + smriti insights tools [--project ] Tool reliability + Examples: smriti ingest claude smriti ingest copilot @@ 
-137,6 +164,7 @@ Examples: smriti list --category decision --project myapp smriti share --category decision smriti sync + smriti insights --json smriti upgrade `; @@ -191,6 +219,7 @@ async function main() { title: getArg(args, "--title"), sessionId: getArg(args, "--session"), projectId: getArg(args, "--project"), + force: hasFlag(args, "--force"), }); console.log(formatIngestResult(result)); @@ -692,6 +721,62 @@ async function main() { } } + // ===================================================================== + // INSIGHTS + // ===================================================================== + case "insights": { + const sub = args[1]; + const useJson = hasFlag(args, "--json"); + + if (sub === "session") { + const id = args[2]; + if (!id) { + console.error("Usage: smriti insights session "); + process.exit(1); + } + const report = getSessionInsights(db, id); + if (!report) { + console.error(`Session not found: ${id}`); + process.exit(1); + } + console.log(useJson ? json(report) : formatSessionInsights(report)); + } else if (sub === "project") { + const id = args[2]; + if (!id) { + console.error("Usage: smriti insights project "); + process.exit(1); + } + const report = getProjectInsights(db, id); + if (!report) { + console.error(`Project not found or has no data: ${id}`); + process.exit(1); + } + console.log(useJson ? json(report) : formatProjectInsights(report)); + } else if (sub === "costs") { + const days = Number(getArg(args, "--days")) || undefined; + const report = getCostBreakdown(db, { days }); + console.log(useJson ? json(report) : formatCostBreakdown(report)); + } else if (sub === "errors") { + const project = getArg(args, "--project"); + const report = getErrorAnalysis(db, { project }); + console.log(useJson ? json(report) : formatErrorAnalysis(report)); + } else if (sub === "tools") { + const project = getArg(args, "--project"); + const report = getToolStats(db, { project }); + console.log(useJson ? 
json(report) : formatToolStats(report)); + } else { + // Default: full dashboard + const overview = getOverview(db); + const recs = getRecommendations(db); + if (useJson) { + console.log(json({ ...overview, recommendations: recs })); + } else { + console.log(formatOverview(overview, recs)); + } + } + break; + } + // ===================================================================== // UNKNOWN // ===================================================================== diff --git a/src/insights/format.ts b/src/insights/format.ts new file mode 100644 index 0000000..be0f8a2 --- /dev/null +++ b/src/insights/format.ts @@ -0,0 +1,345 @@ +/** + * insights/format.ts - CLI formatters for insights reports + */ + +import { table } from "../format"; +import type { + OverviewReport, + SessionReport, + ProjectReport, + CostReport, + ErrorReport, + ToolReport, + Recommendation, +} from "./index"; + +// ============================================================================= +// Helpers +// ============================================================================= + +function dollar(n: number): string { + return `$${n.toFixed(2)}`; +} + +function pct(n: number): string { + return `${(n * 100).toFixed(1)}%`; +} + +function num(n: number): string { + return n.toLocaleString(); +} + +// ============================================================================= +// Overview +// ============================================================================= + +export function formatOverview(report: OverviewReport, recs: Recommendation[]): string { + const lines: string[] = []; + + lines.push("## Smriti Insights Dashboard\n"); + lines.push(`Sessions: ${num(report.totalSessions)} | Messages: ${num(report.totalMessages)} | Total Cost: ${dollar(report.totalCost)}\n`); + + // Cost by model + if (report.costByModel.length > 0) { + lines.push("### Cost by Model\n"); + lines.push(table( + ["Model", "Cost", "Turns", "% of Total"], + report.costByModel.map((r) => [ + r.model, + 
dollar(r.cost), + num(r.turns), + pct(report.totalCost > 0 ? r.cost / report.totalCost : 0), + ]), + )); + lines.push(""); + } + + // Top projects + if (report.topProjects.length > 0) { + lines.push("### Top Projects by Spend\n"); + lines.push(table( + ["Project", "Cost", "Sessions"], + report.topProjects.map((r) => [r.project, dollar(r.cost), String(r.sessions)]), + )); + lines.push(""); + } + + // Failing tools + if (report.topFailingTools.length > 0) { + lines.push("### Tool Failures\n"); + lines.push(table( + ["Tool", "Failures", "Total", "Fail Rate"], + report.topFailingTools.map((r) => [r.tool, String(r.failures), String(r.total), pct(r.rate)]), + )); + lines.push(""); + } + + // Error hotspots + if (report.errorHotspots.length > 0) { + lines.push("### Error Hotspots\n"); + lines.push(table( + ["Session", "Title", "Errors"], + report.errorHotspots.map((r) => [r.sessionId.slice(0, 8), r.title || "-", String(r.errorCount)]), + )); + lines.push(""); + } + + // Recommendations + if (recs.length > 0) { + lines.push("### Recommendations\n"); + for (const rec of recs) { + const icon = rec.severity === "high" ? "[!]" : rec.severity === "medium" ? 
"[~]" : "[.]"; + lines.push(`${icon} ${rec.message}`); + lines.push(` ${rec.detail}`); + } + lines.push(""); + } + + return lines.join("\n"); +} + +// ============================================================================= +// Session +// ============================================================================= + +export function formatSessionInsights(report: SessionReport): string { + const lines: string[] = []; + + lines.push(`## Session: ${report.title}\n`); + lines.push(`ID: ${report.sessionId}`); + lines.push(`Created: ${report.createdAt}`); + lines.push(`Total Cost: ${dollar(report.totalCost)}\n`); + + // Cost by model + if (report.costByModel.length > 0) { + lines.push("### Cost by Model\n"); + lines.push(table( + ["Model", "Cost", "Input Tok", "Output Tok", "Cache Tok", "Turns"], + report.costByModel.map((r) => [ + r.model, + dollar(r.cost), + num(r.inputTokens), + num(r.outputTokens), + num(r.cacheTokens), + String(r.turns), + ]), + )); + lines.push(""); + } + + // Tools + if (report.tools.length > 0) { + lines.push("### Tool Usage\n"); + lines.push(table( + ["Tool", "Calls", "OK", "Fail"], + report.tools.map((r) => [r.name, String(r.count), String(r.successes), String(r.failures)]), + )); + lines.push(""); + } + + // Errors + if (report.errors.length > 0) { + lines.push("### Errors\n"); + lines.push(table( + ["Type", "Message", "Count"], + report.errors.map((r) => [r.type, (r.message || "").slice(0, 60), String(r.count)]), + )); + lines.push(""); + } + + // File operations + if (report.fileOps.length > 0) { + lines.push("### File Operations\n"); + lines.push(table( + ["Path", "Reads", "Edits", "Writes"], + report.fileOps.slice(0, 15).map((r) => [r.path, String(r.reads), String(r.edits), String(r.writes)]), + )); + lines.push(""); + } + + // Git operations + if (report.gitOps.length > 0) { + lines.push("### Git Operations\n"); + lines.push(table( + ["Operation", "Branch", "PR"], + report.gitOps.map((r) => [r.operation, r.branch || "-", r.prUrl || 
"-"]), + )); + lines.push(""); + } + + // Commands + if (report.commands.length > 0) { + lines.push("### Commands\n"); + lines.push(table( + ["Command", "Exit", "Git?"], + report.commands.slice(0, 20).map((r) => [ + r.command.slice(0, 60), + r.exitCode != null ? String(r.exitCode) : "-", + r.isGit ? "yes" : "", + ]), + )); + lines.push(""); + } + + return lines.join("\n"); +} + +// ============================================================================= +// Project +// ============================================================================= + +export function formatProjectInsights(report: ProjectReport): string { + const lines: string[] = []; + + lines.push(`## Project: ${report.projectId}\n`); + lines.push(`Sessions: ${report.sessionCount} | Total Cost: ${dollar(report.totalCost)} | Avg/Session: ${dollar(report.avgCostPerSession)}`); + lines.push(`Error Rate: ${report.errorRate.toFixed(1)} errors/session | Build/Test Fail Rate: ${pct(report.buildTestFailRate)}\n`); + + if (report.toolDistribution.length > 0) { + lines.push("### Tool Distribution\n"); + lines.push(table( + ["Tool", "Calls"], + report.toolDistribution.map((r) => [r.tool, String(r.count)]), + )); + lines.push(""); + } + + if (report.mostAccessedFiles.length > 0) { + lines.push("### Most Read Files (knowledge bottlenecks)\n"); + lines.push(table( + ["File", "Reads"], + report.mostAccessedFiles.map((r) => [r.path, String(r.count)]), + )); + lines.push(""); + } + + if (report.mostEditedFiles.length > 0) { + lines.push("### Most Edited Files (churn hotspots)\n"); + lines.push(table( + ["File", "Edits"], + report.mostEditedFiles.map((r) => [r.path, String(r.count)]), + )); + lines.push(""); + } + + return lines.join("\n"); +} + +// ============================================================================= +// Costs +// ============================================================================= + +export function formatCostBreakdown(report: CostReport): string { + const lines: string[] = []; 
+ + lines.push(`## Cost Breakdown\n`); + lines.push(`Total: ${dollar(report.totalCost)}\n`); + + if (report.byModel.length > 0) { + lines.push("### By Model\n"); + lines.push(table( + ["Model", "Cost", "Input Tok", "Output Tok", "Cache Tok", "Turns"], + report.byModel.map((r) => [ + r.model, + dollar(r.cost), + num(r.inputTokens), + num(r.outputTokens), + num(r.cacheTokens), + String(r.turns), + ]), + )); + lines.push(""); + } + + if (report.byProject.length > 0) { + lines.push("### By Project\n"); + lines.push(table( + ["Project", "Cost", "Sessions"], + report.byProject.map((r) => [r.project, dollar(r.cost), String(r.sessions)]), + )); + lines.push(""); + } + + if (report.byDay.length > 0) { + lines.push("### By Day\n"); + lines.push(table( + ["Date", "Cost", "Sessions"], + report.byDay.map((r) => [r.date, dollar(r.cost), String(r.sessions)]), + )); + lines.push(""); + } + + return lines.join("\n"); +} + +// ============================================================================= +// Errors +// ============================================================================= + +export function formatErrorAnalysis(report: ErrorReport): string { + const lines: string[] = []; + + lines.push(`## Error Analysis\n`); + lines.push(`Total Errors: ${num(report.totalErrors)}\n`); + + if (report.byType.length > 0) { + lines.push("### By Type\n"); + lines.push(table( + ["Type", "Count"], + report.byType.map((r) => [r.type, String(r.count)]), + )); + lines.push(""); + } + + if (report.bySession.length > 0) { + lines.push("### By Session\n"); + lines.push(table( + ["Session", "Title", "Errors"], + report.bySession.map((r) => [r.sessionId.slice(0, 8), r.title || "-", String(r.count)]), + )); + lines.push(""); + } + + if (report.recentErrors.length > 0) { + lines.push("### Recent Errors\n"); + lines.push(table( + ["Type", "Message", "When"], + report.recentErrors.slice(0, 10).map((r) => [ + r.type, + (r.message || "").slice(0, 50), + r.createdAt?.slice(0, 16) || "-", + ]), + )); 
+ lines.push(""); + } + + return lines.join("\n"); +} + +// ============================================================================= +// Tools +// ============================================================================= + +export function formatToolStats(report: ToolReport): string { + const lines: string[] = []; + + lines.push(`## Tool Reliability\n`); + lines.push(`Total Calls: ${num(report.totalCalls)}\n`); + + if (report.tools.length > 0) { + lines.push(table( + ["Tool", "Calls", "OK", "Fail", "Fail%", "Avg ms"], + report.tools.map((r) => [ + r.name, + String(r.count), + String(r.successes), + String(r.failures), + pct(r.rate), + r.avgDurationMs != null ? Math.round(r.avgDurationMs).toString() : "-", + ]), + )); + lines.push(""); + } + + return lines.join("\n"); +} diff --git a/src/insights/index.ts b/src/insights/index.ts new file mode 100644 index 0000000..61d513b --- /dev/null +++ b/src/insights/index.ts @@ -0,0 +1,478 @@ +/** + * insights/index.ts - Query functions for sidecar data analysis + * + * Surfaces patterns from tool usage, costs, errors, file operations, + * and git operations to help optimize AI-assisted development workflows. 
+ */ + +import type { Database } from "bun:sqlite"; + +// ============================================================================= +// Types +// ============================================================================= + +export interface OverviewReport { + totalSessions: number; + totalMessages: number; + totalCost: number; + costByModel: Array<{ model: string; cost: number; turns: number }>; + topProjects: Array<{ project: string; cost: number; sessions: number }>; + topFailingTools: Array<{ tool: string; failures: number; total: number; rate: number }>; + errorHotspots: Array<{ sessionId: string; title: string; errorCount: number }>; +} + +export interface SessionReport { + sessionId: string; + title: string; + createdAt: string; + costByModel: Array<{ model: string; cost: number; inputTokens: number; outputTokens: number; cacheTokens: number; turns: number }>; + totalCost: number; + tools: Array<{ name: string; count: number; successes: number; failures: number }>; + errors: Array<{ type: string; message: string; count: number }>; + commands: Array<{ command: string; exitCode: number | null; isGit: boolean }>; + fileOps: Array<{ path: string; reads: number; edits: number; writes: number }>; + gitOps: Array<{ operation: string; branch: string | null; prUrl: string | null; details: string | null }>; +} + +export interface ProjectReport { + projectId: string; + sessionCount: number; + totalCost: number; + avgCostPerSession: number; + errorRate: number; + toolDistribution: Array<{ tool: string; count: number }>; + mostAccessedFiles: Array<{ path: string; count: number }>; + mostEditedFiles: Array<{ path: string; count: number }>; + buildTestFailRate: number; +} + +export interface CostReport { + totalCost: number; + byModel: Array<{ model: string; cost: number; inputTokens: number; outputTokens: number; cacheTokens: number; turns: number }>; + byProject: Array<{ project: string; cost: number; sessions: number }>; + byDay: Array<{ date: string; cost: 
number; sessions: number }>; +} + +export interface ErrorReport { + totalErrors: number; + byType: Array<{ type: string; count: number }>; + bySession: Array<{ sessionId: string; title: string; count: number }>; + recentErrors: Array<{ type: string; message: string; sessionId: string; createdAt: string }>; +} + +export interface ToolReport { + totalCalls: number; + tools: Array<{ name: string; count: number; successes: number; failures: number; rate: number; avgDurationMs: number | null }>; +} + +export interface Recommendation { + severity: "high" | "medium" | "low"; + message: string; + detail: string; +} + +// ============================================================================= +// Dashboard Overview +// ============================================================================= + +export function getOverview(db: Database): OverviewReport { + const totalSessions = (db.prepare(`SELECT COUNT(*) as n FROM memory_sessions`).get() as any).n; + const totalMessages = (db.prepare(`SELECT COUNT(*) as n FROM memory_messages`).get() as any).n; + const totalCost = (db.prepare(`SELECT COALESCE(SUM(estimated_cost_usd), 0) as n FROM smriti_session_costs`).get() as any).n; + + const costByModel = db.prepare(` + SELECT model, SUM(estimated_cost_usd) as cost, SUM(turn_count) as turns + FROM smriti_session_costs + GROUP BY model + ORDER BY cost DESC + `).all() as Array<{ model: string; cost: number; turns: number }>; + + const topProjects = db.prepare(` + SELECT sm.project_id as project, SUM(sc.estimated_cost_usd) as cost, COUNT(DISTINCT sc.session_id) as sessions + FROM smriti_session_costs sc + JOIN smriti_session_meta sm ON sc.session_id = sm.session_id + WHERE sm.project_id IS NOT NULL + GROUP BY sm.project_id + ORDER BY cost DESC + LIMIT 5 + `).all() as Array<{ project: string; cost: number; sessions: number }>; + + const topFailingTools = db.prepare(` + SELECT tool_name as tool, + SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failures, + COUNT(*) as total, + 
CAST(SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) AS REAL) / COUNT(*) as rate + FROM smriti_tool_usage + GROUP BY tool_name + HAVING failures > 0 + ORDER BY failures DESC + LIMIT 5 + `).all() as Array<{ tool: string; failures: number; total: number; rate: number }>; + + const errorHotspots = db.prepare(` + SELECT e.session_id as sessionId, COALESCE(ms.title, e.session_id) as title, COUNT(*) as errorCount + FROM smriti_errors e + LEFT JOIN memory_sessions ms ON e.session_id = ms.id + GROUP BY e.session_id + ORDER BY errorCount DESC + LIMIT 5 + `).all() as Array<{ sessionId: string; title: string; errorCount: number }>; + + return { totalSessions, totalMessages, totalCost, costByModel, topProjects, topFailingTools, errorHotspots }; +} + +// ============================================================================= +// Session Deep Dive +// ============================================================================= + +export function getSessionInsights(db: Database, sessionId: string): SessionReport | null { + const session = db.prepare(`SELECT id, title, created_at FROM memory_sessions WHERE id = ?`).get(sessionId) as any; + if (!session) return null; + + const costByModel = db.prepare(` + SELECT model, estimated_cost_usd as cost, total_input_tokens as inputTokens, + total_output_tokens as outputTokens, total_cache_tokens as cacheTokens, turn_count as turns + FROM smriti_session_costs WHERE session_id = ? + ORDER BY cost DESC + `).all(sessionId) as SessionReport["costByModel"]; + + const totalCost = costByModel.reduce((sum, r) => sum + r.cost, 0); + + const tools = db.prepare(` + SELECT tool_name as name, COUNT(*) as count, + SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successes, + SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failures + FROM smriti_tool_usage WHERE session_id = ? 
+ GROUP BY tool_name ORDER BY count DESC + `).all(sessionId) as SessionReport["tools"]; + + const errors = db.prepare(` + SELECT error_type as type, message, COUNT(*) as count + FROM smriti_errors WHERE session_id = ? + GROUP BY error_type, message ORDER BY count DESC + `).all(sessionId) as SessionReport["errors"]; + + const commands = db.prepare(` + SELECT command, exit_code as exitCode, is_git as isGit + FROM smriti_commands WHERE session_id = ? + ORDER BY created_at + `).all(sessionId) as SessionReport["commands"]; + + const fileOps = db.prepare(` + SELECT file_path as path, + SUM(CASE WHEN operation = 'read' THEN 1 ELSE 0 END) as reads, + SUM(CASE WHEN operation = 'edit' THEN 1 ELSE 0 END) as edits, + SUM(CASE WHEN operation = 'write' THEN 1 ELSE 0 END) as writes + FROM smriti_file_operations WHERE session_id = ? + GROUP BY file_path ORDER BY (reads + edits + writes) DESC + `).all(sessionId) as SessionReport["fileOps"]; + + const gitOps = db.prepare(` + SELECT operation, branch, pr_url as prUrl, details + FROM smriti_git_operations WHERE session_id = ? + ORDER BY created_at + `).all(sessionId) as SessionReport["gitOps"]; + + return { + sessionId: session.id, + title: session.title || "(untitled)", + createdAt: session.created_at, + costByModel, + totalCost, + tools, + errors, + commands, + fileOps, + gitOps, + }; +} + +// ============================================================================= +// Project Analysis +// ============================================================================= + +export function getProjectInsights(db: Database, projectId: string): ProjectReport | null { + const sessionCount = (db.prepare(` + SELECT COUNT(*) as n FROM smriti_session_meta WHERE project_id = ? 
+ `).get(projectId) as any)?.n; + if (!sessionCount) return null; + + const totalCost = (db.prepare(` + SELECT COALESCE(SUM(sc.estimated_cost_usd), 0) as n + FROM smriti_session_costs sc + JOIN smriti_session_meta sm ON sc.session_id = sm.session_id + WHERE sm.project_id = ? + `).get(projectId) as any).n; + + const errorCount = (db.prepare(` + SELECT COUNT(*) as n FROM smriti_errors e + JOIN smriti_session_meta sm ON e.session_id = sm.session_id + WHERE sm.project_id = ? + `).get(projectId) as any).n; + + const toolDistribution = db.prepare(` + SELECT tu.tool_name as tool, COUNT(*) as count + FROM smriti_tool_usage tu + JOIN smriti_session_meta sm ON tu.session_id = sm.session_id + WHERE sm.project_id = ? + GROUP BY tu.tool_name ORDER BY count DESC + LIMIT 10 + `).all(projectId) as Array<{ tool: string; count: number }>; + + const mostAccessedFiles = db.prepare(` + SELECT fo.file_path as path, COUNT(*) as count + FROM smriti_file_operations fo + JOIN smriti_session_meta sm ON fo.session_id = sm.session_id + WHERE sm.project_id = ? AND fo.operation = 'read' + GROUP BY fo.file_path ORDER BY count DESC + LIMIT 10 + `).all(projectId) as Array<{ path: string; count: number }>; + + const mostEditedFiles = db.prepare(` + SELECT fo.file_path as path, COUNT(*) as count + FROM smriti_file_operations fo + JOIN smriti_session_meta sm ON fo.session_id = sm.session_id + WHERE sm.project_id = ? AND fo.operation IN ('edit', 'write') + GROUP BY fo.file_path ORDER BY count DESC + LIMIT 10 + `).all(projectId) as Array<{ path: string; count: number }>; + + // Build/test failure rate: commands with "test" or "build" that failed + const buildTestTotal = (db.prepare(` + SELECT COUNT(*) as n FROM smriti_commands c + JOIN smriti_session_meta sm ON c.session_id = sm.session_id + WHERE sm.project_id = ? AND (c.command LIKE '%test%' OR c.command LIKE '%build%') + `).get(projectId) as any).n; + + const buildTestFails = buildTestTotal > 0 + ? 
(db.prepare(` + SELECT COUNT(*) as n FROM smriti_commands c + JOIN smriti_session_meta sm ON c.session_id = sm.session_id + WHERE sm.project_id = ? AND (c.command LIKE '%test%' OR c.command LIKE '%build%') AND c.exit_code != 0 + `).get(projectId) as any).n + : 0; + + return { + projectId, + sessionCount, + totalCost, + avgCostPerSession: sessionCount > 0 ? totalCost / sessionCount : 0, + errorRate: sessionCount > 0 ? errorCount / sessionCount : 0, + toolDistribution, + mostAccessedFiles, + mostEditedFiles, + buildTestFailRate: buildTestTotal > 0 ? buildTestFails / buildTestTotal : 0, + }; +} + +// ============================================================================= +// Cost Breakdown +// ============================================================================= + +export function getCostBreakdown(db: Database, options?: { days?: number }): CostReport { + const dayFilter = options?.days + ? `WHERE ms.created_at >= datetime('now', '-${options.days} days')` + : ""; + + const totalCost = (db.prepare(` + SELECT COALESCE(SUM(sc.estimated_cost_usd), 0) as n + FROM smriti_session_costs sc + ${options?.days ? `JOIN memory_sessions ms ON sc.session_id = ms.id ${dayFilter}` : ""} + `).get() as any).n; + + const byModel = db.prepare(` + SELECT sc.model, SUM(sc.estimated_cost_usd) as cost, + SUM(sc.total_input_tokens) as inputTokens, + SUM(sc.total_output_tokens) as outputTokens, + SUM(sc.total_cache_tokens) as cacheTokens, + SUM(sc.turn_count) as turns + FROM smriti_session_costs sc + ${options?.days ? `JOIN memory_sessions ms ON sc.session_id = ms.id ${dayFilter}` : ""} + GROUP BY sc.model ORDER BY cost DESC + `).all() as CostReport["byModel"]; + + const byProject = db.prepare(` + SELECT sm.project_id as project, SUM(sc.estimated_cost_usd) as cost, + COUNT(DISTINCT sc.session_id) as sessions + FROM smriti_session_costs sc + JOIN smriti_session_meta sm ON sc.session_id = sm.session_id + ${options?.days ? 
`JOIN memory_sessions ms ON sc.session_id = ms.id ${dayFilter}` : ""} + ${/* dayFilter already opens the WHERE clause when --days is set; a second WHERE is a SQL syntax error */ options?.days ? "AND" : "WHERE"} sm.project_id IS NOT NULL + GROUP BY sm.project_id ORDER BY cost DESC + `).all() as CostReport["byProject"]; + + const byDay = db.prepare(` + SELECT DATE(ms.created_at) as date, SUM(sc.estimated_cost_usd) as cost, + COUNT(DISTINCT sc.session_id) as sessions + FROM smriti_session_costs sc + JOIN memory_sessions ms ON sc.session_id = ms.id + ${dayFilter} + GROUP BY DATE(ms.created_at) ORDER BY date DESC + LIMIT 30 + `).all() as CostReport["byDay"]; + + return { totalCost, byModel, byProject, byDay }; +} + +// ============================================================================= +// Error Analysis +// ============================================================================= + +export function getErrorAnalysis(db: Database, options?: { project?: string }): ErrorReport { + const projectJoin = options?.project + ? `JOIN smriti_session_meta sm ON e.session_id = sm.session_id` + : ""; + const projectWhere = options?.project ? `WHERE sm.project_id = ?` : ""; + const params = options?.project ? [options.project] : []; + + const totalErrors = (db.prepare(` + SELECT COUNT(*) as n FROM smriti_errors e ${projectJoin} ${projectWhere} + `).get(...params) as any).n; + + const byType = db.prepare(` + SELECT e.error_type as type, COUNT(*) as count + FROM smriti_errors e ${projectJoin} ${projectWhere} + GROUP BY e.error_type ORDER BY count DESC + `).all(...params) as ErrorReport["byType"]; + + const bySession = db.prepare(` + SELECT e.session_id as sessionId, COALESCE(ms.title, e.session_id) as title, COUNT(*) as count + FROM smriti_errors e + LEFT JOIN memory_sessions ms ON e.session_id = ms.id + ${options?.project ? 
`JOIN smriti_session_meta sm ON e.session_id = sm.session_id WHERE sm.project_id = ?` : ""} + GROUP BY e.session_id ORDER BY count DESC + LIMIT 10 + `).all(...params) as ErrorReport["bySession"]; + + const recentErrors = db.prepare(` + SELECT e.error_type as type, e.message, e.session_id as sessionId, e.created_at as createdAt + FROM smriti_errors e ${projectJoin} ${projectWhere} + ORDER BY e.created_at DESC + LIMIT 20 + `).all(...params) as ErrorReport["recentErrors"]; + + return { totalErrors, byType, bySession, recentErrors }; +} + +// ============================================================================= +// Tool Reliability +// ============================================================================= + +export function getToolStats(db: Database, options?: { project?: string }): ToolReport { + const projectJoin = options?.project + ? `JOIN smriti_session_meta sm ON tu.session_id = sm.session_id` + : ""; + const projectWhere = options?.project ? `WHERE sm.project_id = ?` : ""; + const params = options?.project ? 
[options.project] : []; + + const totalCalls = (db.prepare(` + SELECT COUNT(*) as n FROM smriti_tool_usage tu ${projectJoin} ${projectWhere} + `).get(...params) as any).n; + + const tools = db.prepare(` + SELECT tu.tool_name as name, COUNT(*) as count, + SUM(CASE WHEN tu.success = 1 THEN 1 ELSE 0 END) as successes, + SUM(CASE WHEN tu.success = 0 THEN 1 ELSE 0 END) as failures, + CAST(SUM(CASE WHEN tu.success = 0 THEN 1 ELSE 0 END) AS REAL) / COUNT(*) as rate, + AVG(tu.duration_ms) as avgDurationMs + FROM smriti_tool_usage tu ${projectJoin} ${projectWhere} + GROUP BY tu.tool_name ORDER BY count DESC + `).all(...params) as ToolReport["tools"]; + + return { totalCalls, tools }; +} + +// ============================================================================= +// Recommendations +// ============================================================================= + +export function getRecommendations(db: Database): Recommendation[] { + const recs: Recommendation[] = []; + + // Long sessions (> 200 turns) + const longSessions = db.prepare(` + SELECT sc.session_id, SUM(sc.turn_count) as turns + FROM smriti_session_costs sc + GROUP BY sc.session_id + HAVING turns > 200 + `).all() as Array<{ session_id: string; turns: number }>; + if (longSessions.length > 0) { + recs.push({ + severity: "medium", + message: `${longSessions.length} session(s) exceed 200 turns`, + detail: "Consider splitting long sessions. 
Context quality degrades after ~200 turns and costs increase due to growing cache.", + }); + } + + // Exploration-heavy sessions (Read/Glob/Grep > 50% of tool calls) + const explorationHeavy = db.prepare(` + SELECT session_id, + SUM(CASE WHEN tool_name IN ('Read', 'Glob', 'Grep', 'LS') THEN 1 ELSE 0 END) as explore_calls, + COUNT(*) as total_calls + FROM smriti_tool_usage + GROUP BY session_id + HAVING total_calls > 20 AND CAST(explore_calls AS REAL) / total_calls > 0.5 + `).all() as Array<{ session_id: string; explore_calls: number; total_calls: number }>; + if (explorationHeavy.length > 0) { + recs.push({ + severity: "high", + message: `${explorationHeavy.length} session(s) are exploration-heavy (>50% read ops)`, + detail: "Use /explore (Haiku) for codebase research before implementation. Haiku is 19x cheaper than Opus for exploration.", + }); + } + + // Missing tools (exit code 127) + const missingTools = db.prepare(` + SELECT command, COUNT(*) as count + FROM smriti_commands + WHERE exit_code = 127 + GROUP BY command + HAVING count > 2 + `).all() as Array<{ command: string; count: number }>; + if (missingTools.length > 0) { + const tools = missingTools.map((t) => t.command.split(" ")[0]).join(", "); + recs.push({ + severity: "medium", + message: `Missing tools detected: ${tools}`, + detail: "Commands returned exit code 127 (not found). Install the missing tools or update your PATH.", + }); + } + + // Knowledge bottleneck files (read > 10 times) + const bottlenecks = db.prepare(` + SELECT file_path as path, COUNT(*) as count + FROM smriti_file_operations + WHERE operation = 'read' + GROUP BY file_path + HAVING count > 10 + ORDER BY count DESC + LIMIT 5 + `).all() as Array<{ path: string; count: number }>; + if (bottlenecks.length > 0) { + const files = bottlenecks.map((b) => `${b.path} (${b.count}x)`).join(", "); + recs.push({ + severity: "low", + message: "Knowledge bottleneck files detected", + detail: `These files are read repeatedly: ${files}. 
Consider adding summaries to CLAUDE.md to reduce redundant reads.`, + }); + } + + // High tool failure rate + const failingTools = db.prepare(` + SELECT tool_name, COUNT(*) as total, + SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) as failures + FROM smriti_tool_usage + GROUP BY tool_name + HAVING total > 10 AND CAST(failures AS REAL) / total > 0.3 + `).all() as Array<{ tool_name: string; total: number; failures: number }>; + if (failingTools.length > 0) { + const tools = failingTools.map((t) => `${t.tool_name} (${((t.failures / t.total) * 100).toFixed(0)}% fail)`).join(", "); + recs.push({ + severity: "medium", + message: "Tools with high failure rates", + detail: `${tools}. Investigate why these tools are failing frequently.`, + }); + } + + return recs; +} From f065d14393c7c84094b5a729d257883be09dc955 Mon Sep 17 00:00:00 2001 From: Ashutosh Tripathi Date: Mon, 9 Mar 2026 12:16:31 +0530 Subject: [PATCH 5/5] =?UTF-8?q?release:=20v0.6.0=20=E2=80=94=20ingest=20fo?= =?UTF-8?q?rce,=20sidecar=20search,=20cost=20&=20insights?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 15 +++++++++++++++ package.json | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 753f2a8..2c5af95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +## [0.6.0] - 2026-03-09 + +### Added + +- feat(ingest): `--force` flag for re-ingesting sessions (deletes sidecars, re-extracts) +- feat(db): sidecar content searchable via unified FTS — artifacts, thinking blocks, attachments, voice notes +- feat(insights): cost & usage analytics module with CLI commands (`smriti insights`) + +### Database + +- New tables: `smriti_artifacts`, `smriti_thinking`, `smriti_attachments`, `smriti_voice_notes` +- FTS migration to v2 includes sidecar content + +--- + ## [0.5.1] - 2026-03-09 ### Fixed diff --git a/package.json b/package.json index 0553a5b..16de12d 100644 --- a/package.json +++ 
b/package.json @@ -1,6 +1,6 @@ { "name": "smriti", - "version": "0.5.1", + "version": "0.6.0", "description": "Smriti - Unified memory layer across all AI agents", "type": "module", "bin": {