From c49e88f1caef58faf037ef0b71da7f7123a90342 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:22:17 +0000 Subject: [PATCH 1/3] Fix export/import: runtime bugs, add vectors, compression, validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archive.ts had 7 column/table name mismatches vs the actual schema (source_id→source_chunk_id, type→edge_type, weight→initial_weight, cluster_members→chunk_clusters, etc.) that would crash at runtime. All 19 tests were interface-only so never caught this. Changes: - Fix all SQL column/table names to match schema.sql - Add vector embedding export/import (semantic search works after import) - Add full cluster data: centroid, exemplar IDs, distances, membership hash - Add gzip compression (auto-detected on import) - Add validateArchive() with version, count, and referential integrity checks - Add dry-run import option - Add ExportResult/ImportResult return types with summary - Filter edges to require both endpoints in export (no dangling refs) - Wire CLI flags: --projects, --redact-paths, --redact-code, --no-vectors, --dry-run - Print formatted summary after export/import operations - Bump archive version to 1.1 (backward-compatible with 1.0) - Replace 19 interface-only tests with 27 integration tests using real DBs - Fix docs: remove phantom --format/--replace flags, fix magic bytes (CST\0) --- docs/guides/backup-restore.md | 69 ++- docs/reference/cli-commands.md | 25 +- src/cli/commands/archive.ts | 70 ++- src/storage/archive.ts | 486 +++++++++++++---- test/storage/archive.test.ts | 956 +++++++++++++++++++++++++-------- 5 files changed, 1263 insertions(+), 343 deletions(-) diff --git a/docs/guides/backup-restore.md b/docs/guides/backup-restore.md index d6987ed..5aecf1f 100644 --- a/docs/guides/backup-restore.md +++ b/docs/guides/backup-restore.md @@ -1,6 +1,6 @@ # Backup & Restore -Causantic supports encrypted exports for secure backup and migration of your memory data. +Causantic supports encrypted, compressed exports for secure backup and migration of your memory data. ## Export Memory @@ -25,6 +25,7 @@ npx causantic export --output backup.json --no-encrypt ```bash npx causantic export --output backup.causantic --projects my-project +npx causantic export --output backup.causantic --projects project-a,project-b ``` ### With Redaction (for sharing) @@ -34,6 +35,14 @@ npx causantic export --output backup.causantic --projects my-project npx causantic export --output backup.causantic --redact-paths --redact-code ``` +### Without Vectors (lightweight) + +```bash +# Skip vector embeddings for a smaller file +# Note: semantic search will not work after import until re-embedding +npx causantic export --output backup.causantic --no-vectors +``` + ## Import Memory ### Encrypted Archive @@ -54,6 +63,12 @@ npx causantic import backup.causantic --merge Without `--merge`, existing data is replaced. 
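Since a plain import replaces existing data, one cautious workflow (a suggestion, not something the tool requires, built only from the commands documented above) is to take a safety export before importing:

```bash
# Keep a copy of the current state before a destructive replace import
npx causantic export --output pre-import-safety.causantic
npx causantic import backup.causantic
```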
+### Dry Run (validate without importing) + +```bash +npx causantic import backup.causantic --dry-run +``` + ## Environment Variable (CI/Scripts) For non-interactive environments, set the password via environment variable: @@ -71,10 +86,26 @@ CAUSANTIC_EXPORT_PASSWORD="your-secure-password" npx causantic import backup.cau | Data | Description | |------|-------------| | Chunks | Conversation segments with semantic content | -| Edges | Causal relationships (backward/forward links) | -| Clusters | Topic groupings from HDBSCAN clustering | +| Edges | Causal relationships (forward/backward links) with identity and link counts | +| Clusters | Topic groupings with centroids, exemplar IDs, distances, and membership hashes | +| Vectors | Embedding vectors for semantic search (skip with `--no-vectors`) | + +## Archive Format + +### Version History -## Encryption Details +| Version | Changes | +|---------|---------| +| 1.1 | Added vector embeddings, full cluster data (centroid, distances, exemplars), gzip compression, edge identity | +| 1.0 | Initial format (chunks, edges, basic clusters) | + +Archives are backward-compatible: v1.1 can import v1.0 archives (with a warning that vectors are missing). + +### Compression + +All v1.1 exports are gzip-compressed. On import, Causantic auto-detects compressed, encrypted, and plain JSON formats. + +### Encryption Details Causantic uses strong encryption for archive files: @@ -83,33 +114,35 @@ Causantic uses strong encryption for archive files: - **Nonce**: 12 bytes (random per encryption) - **Salt**: 16 bytes (unique per password) -The archive format uses magic bytes (`Causantic\0`) to identify encrypted files. - -## File Formats +The archive format uses magic bytes (`CST\0`) to identify encrypted files. -### Encrypted (.causantic) +### File Structure -Binary format with structure: +**Encrypted + compressed:** ``` -[Magic: 4 bytes "Causantic\0"] +[Magic: 4 bytes "CST\0"] [Salt: 16 bytes] [Nonce: 12 bytes] [Auth Tag: 16 bytes] -[Ciphertext: variable] +[Ciphertext: gzip(JSON) encrypted with AES-256-GCM] ``` -### Unencrypted (.json) +**Unencrypted compressed (default):** +``` +[gzip(JSON)] +``` -Standard JSON with structure: +**Plain JSON (v1.0 backward compat):** ```json { "format": "causantic-archive", - "version": "1.0", + "version": "1.1", "created": "2024-01-15T10:30:00Z", "metadata": { ... }, "chunks": [ ... ], "edges": [ ... ], - "clusters": [ ... ] + "clusters": [ ... ], + "vectors": [ ... ] } ``` @@ -163,3 +196,9 @@ The file is not a valid Causantic archive. Check that: ### "Decryption failed" Wrong password. Re-enter the password carefully. + +### "Archive version 1.0: no vector embeddings" + +The archive was created with v1.0 (before vector support). After import: +- Semantic search (`recall`, `search`, `predict`) won't work until vectors are regenerated +- Run `npx causantic maintenance run scan-projects` to re-ingest and generate embeddings diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 47fb503..58cf26d 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -241,7 +241,7 @@ npx causantic encryption audit 20 ### export -Export memory data. +Export memory data. Archives are gzip-compressed by default and include vector embeddings for semantic search continuity. 
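Because exports are gzip-compressed even when unencrypted, a quick sanity check on an unencrypted export (using the standard `file` utility, not anything Causantic-specific — shown here as a suggestion, not part of the CLI) looks like this:

```bash
npx causantic export --output backup.json --no-encrypt
file backup.json   # expected to report gzip compressed data, despite the .json extension
```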
```bash npx causantic export [options] @@ -251,19 +251,25 @@ npx causantic export [options] | Option | Description | |--------|-------------| -| `--output ` | Output file path | +| `--output ` | Output file path (default: `causantic-backup.causantic`) | | `--no-encrypt` | Skip encryption | -| `--format ` | Output format (json, archive) | +| `--projects ` | Comma-separated project slugs to export | +| `--redact-paths` | Redact file paths in content | +| `--redact-code` | Redact code blocks in content | +| `--no-vectors` | Skip vector embeddings (smaller file, but semantic search won't work after import) | **Example**: ```bash -npx causantic export --output backup.causantic.json +npx causantic export --output backup.causantic npx causantic export --output backup.json --no-encrypt +npx causantic export --projects my-project,other-project --no-encrypt --output filtered.json +npx causantic export --redact-paths --redact-code --output sanitized.causantic +npx causantic export --no-vectors --output lightweight.causantic ``` ### import -Import memory data. +Import memory data. Supports encrypted, compressed, and plain JSON archives. ```bash npx causantic import [options] @@ -273,13 +279,14 @@ npx causantic import [options] | Option | Description | |--------|-------------| -| `--merge` | Merge with existing data | -| `--replace` | Replace existing data | +| `--merge` | Merge with existing data (default: replace) | +| `--dry-run` | Validate and report without importing | **Example**: ```bash -npx causantic import backup.causantic.json -npx causantic import backup.causantic.json --merge +npx causantic import backup.causantic +npx causantic import backup.causantic --merge +npx causantic import backup.causantic --dry-run ``` ### stats diff --git a/src/cli/commands/archive.ts b/src/cli/commands/archive.ts index 91026c3..e536c62 100644 --- a/src/cli/commands/archive.ts +++ b/src/cli/commands/archive.ts @@ -1,15 +1,36 @@ import type { Command } from '../types.js'; import { promptPassword, isEncryptedArchive } from '../utils.js'; +function formatSize(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +function formatCount(n: number): string { + return n.toLocaleString(); +} + export const exportCommand: Command = { name: 'export', description: 'Export memory data', - usage: 'causantic export --output [--no-encrypt]', + usage: + 'causantic export --output [--no-encrypt] [--projects ] [--redact-paths] [--redact-code] [--no-vectors]', handler: async (args) => { const { exportArchive } = await import('../../storage/archive.js'); const outputIndex = args.indexOf('--output'); const outputPath = outputIndex >= 0 ? args[outputIndex + 1] : 'causantic-backup.causantic'; const noEncrypt = args.includes('--no-encrypt'); + const noVectors = args.includes('--no-vectors'); + const redactPaths = args.includes('--redact-paths'); + const redactCode = args.includes('--redact-code'); + + // Parse --projects flag + const projectsIndex = args.indexOf('--projects'); + const projects = + projectsIndex >= 0 && args[projectsIndex + 1] + ? 
args[projectsIndex + 1].split(',').map((s) => s.trim()) + : undefined; let password: string | undefined; if (!noEncrypt) { @@ -34,26 +55,48 @@ export const exportCommand: Command = { } } - await exportArchive({ + const result = await exportArchive({ outputPath, password, + projects, + redactPaths, + redactCode, + noVectors, }); - console.log(`Exported to ${outputPath}`); + + const parts = [ + `${formatCount(result.chunkCount)} chunks`, + `${formatCount(result.edgeCount)} edges`, + `${formatCount(result.clusterCount)} clusters`, + `${formatCount(result.vectorCount)} vectors`, + ]; + const suffix = [ + result.compressed ? 'compressed' : null, + result.encrypted ? 'encrypted' : null, + ] + .filter(Boolean) + .join(', '); + + console.log(`Exported: ${parts.join(', ')} (${formatSize(result.fileSize)} ${suffix})`); + console.log(`File: ${outputPath}`); }, }; export const importCommand: Command = { name: 'import', description: 'Import memory data', - usage: 'causantic import [--merge]', + usage: 'causantic import [--merge] [--dry-run]', handler: async (args) => { if (args.length === 0) { console.error('Error: File path required'); process.exit(2); } const { importArchive } = await import('../../storage/archive.js'); - const inputPath = args[0]; + + // Find file path (first arg that isn't a flag) + const inputPath = args.find((a) => !a.startsWith('--'))!; const merge = args.includes('--merge'); + const dryRun = args.includes('--dry-run'); const encrypted = await isEncryptedArchive(inputPath); @@ -75,11 +118,24 @@ export const importCommand: Command = { } } - await importArchive({ + const result = await importArchive({ inputPath, password, merge, + dryRun, }); - console.log('Import complete.'); + + const parts = [ + `${formatCount(result.chunkCount)} chunks`, + `${formatCount(result.edgeCount)} edges`, + `${formatCount(result.clusterCount)} clusters`, + `${formatCount(result.vectorCount)} vectors`, + ]; + + if (result.dryRun) { + console.log(`Dry run — would import: ${parts.join(', ')}`); + } else { + console.log(`Imported: ${parts.join(', ')}`); + } }, }; diff --git a/src/storage/archive.ts b/src/storage/archive.ts index 01d9ff9..e49d045 100644 --- a/src/storage/archive.ts +++ b/src/storage/archive.ts @@ -1,22 +1,31 @@ /** * Export/import functionality for Causantic memory data. * - * Supports encrypted and unencrypted archives. + * Supports encrypted and unencrypted archives with optional gzip compression. + * Archive format v1.1 adds vector embeddings and full cluster data. 
*/ import { readFileSync, writeFileSync, existsSync } from 'node:fs'; +import { gzipSync, gunzipSync } from 'node:zlib'; import { encrypt, decrypt, serializeEncrypted, deserializeEncrypted } from './encryption.js'; -import { getDb } from './db.js'; +import { getDb, generateId } from './db.js'; +import { serializeEmbedding, deserializeEmbedding } from '../utils/embedding-utils.js'; import { createLogger } from '../utils/logger.js'; const log = createLogger('archive'); /** Archive format version */ -const ARCHIVE_VERSION = '1.0'; +const ARCHIVE_VERSION = '1.1'; + +/** Accepted versions on import */ +const ACCEPTED_VERSIONS = ['1.0', '1.1']; /** Magic bytes for encrypted archives */ const ENCRYPTED_MAGIC = Buffer.from('CST\x00'); +/** Magic bytes for gzip */ +const GZIP_MAGIC = Buffer.from([0x1f, 0x8b]); + /** Archive metadata */ export interface ArchiveMetadata { version: string; @@ -24,13 +33,17 @@ export interface ArchiveMetadata { chunkCount: number; edgeCount: number; clusterCount: number; + vectorCount: number; + embeddingDimensions: number | null; projects: string[]; } /** Chunk data for export */ export interface ExportedChunk { id: string; + sessionId: string; sessionSlug: string; + projectPath: string | null; content: string; startTime: string; endTime: string; @@ -39,19 +52,36 @@ export interface ExportedChunk { /** Edge data for export */ export interface ExportedEdge { + id: string; source: string; target: string; type: string; - referenceType: string; + referenceType: string | null; weight: number; + linkCount: number; +} + +/** Cluster member with distance */ +export interface ClusterMember { + chunkId: string; + distance: number; } /** Cluster data for export */ export interface ExportedCluster { id: string; - name: string; + name: string | null; description: string | null; - memberChunkIds: string[]; + centroid: number[] | null; + exemplarIds: string[] | null; + membershipHash: string | null; + members: ClusterMember[]; +} + +/** Vector data for export */ +export interface ExportedVector { + chunkId: string; + embedding: number[]; } /** Complete archive structure */ @@ -63,6 +93,7 @@ export interface Archive { chunks: ExportedChunk[]; edges: ExportedEdge[]; clusters: ExportedCluster[]; + vectors: ExportedVector[]; } /** Export options */ @@ -77,6 +108,19 @@ export interface ExportOptions { redactPaths?: boolean; /** Redact code blocks */ redactCode?: boolean; + /** Skip vector embeddings */ + noVectors?: boolean; +} + +/** Export result */ +export interface ExportResult { + chunkCount: number; + edgeCount: number; + clusterCount: number; + vectorCount: number; + fileSize: number; + compressed: boolean; + encrypted: boolean; } /** Import options */ @@ -87,12 +131,104 @@ export interface ImportOptions { password?: string; /** Merge with existing data */ merge?: boolean; + /** Validate and report without importing */ + dryRun?: boolean; +} + +/** Import result */ +export interface ImportResult { + chunkCount: number; + edgeCount: number; + clusterCount: number; + vectorCount: number; + dryRun: boolean; +} + +/** Validation result */ +export interface ValidationResult { + valid: boolean; + errors: string[]; + warnings: string[]; +} + +/** + * Validate an archive structure before import. 
+ */ +export function validateArchive(archive: Archive): ValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + + // Version check + if (!archive.version || !ACCEPTED_VERSIONS.includes(archive.version)) { + errors.push(`Unsupported archive version: ${archive.version ?? 'missing'}`); + } + + // Format check + if (archive.format !== 'causantic-archive') { + errors.push(`Invalid archive format: ${archive.format ?? 'missing'}`); + } + + // Count verification + if (archive.metadata) { + if (archive.metadata.chunkCount !== archive.chunks?.length) { + warnings.push( + `Metadata chunkCount (${archive.metadata.chunkCount}) does not match actual (${archive.chunks?.length ?? 0})`, + ); + } + if (archive.metadata.edgeCount !== archive.edges?.length) { + warnings.push( + `Metadata edgeCount (${archive.metadata.edgeCount}) does not match actual (${archive.edges?.length ?? 0})`, + ); + } + if (archive.metadata.clusterCount !== archive.clusters?.length) { + warnings.push( + `Metadata clusterCount (${archive.metadata.clusterCount}) does not match actual (${archive.clusters?.length ?? 0})`, + ); + } + } + + // Edge referential integrity + if (archive.chunks && archive.edges) { + const chunkIdSet = new Set(archive.chunks.map((c) => c.id)); + let danglingCount = 0; + for (const edge of archive.edges) { + if (!chunkIdSet.has(edge.source) || !chunkIdSet.has(edge.target)) { + danglingCount++; + } + } + if (danglingCount > 0) { + warnings.push(`${danglingCount} edge(s) reference chunks not in the archive`); + } + } + + // v1.0 backward compat warning + if (archive.version === '1.0') { + warnings.push( + 'Archive version 1.0: no vector embeddings included. Semantic search will not work until re-embedding.', + ); + } + + // Embedding dimension mismatch detection + if (archive.metadata?.embeddingDimensions && archive.vectors?.length > 0) { + const sampleDims = archive.vectors[0].embedding?.length; + if (sampleDims && sampleDims !== archive.metadata.embeddingDimensions) { + warnings.push( + `Embedding dimensions mismatch: metadata says ${archive.metadata.embeddingDimensions}, sample vector has ${sampleDims}`, + ); + } + } + + return { + valid: errors.length === 0, + errors, + warnings, + }; } /** * Export memory data to an archive. 
*/ -export async function exportArchive(options: ExportOptions): Promise { +export async function exportArchive(options: ExportOptions): Promise { const db = getDb(); // Get unique projects @@ -106,13 +242,15 @@ export async function exportArchive(options: ExportOptions): Promise { // Export chunks const chunksQuery = db.prepare(` - SELECT id, session_slug, content, start_time, end_time, turn_indices + SELECT id, session_id, session_slug, project_path, content, start_time, end_time, turn_indices FROM chunks WHERE session_slug IN (${targetProjects.map(() => '?').join(',')}) `); const chunksResult = chunksQuery.all(...targetProjects) as Array<{ id: string; + session_id: string; session_slug: string; + project_path: string | null; content: string; start_time: string; end_time: string; @@ -121,7 +259,9 @@ export async function exportArchive(options: ExportOptions): Promise { let chunks: ExportedChunk[] = chunksResult.map((row) => ({ id: row.id, + sessionId: row.session_id, sessionSlug: row.session_slug, + projectPath: row.project_path, content: row.content, startTime: row.start_time, endTime: row.end_time, @@ -142,50 +282,124 @@ export async function exportArchive(options: ExportOptions): Promise { })); } - // Export edges - const chunkIds = chunks.map((c) => c.id); - const edgesQuery = db.prepare(` - SELECT source_id, target_id, type, reference_type, weight - FROM edges - WHERE source_id IN (${chunkIds.map(() => '?').join(',')}) - `); - const edgesResult = edgesQuery.all(...chunkIds) as Array<{ - source_id: string; - target_id: string; - type: string; - reference_type: string; - weight: number; - }>; + // Build chunk ID set for filtering edges and vectors + const chunkIdSet = new Set(chunks.map((c) => c.id)); - const edges: ExportedEdge[] = edgesResult.map((row) => ({ - source: row.source_id, - target: row.target_id, - type: row.type, - referenceType: row.reference_type, - weight: row.weight, - })); + // Export edges — both endpoints must be in the export + const chunkIds = chunks.map((c) => c.id); + let edges: ExportedEdge[] = []; + if (chunkIds.length > 0) { + const edgesQuery = db.prepare(` + SELECT id, source_chunk_id, target_chunk_id, edge_type, reference_type, initial_weight, link_count + FROM edges + WHERE source_chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + const edgesResult = edgesQuery.all(...chunkIds) as Array<{ + id: string; + source_chunk_id: string; + target_chunk_id: string; + edge_type: string; + reference_type: string | null; + initial_weight: number; + link_count: number; + }>; + + // Filter: only keep edges where BOTH endpoints are in the export + edges = edgesResult + .filter((row) => chunkIdSet.has(row.source_chunk_id) && chunkIdSet.has(row.target_chunk_id)) + .map((row) => ({ + id: row.id, + source: row.source_chunk_id, + target: row.target_chunk_id, + type: row.edge_type, + referenceType: row.reference_type, + weight: row.initial_weight, + linkCount: row.link_count, + })); + } - // Export clusters - const clustersQuery = db.prepare(` - SELECT c.id, c.name, c.description, GROUP_CONCAT(cm.chunk_id) as member_ids - FROM clusters c - LEFT JOIN cluster_members cm ON c.id = cm.cluster_id - WHERE cm.chunk_id IN (${chunkIds.map(() => '?').join(',')}) - GROUP BY c.id - `); - const clustersResult = clustersQuery.all(...chunkIds) as Array<{ - id: string; - name: string; - description: string | null; - member_ids: string | null; - }>; + // Export clusters with full data + let clusters: ExportedCluster[] = []; + if (chunkIds.length > 0) { + // Find clusters that have at 
least one member in our export + const clusterIdsQuery = db.prepare(` + SELECT DISTINCT cluster_id FROM chunk_clusters + WHERE chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + const clusterIds = ( + clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }> + ).map((r) => r.cluster_id); + + if (clusterIds.length > 0) { + const clustersQuery = db.prepare(` + SELECT id, name, description, centroid, exemplar_ids, membership_hash + FROM clusters + WHERE id IN (${clusterIds.map(() => '?').join(',')}) + `); + const clustersResult = clustersQuery.all(...clusterIds) as Array<{ + id: string; + name: string | null; + description: string | null; + centroid: Buffer | null; + exemplar_ids: string | null; + membership_hash: string | null; + }>; + + const membersQuery = db.prepare(` + SELECT chunk_id, distance FROM chunk_clusters + WHERE cluster_id = ? AND chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + + clusters = clustersResult.map((row) => { + const membersResult = membersQuery.all(row.id, ...chunkIds) as Array<{ + chunk_id: string; + distance: number; + }>; + + return { + id: row.id, + name: row.name, + description: row.description, + centroid: row.centroid ? deserializeEmbedding(row.centroid) : null, + exemplarIds: row.exemplar_ids ? JSON.parse(row.exemplar_ids) : null, + membershipHash: row.membership_hash, + members: membersResult.map((m) => ({ + chunkId: m.chunk_id, + distance: m.distance, + })), + }; + }); + } + } - const clusters: ExportedCluster[] = clustersResult.map((row) => ({ - id: row.id, - name: row.name, - description: row.description, - memberChunkIds: row.member_ids?.split(',') ?? [], - })); + // Export vectors + let vectors: ExportedVector[] = []; + let embeddingDimensions: number | null = null; + if (!options.noVectors && chunkIds.length > 0) { + // Check if vectors table exists + const tableExists = db + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='vectors'") + .get(); + if (tableExists) { + const vectorsQuery = db.prepare(` + SELECT id, embedding FROM vectors + WHERE id IN (${chunkIds.map(() => '?').join(',')}) + `); + const vectorsResult = vectorsQuery.all(...chunkIds) as Array<{ + id: string; + embedding: Buffer; + }>; + + vectors = vectorsResult.map((row) => ({ + chunkId: row.id, + embedding: deserializeEmbedding(row.embedding), + })); + + if (vectors.length > 0) { + embeddingDimensions = vectors[0].embedding.length; + } + } + } // Build archive const archive: Archive = { @@ -198,39 +412,50 @@ export async function exportArchive(options: ExportOptions): Promise { chunkCount: chunks.length, edgeCount: edges.length, clusterCount: clusters.length, + vectorCount: vectors.length, + embeddingDimensions, projects: targetProjects, }, chunks, edges, clusters, + vectors, }; - // Serialize - const jsonData = JSON.stringify(archive, null, 2); + // Serialize: JSON -> gzip -> (optional) encrypt -> write + const jsonData = JSON.stringify(archive); + const compressed = gzipSync(Buffer.from(jsonData, 'utf-8')); - // Write output + let output: Buffer; + const encrypted = !!options.password; if (options.password) { - // Encrypted - const encrypted = encrypt(Buffer.from(jsonData, 'utf-8'), options.password); - const serialized = serializeEncrypted(encrypted); - const output = Buffer.concat([ENCRYPTED_MAGIC, serialized]); - writeFileSync(options.outputPath, output); + const encryptedData = encrypt(compressed, options.password); + const serialized = serializeEncrypted(encryptedData); + output = Buffer.concat([ENCRYPTED_MAGIC, serialized]); } else 
{ - // Unencrypted - writeFileSync(options.outputPath, jsonData); + output = compressed; } - log.info('Export completed', { - chunks: chunks.length, - edges: edges.length, - clusters: clusters.length, - }); + writeFileSync(options.outputPath, output); + + const result: ExportResult = { + chunkCount: chunks.length, + edgeCount: edges.length, + clusterCount: clusters.length, + vectorCount: vectors.length, + fileSize: output.length, + compressed: true, + encrypted, + }; + + log.info('Export completed', { ...result }); + return result; } /** * Import memory data from an archive. */ -export async function importArchive(options: ImportOptions): Promise { +export async function importArchive(options: ImportOptions): Promise { if (!existsSync(options.inputPath)) { throw new Error(`File not found: ${options.inputPath}`); } @@ -239,45 +464,96 @@ export async function importArchive(options: ImportOptions): Promise { let jsonData: string; - // Check if encrypted - if (fileContent.subarray(0, 4).equals(ENCRYPTED_MAGIC)) { + // Detection order: + // 1. CST\0 (encrypted) -> decrypt, then check for gzip + // 2. gzip magic 0x1f 0x8b (compressed, unencrypted) + // 3. plain JSON (v1.0 backward compat) + if (fileContent.length >= 4 && fileContent.subarray(0, 4).equals(ENCRYPTED_MAGIC)) { if (!options.password) { throw new Error('Archive is encrypted. Please provide a password.'); } const encryptedData = deserializeEncrypted(fileContent.subarray(4)); const decrypted = decrypt(encryptedData, options.password); - jsonData = decrypted.toString('utf-8'); + + // Check if decrypted data is gzipped + if (decrypted.length >= 2 && decrypted[0] === 0x1f && decrypted[1] === 0x8b) { + jsonData = gunzipSync(decrypted).toString('utf-8'); + } else { + jsonData = decrypted.toString('utf-8'); + } + } else if ( + fileContent.length >= 2 && + fileContent[0] === GZIP_MAGIC[0] && + fileContent[1] === GZIP_MAGIC[1] + ) { + jsonData = gunzipSync(fileContent).toString('utf-8'); } else { jsonData = fileContent.toString('utf-8'); } const archive = JSON.parse(jsonData) as Archive; - if (archive.format !== 'causantic-archive') { - throw new Error('Invalid archive format'); + // Validate + const validation = validateArchive(archive); + for (const warning of validation.warnings) { + log.warn(warning); + } + if (!validation.valid) { + throw new Error(`Invalid archive: ${validation.errors.join('; ')}`); + } + + // Normalize v1.0 archives + if (!archive.vectors) { + archive.vectors = []; + } + + const result: ImportResult = { + chunkCount: archive.chunks.length, + edgeCount: archive.edges.length, + clusterCount: archive.clusters.length, + vectorCount: archive.vectors.length, + dryRun: !!options.dryRun, + }; + + if (options.dryRun) { + log.info('Dry run — no changes made', { ...result }); + return result; } const db = getDb(); + // Ensure vectors table exists + db.exec(` + CREATE TABLE IF NOT EXISTS vectors ( + id TEXT PRIMARY KEY, + embedding BLOB NOT NULL, + orphaned_at TEXT DEFAULT NULL, + last_accessed TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); + // Start transaction const transaction = db.transaction(() => { if (!options.merge) { // Clear existing data - db.prepare('DELETE FROM cluster_members').run(); + db.prepare('DELETE FROM chunk_clusters').run(); db.prepare('DELETE FROM clusters').run(); db.prepare('DELETE FROM edges').run(); + db.prepare('DELETE FROM vectors').run(); db.prepare('DELETE FROM chunks').run(); } // Import chunks const insertChunk = db.prepare(` - INSERT OR REPLACE INTO chunks (id, session_slug, content, start_time, 
end_time, turn_indices) - VALUES (?, ?, ?, ?, ?, ?) + INSERT OR REPLACE INTO chunks (id, session_id, session_slug, project_path, content, start_time, end_time, turn_indices) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) `); for (const chunk of archive.chunks) { insertChunk.run( chunk.id, + chunk.sessionId ?? '', chunk.sessionSlug, + chunk.projectPath ?? null, chunk.content, chunk.startTime, chunk.endTime, @@ -287,44 +563,75 @@ export async function importArchive(options: ImportOptions): Promise { // Import edges const insertEdge = db.prepare(` - INSERT OR REPLACE INTO edges (source_id, target_id, type, reference_type, weight) - VALUES (?, ?, ?, ?, ?) + INSERT OR REPLACE INTO edges (id, source_chunk_id, target_chunk_id, edge_type, reference_type, initial_weight, created_at, link_count) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) `); for (const edge of archive.edges) { - insertEdge.run(edge.source, edge.target, edge.type, edge.referenceType, edge.weight); + insertEdge.run( + edge.id ?? generateId(), + edge.source, + edge.target, + edge.type, + edge.referenceType ?? null, + edge.weight, + new Date().toISOString(), + edge.linkCount ?? 1, + ); } // Import clusters const insertCluster = db.prepare(` - INSERT OR REPLACE INTO clusters (id, name, description) - VALUES (?, ?, ?) + INSERT OR REPLACE INTO clusters (id, name, description, centroid, exemplar_ids, membership_hash) + VALUES (?, ?, ?, ?, ?, ?) `); const insertMember = db.prepare(` - INSERT OR REPLACE INTO cluster_members (cluster_id, chunk_id) - VALUES (?, ?) + INSERT OR REPLACE INTO chunk_clusters (chunk_id, cluster_id, distance) + VALUES (?, ?, ?) `); for (const cluster of archive.clusters) { - insertCluster.run(cluster.id, cluster.name, cluster.description); - for (const memberId of cluster.memberChunkIds) { - insertMember.run(cluster.id, memberId); + insertCluster.run( + cluster.id, + cluster.name, + cluster.description, + cluster.centroid ? serializeEmbedding(cluster.centroid) : null, + cluster.exemplarIds ? JSON.stringify(cluster.exemplarIds) : null, + cluster.membershipHash ?? null, + ); + + // Handle both v1.1 (members with distance) and v1.0 compat (memberChunkIds) + const members: ClusterMember[] = + cluster.members ?? + ((cluster as unknown as { memberChunkIds?: string[] }).memberChunkIds)?.map( + (id) => ({ chunkId: id, distance: 0 }), + ) ?? + []; + for (const member of members) { + insertMember.run(member.chunkId, cluster.id, member.distance); + } + } + + // Import vectors + if (archive.vectors.length > 0) { + const insertVector = db.prepare(` + INSERT OR REPLACE INTO vectors (id, embedding, orphaned_at, last_accessed) + VALUES (?, ?, NULL, CURRENT_TIMESTAMP) + `); + for (const vector of archive.vectors) { + insertVector.run(vector.chunkId, serializeEmbedding(vector.embedding)); } } }); transaction(); - log.info('Import completed', { - chunks: archive.chunks.length, - edges: archive.edges.length, - clusters: archive.clusters.length, - }); + log.info('Import completed', { ...result }); + return result; } /** * Redact file paths in content. */ function redactFilePaths(content: string): string { - // Match common file path patterns const pathPattern = /(?:\/[\w.-]+)+\.\w+|(?:[A-Z]:\\[\w.-\\]+)|(?:~\/[\w.-\/]+)/g; return content.replace(pathPattern, '[REDACTED_PATH]'); } @@ -333,7 +640,6 @@ function redactFilePaths(content: string): string { * Redact code blocks in content. 
*/ function redactCodeBlocks(content: string): string { - // Match markdown code blocks const codeBlockPattern = /```[\s\S]*?```/g; return content.replace(codeBlockPattern, '```\n[REDACTED_CODE]\n```'); } diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 16e9278..4f81a1f 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -1,323 +1,835 @@ /** - * Tests for export/import functionality. + * Integration tests for export/import functionality. + * + * Uses real in-memory databases to verify end-to-end round-trips. */ -import { describe, it, expect } from 'vitest'; -import type { - Archive, - ArchiveMetadata, - ExportedChunk, - ExportedEdge, - ExportedCluster, - ExportOptions, - ImportOptions, +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { writeFileSync, readFileSync, unlinkSync, existsSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { gzipSync } from 'node:zlib'; +import Database from 'better-sqlite3-multiple-ciphers'; +import { + createTestDb, + setupTestDb, + teardownTestDb, + createSampleChunk, + insertTestChunk, + insertTestEdge, + insertTestCluster, + assignChunkToCluster, +} from './test-utils.js'; +import { + exportArchive, + importArchive, + validateArchive, + type Archive, + type ExportResult, + type ImportResult, } from '../../src/storage/archive.js'; +import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedding-utils.js'; + +// Helper to create a temp file path +function tempPath(suffix = '.json'): string { + return join(tmpdir(), `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`); +} + +// Helper to create vectors table in test db +function createVectorsTable(db: Database.Database): void { + db.exec(` + CREATE TABLE IF NOT EXISTS vectors ( + id TEXT PRIMARY KEY, + embedding BLOB NOT NULL, + orphaned_at TEXT DEFAULT NULL, + last_accessed TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); +} + +// Helper to insert a vector into the test db +function insertTestVector(db: Database.Database, id: string, embedding: number[]): void { + db.prepare('INSERT INTO vectors (id, embedding) VALUES (?, ?)').run( + id, + serializeEmbedding(embedding), + ); +} + +// Helper to seed a standard test dataset +function seedTestData(db: Database.Database): { + chunkIds: string[]; + edgeId: string; + clusterId: string; + embedding: number[]; +} { + const chunk1 = createSampleChunk({ + id: 'chunk-1', + sessionId: 'session-1', + sessionSlug: 'project-a', + content: 'First chunk content with /path/to/file.ts', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + projectPath: '/home/user/project-a', + }); + const chunk2 = createSampleChunk({ + id: 'chunk-2', + sessionId: 'session-1', + sessionSlug: 'project-a', + content: 'Second chunk with ```ts\nconst x = 1;\n```', + startTime: '2024-01-01T00:01:00Z', + endTime: '2024-01-01T00:02:00Z', + projectPath: '/home/user/project-a', + }); + const chunk3 = createSampleChunk({ + id: 'chunk-3', + sessionId: 'session-2', + sessionSlug: 'project-b', + content: 'Third chunk from project-b', + startTime: '2024-01-01T00:02:00Z', + endTime: '2024-01-01T00:03:00Z', + projectPath: '/home/user/project-b', + }); -describe('archive', () => { - describe('Archive interface', () => { - it('has correct structure', () => { - const archive: Archive = { - format: 'causantic-archive', - version: '1.0', - created: '2024-01-15T10:30:00Z', - metadata: { - version: '1.0', - created: 
'2024-01-15T10:30:00Z', - chunkCount: 100, - edgeCount: 250, - clusterCount: 5, - projects: ['project-a', 'project-b'], - }, - chunks: [], - edges: [], - clusters: [], - }; + insertTestChunk(db, chunk1); + insertTestChunk(db, chunk2); + insertTestChunk(db, chunk3); + + const edgeId = insertTestEdge(db, { + id: 'edge-1', + sourceChunkId: 'chunk-1', + targetChunkId: 'chunk-2', + edgeType: 'forward', + referenceType: 'within-chain', + initialWeight: 0.9, + linkCount: 2, + }); - expect(archive.format).toBe('causantic-archive'); - expect(archive.version).toBe('1.0'); - expect(archive.metadata.chunkCount).toBe(100); - }); + const clusterId = insertTestCluster(db, { + id: 'cluster-1', + name: 'Auth cluster', + description: 'Authentication related', + exemplarIds: ['chunk-1'], }); + assignChunkToCluster(db, 'chunk-1', clusterId, 0.3); + assignChunkToCluster(db, 'chunk-2', clusterId, 0.5); + + // Add centroid and membership hash + db.prepare('UPDATE clusters SET centroid = ?, membership_hash = ? WHERE id = ?').run( + serializeEmbedding([0.1, 0.2, 0.3, 0.4]), + 'abc123', + clusterId, + ); + + createVectorsTable(db); + const embedding = Array.from({ length: 8 }, (_, i) => i * 0.1); + insertTestVector(db, 'chunk-1', embedding); + insertTestVector(db, 'chunk-2', embedding.map((v) => v + 0.01)); + + return { + chunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], + edgeId, + clusterId, + embedding, + }; +} - describe('ArchiveMetadata interface', () => { - it('tracks counts correctly', () => { - const metadata: ArchiveMetadata = { - version: '1.0', - created: new Date().toISOString(), - chunkCount: 50, - edgeCount: 120, - clusterCount: 3, - projects: ['my-project'], - }; +describe('archive', () => { + let db: Database.Database; + let outputPath: string; - expect(metadata.chunkCount).toBe(50); - expect(metadata.edgeCount).toBe(120); - expect(metadata.clusterCount).toBe(3); - expect(metadata.projects).toContain('my-project'); - }); + beforeEach(() => { + db = createTestDb(); + setupTestDb(db); + outputPath = tempPath(); }); - describe('ExportedChunk interface', () => { - it('has all required fields', () => { - const chunk: ExportedChunk = { - id: 'chunk-abc', - sessionSlug: 'my-project', - content: 'This is chunk content', - startTime: '2024-01-15T10:00:00Z', - endTime: '2024-01-15T10:05:00Z', - turnIndices: [0, 1, 2], - }; + afterEach(() => { + teardownTestDb(db); + if (existsSync(outputPath)) { + unlinkSync(outputPath); + } + }); - expect(chunk.id).toBe('chunk-abc'); - expect(chunk.sessionSlug).toBe('my-project'); - expect(chunk.turnIndices).toEqual([0, 1, 2]); + describe('unencrypted round-trip', () => { + it('exports and imports all data correctly', async () => { + const { embedding } = seedTestData(db); + + const exportResult = await exportArchive({ outputPath }); + expect(exportResult.chunkCount).toBe(3); + expect(exportResult.edgeCount).toBe(1); + expect(exportResult.clusterCount).toBe(1); + expect(exportResult.vectorCount).toBe(2); + expect(exportResult.compressed).toBe(true); + expect(exportResult.encrypted).toBe(false); + expect(exportResult.fileSize).toBeGreaterThan(0); + + // Import into fresh db + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.chunkCount).toBe(3); + expect(importResult.edgeCount).toBe(1); + expect(importResult.clusterCount).toBe(1); + expect(importResult.vectorCount).toBe(2); + expect(importResult.dryRun).toBe(false); + + // Verify data + const chunks 
= db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array>; + expect(chunks).toHaveLength(3); + expect(chunks[0].id).toBe('chunk-1'); + expect(chunks[0].session_id).toBe('session-1'); + expect(chunks[0].session_slug).toBe('project-a'); + expect(chunks[0].project_path).toBe('/home/user/project-a'); + + const edges = db.prepare('SELECT * FROM edges').all() as Array>; + expect(edges).toHaveLength(1); + expect(edges[0].source_chunk_id).toBe('chunk-1'); + expect(edges[0].target_chunk_id).toBe('chunk-2'); + expect(edges[0].edge_type).toBe('forward'); + expect(edges[0].reference_type).toBe('within-chain'); + expect(edges[0].initial_weight).toBe(0.9); + expect(edges[0].link_count).toBe(2); + + const clusters = db.prepare('SELECT * FROM clusters').all() as Array>; + expect(clusters).toHaveLength(1); + expect(clusters[0].name).toBe('Auth cluster'); + expect(clusters[0].description).toBe('Authentication related'); + expect(clusters[0].membership_hash).toBe('abc123'); + + const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array>; + expect(members).toHaveLength(2); + expect(members[0].chunk_id).toBe('chunk-1'); + expect(members[0].distance).toBe(0.3); + + const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array>; + expect(vectors).toHaveLength(2); }); }); - describe('ExportedEdge interface', () => { - it('has all required fields', () => { - const edge: ExportedEdge = { - source: 'chunk-1', - target: 'chunk-2', - type: 'backward', - referenceType: 'within-chain', - weight: 0.85, - }; + describe('encrypted round-trip', () => { + it('encrypts and decrypts correctly', async () => { + seedTestData(db); + const password = 'test-password-123'; + + const exportResult = await exportArchive({ outputPath, password }); + expect(exportResult.encrypted).toBe(true); + + // Verify file starts with magic bytes + const fileContent = readFileSync(outputPath); + expect(fileContent.subarray(0, 4).equals(Buffer.from('CST\x00'))).toBe(true); - expect(edge.source).toBe('chunk-1'); - expect(edge.target).toBe('chunk-2'); - expect(edge.type).toBe('backward'); - expect(edge.referenceType).toBe('within-chain'); - expect(edge.weight).toBe(0.85); + // Import into fresh db + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath, password }); + expect(importResult.chunkCount).toBe(3); + + const chunks = db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }; + expect(chunks.count).toBe(3); }); - }); - describe('ExportedCluster interface', () => { - it('has all required fields', () => { - const cluster: ExportedCluster = { - id: 'cluster-xyz', - name: 'Authentication', - description: 'Chunks related to user authentication', - memberChunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], - }; + it('rejects wrong password', async () => { + seedTestData(db); + await exportArchive({ outputPath, password: 'correct-password' }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(cluster.id).toBe('cluster-xyz'); - expect(cluster.name).toBe('Authentication'); - expect(cluster.memberChunkIds.length).toBe(3); + await expect( + importArchive({ inputPath: outputPath, password: 'wrong-password' }), + ).rejects.toThrow(); }); - it('allows null description', () => { - const cluster: ExportedCluster = { - id: 'cluster-xyz', - name: 'Unnamed Cluster', - description: null, - memberChunkIds: [], - }; + it('rejects missing password for encrypted archive', 
async () => { + seedTestData(db); + await exportArchive({ outputPath, password: 'test-pass' }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(cluster.description).toBeNull(); + await expect(importArchive({ inputPath: outputPath })).rejects.toThrow( + 'Archive is encrypted', + ); }); }); - describe('ExportOptions interface', () => { - it('requires outputPath', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - }; + describe('vector round-trip', () => { + it('preserves vector embeddings through serialize/JSON/deserialize', async () => { + const { embedding } = seedTestData(db); - expect(options.outputPath).toBe('/path/to/archive.causantic'); - }); + await exportArchive({ outputPath }); - it('supports optional password for encryption', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - password: 'secret123', - }; + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.password).toBe('secret123'); - }); + await importArchive({ inputPath: outputPath }); - it('supports project filtering', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - projects: ['project-a', 'project-c'], - }; + const vectors = db.prepare('SELECT id, embedding FROM vectors ORDER BY id').all() as Array<{ + id: string; + embedding: Buffer; + }>; + expect(vectors).toHaveLength(2); - expect(options.projects).toEqual(['project-a', 'project-c']); + const restored = deserializeEmbedding(vectors[0].embedding); + expect(restored).toHaveLength(embedding.length); + // Float32 precision: compare with tolerance + for (let i = 0; i < embedding.length; i++) { + expect(restored[i]).toBeCloseTo(embedding[i], 5); + } }); - it('supports redaction options', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - redactPaths: true, - redactCode: true, - }; + it('skips vectors with --no-vectors', async () => { + seedTestData(db); + + const result = await exportArchive({ outputPath, noVectors: true }); + expect(result.vectorCount).toBe(0); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.redactPaths).toBe(true); - expect(options.redactCode).toBe(true); + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.vectorCount).toBe(0); + + const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { count: number }; + expect(vectors.count).toBe(0); }); }); - describe('ImportOptions interface', () => { - it('requires inputPath', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - }; + describe('cluster round-trip', () => { + it('preserves centroid, distances, exemplar IDs, and membership hash', async () => { + seedTestData(db); - expect(options.inputPath).toBe('/path/to/archive.causantic'); - }); + await exportArchive({ outputPath }); - it('supports optional password for decryption', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - password: 'secret123', - }; + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.password).toBe('secret123'); - }); + await importArchive({ inputPath: outputPath }); - it('supports merge option', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - merge: true, - }; + const cluster = db.prepare('SELECT * FROM clusters WHERE id = 
?').get('cluster-1') as Record; + expect(cluster.name).toBe('Auth cluster'); + expect(cluster.description).toBe('Authentication related'); + expect(cluster.membership_hash).toBe('abc123'); + expect(cluster.exemplar_ids).toBe(JSON.stringify(['chunk-1'])); + + // Verify centroid survives round-trip + const centroid = deserializeEmbedding(cluster.centroid as Buffer); + expect(centroid).toHaveLength(4); + expect(centroid[0]).toBeCloseTo(0.1, 5); - expect(options.merge).toBe(true); + // Verify member distances + const members = db.prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? ORDER BY chunk_id').all('cluster-1') as Array>; + expect(members).toHaveLength(2); + expect(members[0].distance).toBe(0.3); + expect(members[1].distance).toBe(0.5); }); }); - describe('redaction functions', () => { - describe('redactFilePaths', () => { - it('redacts Unix-style paths', () => { - const content = 'Looking at /src/components/Button.tsx'; - const pattern = /(?:\/[\w.-]+)+\.\w+/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + describe('project filtering', () => { + it('exports only specified projects', async () => { + seedTestData(db); - expect(redacted).toBe('Looking at [REDACTED_PATH]'); + const result = await exportArchive({ + outputPath, + projects: ['project-a'], }); + expect(result.chunkCount).toBe(2); // only project-a chunks - it('redacts Windows-style paths', () => { - const content = 'Opening C:\\Users\\dev\\project\\file.ts'; - const pattern = /(?:[A-Z]:\\[\w.-\\]+)/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(redacted).toContain('[REDACTED_PATH]'); - }); + await importArchive({ inputPath: outputPath }); - it('redacts home directory paths', () => { - const content = 'Config at ~/dev/project/config.json'; - const pattern = /(?:~\/[\w.-\/]+)/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + const chunks = db.prepare('SELECT * FROM chunks').all(); + expect(chunks).toHaveLength(2); - expect(redacted).toBe('Config at [REDACTED_PATH]'); - }); + const slugs = (chunks as Array>).map((c) => c.session_slug); + expect(slugs).toEqual(['project-a', 'project-a']); + }); + }); - it('preserves non-path content', () => { - const content = 'This is just regular text without paths'; - const pattern = /(?:\/[\w.-]+)+\.\w+/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + describe('edge completeness', () => { + it('excludes edges with one endpoint outside the export', async () => { + const chunk1 = createSampleChunk({ id: 'chunk-a', sessionSlug: 'proj-1', sessionId: 'ses-1' }); + const chunk2 = createSampleChunk({ id: 'chunk-b', sessionSlug: 'proj-1', sessionId: 'ses-1' }); + const chunk3 = createSampleChunk({ id: 'chunk-c', sessionSlug: 'proj-2', sessionId: 'ses-2' }); + insertTestChunk(db, chunk1); + insertTestChunk(db, chunk2); + insertTestChunk(db, chunk3); + + // Edge within proj-1 + insertTestEdge(db, { + id: 'edge-internal', + sourceChunkId: 'chunk-a', + targetChunkId: 'chunk-b', + edgeType: 'forward', + }); + // Edge crossing projects + insertTestEdge(db, { + id: 'edge-cross', + sourceChunkId: 'chunk-a', + targetChunkId: 'chunk-c', + edgeType: 'forward', + }); - expect(redacted).toBe(content); + const result = await exportArchive({ + outputPath, + projects: ['proj-1'], }); + // Only the internal edge should be exported + expect(result.edgeCount).toBe(1); }); + }); - describe('redactCodeBlocks', () => { - it('redacts markdown 
code blocks', () => { - const content = 'Here is code:\n```typescript\nconst x = 1;\n```\nEnd.'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + describe('redaction', () => { + it('redacts file paths', async () => { + seedTestData(db); - expect(redacted).toBe('Here is code:\n```\n[REDACTED_CODE]\n```\nEnd.'); - }); + await exportArchive({ outputPath, redactPaths: true }); - it('handles multiple code blocks', () => { - const content = '```js\ncode1\n```\nText\n```py\ncode2\n```'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(redacted.match(/\[REDACTED_CODE\]/g)?.length).toBe(2); - }); + await importArchive({ inputPath: outputPath }); - it('preserves inline code', () => { - const content = 'Use the `function` keyword'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { content: string }; + expect(chunk.content).toContain('[REDACTED_PATH]'); + expect(chunk.content).not.toContain('/path/to/file.ts'); + }); - expect(redacted).toBe(content); // Inline code not redacted - }); + it('redacts code blocks', async () => { + seedTestData(db); + + await exportArchive({ outputPath, redactCode: true }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + await importArchive({ inputPath: outputPath }); + + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { content: string }; + expect(chunk.content).toContain('[REDACTED_CODE]'); + expect(chunk.content).not.toContain('const x = 1'); }); }); - describe('encryption detection', () => { - it('checks for magic bytes', () => { - const ENCRYPTED_MAGIC = Buffer.from('CST\x00'); - const encryptedFile = Buffer.concat([ENCRYPTED_MAGIC, Buffer.from('encrypted data')]); - const plainFile = Buffer.from('{"format":"causantic-archive"}'); + describe('merge vs replace', () => { + it('replace mode clears existing data', async () => { + seedTestData(db); + await exportArchive({ outputPath }); + + // Add extra data before import + const extra = createSampleChunk({ id: 'chunk-extra', sessionSlug: 'project-a', sessionId: 'ses-x' }); + insertTestChunk(db, extra); + expect( + (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, + ).toBe(4); + + await importArchive({ inputPath: outputPath, merge: false }); + + // Replace should have cleared the extra chunk + expect( + (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, + ).toBe(3); + }); + + it('merge mode preserves existing data', async () => { + seedTestData(db); + + // Export only project-a + await exportArchive({ outputPath, projects: ['project-a'] }); - const isEncrypted1 = encryptedFile.subarray(0, 4).equals(ENCRYPTED_MAGIC); - const isEncrypted2 = plainFile.subarray(0, 4).equals(ENCRYPTED_MAGIC); + // Now add different data + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + const other = createSampleChunk({ id: 'chunk-other', sessionSlug: 'project-c', sessionId: 'ses-c' }); + insertTestChunk(db, other); - expect(isEncrypted1).toBe(true); - expect(isEncrypted2).toBe(false); + await importArchive({ inputPath: outputPath, merge: true }); + + // Should have both the 
imported chunks AND the existing one + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(3); // 2 from project-a + 1 existing }); }); - describe('archive validation', () => { - it('validates format field', () => { - const validArchive = { format: 'causantic-archive' }; - const invalidArchive = { format: 'other-format' }; + describe('dry-run import', () => { + it('reports counts without modifying database', async () => { + seedTestData(db); + await exportArchive({ outputPath }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(validArchive.format === 'causantic-archive').toBe(true); - expect(invalidArchive.format === 'causantic-archive').toBe(false); + const result = await importArchive({ inputPath: outputPath, dryRun: true }); + expect(result.dryRun).toBe(true); + expect(result.chunkCount).toBe(3); + expect(result.edgeCount).toBe(1); + + // Database should be empty + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(0); }); }); - describe('merge behavior', () => { - it('merge=false clears existing data', () => { - const merge = false; - const clearExisting = !merge; + describe('v1.0 backward compatibility', () => { + it('imports v1.0 archive without vectors', async () => { + // Build a v1.0-style archive manually + const v1Archive = { + format: 'causantic-archive' as const, + version: '1.0', + created: new Date().toISOString(), + metadata: { + version: '1.0', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'v1-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'v1 content', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [0], + }, + ], + edges: [], + clusters: [], + // No vectors array — v1.0 format + }; + + writeFileSync(outputPath, JSON.stringify(v1Archive)); + + createVectorsTable(db); + const result = await importArchive({ inputPath: outputPath }); + expect(result.chunkCount).toBe(1); + expect(result.vectorCount).toBe(0); - expect(clearExisting).toBe(true); + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(1); }); - it('merge=true preserves existing data', () => { - const merge = true; - const clearExisting = !merge; + it('imports v1.0 archive with memberChunkIds cluster format', async () => { + const v1Archive = { + format: 'causantic-archive' as const, + version: '1.0', + created: new Date().toISOString(), + metadata: { + version: '1.0', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 1, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'v1-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'v1 content', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [0], + }, + ], + edges: [], + clusters: [ + { + id: 'v1-cluster', + name: 'Old Cluster', + description: null, + centroid: null, + exemplarIds: null, + membershipHash: null, + // v1.0 format used memberChunkIds instead of members + memberChunkIds: ['v1-chunk'], + }, + ], + }; + + writeFileSync(outputPath, JSON.stringify(v1Archive)); + + createVectorsTable(db); + await importArchive({ inputPath: outputPath }); - 
expect(clearExisting).toBe(false); + const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array>; + expect(members).toHaveLength(1); + expect(members[0].chunk_id).toBe('v1-chunk'); + expect(members[0].distance).toBe(0); // default distance for v1.0 }); }); - describe('JSON serialization', () => { - it('serializes archive to JSON with formatting', () => { + describe('validation', () => { + it('rejects unknown version', () => { + const archive = { + format: 'causantic-archive', + version: '99.0', + metadata: { chunkCount: 0, edgeCount: 0, clusterCount: 0 }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + } as unknown as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(false); + expect(result.errors[0]).toContain('Unsupported archive version'); + }); + + it('rejects invalid format', () => { + const archive = { + format: 'not-causantic', + version: '1.1', + metadata: { chunkCount: 0, edgeCount: 0, clusterCount: 0 }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + } as unknown as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(false); + expect(result.errors[0]).toContain('Invalid archive format'); + }); + + it('warns on count mismatch', () => { + const archive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 99, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: [], + }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + }; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('chunkCount'))).toBe(true); + }); + + it('warns on dangling edge references', () => { const archive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 1, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: [], + }, + chunks: [ + { + id: 'chunk-1', + sessionId: 'ses', + sessionSlug: 'test', + projectPath: null, + content: 'test', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [], + }, + ], + edges: [ + { + id: 'edge-1', + source: 'chunk-1', + target: 'chunk-nonexistent', + type: 'forward', + referenceType: null, + weight: 1.0, + linkCount: 1, + }, + ], + clusters: [], + vectors: [], + }; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('edge(s) reference chunks'))).toBe(true); + }); + + it('warns on v1.0 missing vectors', () => { + const archive = { format: 'causantic-archive', version: '1.0', - created: '2024-01-15T10:30:00Z', + created: new Date().toISOString(), metadata: { version: '1.0', - created: '2024-01-15T10:30:00Z', + created: new Date().toISOString(), chunkCount: 0, edgeCount: 0, clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, projects: [], }, chunks: [], edges: [], clusters: [], + vectors: [], + } as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('version 1.0'))).toBe(true); + }); + + it('rejects invalid file at import', async () => { + writeFileSync(outputPath, JSON.stringify({ format: 'wrong', version: '1.0' })); + + await expect(importArchive({ inputPath: 
outputPath })).rejects.toThrow('Invalid archive'); + }); + }); + + describe('gzip compression', () => { + it('exports compressed data that is smaller than uncompressed', async () => { + seedTestData(db); + + await exportArchive({ outputPath }); + + const compressedSize = readFileSync(outputPath).length; + + // The file should be gzip (starts with gzip magic bytes) + const fileContent = readFileSync(outputPath); + expect(fileContent[0]).toBe(0x1f); + expect(fileContent[1]).toBe(0x8b); + + // Compression should produce meaningful reduction (at least some savings) + expect(compressedSize).toBeGreaterThan(0); + }); + + it('imports plain JSON (backward compat)', async () => { + // Write a plain JSON archive (no gzip) + const plainArchive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'plain-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'plain text', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [], + }, + ], + edges: [], + clusters: [], + vectors: [], }; + writeFileSync(outputPath, JSON.stringify(plainArchive)); - const json = JSON.stringify(archive, null, 2); + createVectorsTable(db); + const result = await importArchive({ inputPath: outputPath }); + expect(result.chunkCount).toBe(1); + }); + }); - expect(json).toContain('"format": "causantic-archive"'); - expect(json).toContain('\n'); // Pretty printed + describe('empty archive', () => { + it('exports and imports empty database', async () => { + const result = await exportArchive({ outputPath }); + expect(result.chunkCount).toBe(0); + expect(result.edgeCount).toBe(0); + expect(result.clusterCount).toBe(0); + expect(result.vectorCount).toBe(0); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.chunkCount).toBe(0); }); + }); - it('parses JSON back to archive', () => { - const json = - '{"format":"causantic-archive","version":"1.0","chunks":[],"edges":[],"clusters":[]}'; - const parsed = JSON.parse(json); + describe('missing file', () => { + it('throws on non-existent file', async () => { + await expect(importArchive({ inputPath: '/nonexistent/file.json' })).rejects.toThrow( + 'File not found', + ); + }); + }); - expect(parsed.format).toBe('causantic-archive'); + describe('export result', () => { + it('returns accurate counts and metadata', async () => { + seedTestData(db); + + const result = await exportArchive({ outputPath }); + expect(result).toEqual({ + chunkCount: 3, + edgeCount: 1, + clusterCount: 1, + vectorCount: 2, + fileSize: expect.any(Number), + compressed: true, + encrypted: false, + }); }); }); }); From 6612498b4d506f1fd1056739a97d916d30befd9b Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:24:16 +0000 Subject: [PATCH 2/3] Fix lint warnings in archive tests Remove unused imports (gzipSync, ExportResult, ImportResult) and unused destructured variable. 
--- test/storage/archive.test.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 4f81a1f..49d160b 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -8,8 +8,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { writeFileSync, readFileSync, unlinkSync, existsSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { gzipSync } from 'node:zlib'; -import Database from 'better-sqlite3-multiple-ciphers'; +import type Database from 'better-sqlite3-multiple-ciphers'; import { createTestDb, setupTestDb, @@ -25,8 +24,6 @@ import { importArchive, validateArchive, type Archive, - type ExportResult, - type ImportResult, } from '../../src/storage/archive.js'; import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedding-utils.js'; @@ -152,7 +149,7 @@ describe('archive', () => { describe('unencrypted round-trip', () => { it('exports and imports all data correctly', async () => { - const { embedding } = seedTestData(db); + seedTestData(db); const exportResult = await exportArchive({ outputPath }); expect(exportResult.chunkCount).toBe(3); From be0359781f28f989fea8954e1616d587718eb975 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:25:49 +0000 Subject: [PATCH 3/3] Format with prettier --- src/cli/commands/archive.ts | 5 +-- src/storage/archive.ts | 13 +++--- test/storage/archive.test.ts | 87 ++++++++++++++++++++++++++++-------- 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/src/cli/commands/archive.ts b/src/cli/commands/archive.ts index e536c62..c401112 100644 --- a/src/cli/commands/archive.ts +++ b/src/cli/commands/archive.ts @@ -70,10 +70,7 @@ export const exportCommand: Command = { `${formatCount(result.clusterCount)} clusters`, `${formatCount(result.vectorCount)} vectors`, ]; - const suffix = [ - result.compressed ? 'compressed' : null, - result.encrypted ? 'encrypted' : null, - ] + const suffix = [result.compressed ? 'compressed' : null, result.encrypted ? 'encrypted' : null] .filter(Boolean) .join(', '); diff --git a/src/storage/archive.ts b/src/storage/archive.ts index e49d045..fd8e44e 100644 --- a/src/storage/archive.ts +++ b/src/storage/archive.ts @@ -326,9 +326,9 @@ export async function exportArchive(options: ExportOptions): Promise '?').join(',')}) `); - const clusterIds = ( - clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }> - ).map((r) => r.cluster_id); + const clusterIds = (clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }>).map( + (r) => r.cluster_id, + ); if (clusterIds.length > 0) { const clustersQuery = db.prepare(` @@ -601,9 +601,10 @@ export async function importArchive(options: ImportOptions): Promise ({ chunkId: id, distance: 0 }), - ) ?? + (cluster as unknown as { memberChunkIds?: string[] }).memberChunkIds?.map((id) => ({ + chunkId: id, + distance: 0, + })) ?? 
[]; for (const member of members) { insertMember.run(member.chunkId, cluster.id, member.distance); diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 49d160b..6c02bb3 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -29,7 +29,10 @@ import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedd // Helper to create a temp file path function tempPath(suffix = '.json'): string { - return join(tmpdir(), `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`); + return join( + tmpdir(), + `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`, + ); } // Helper to create vectors table in test db @@ -120,7 +123,11 @@ function seedTestData(db: Database.Database): { createVectorsTable(db); const embedding = Array.from({ length: 8 }, (_, i) => i * 0.1); insertTestVector(db, 'chunk-1', embedding); - insertTestVector(db, 'chunk-2', embedding.map((v) => v + 0.01)); + insertTestVector( + db, + 'chunk-2', + embedding.map((v) => v + 0.01), + ); return { chunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], @@ -174,7 +181,9 @@ describe('archive', () => { expect(importResult.dryRun).toBe(false); // Verify data - const chunks = db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array>; + const chunks = db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array< + Record + >; expect(chunks).toHaveLength(3); expect(chunks[0].id).toBe('chunk-1'); expect(chunks[0].session_id).toBe('session-1'); @@ -196,12 +205,16 @@ describe('archive', () => { expect(clusters[0].description).toBe('Authentication related'); expect(clusters[0].membership_hash).toBe('abc123'); - const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array>; + const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array< + Record + >; expect(members).toHaveLength(2); expect(members[0].chunk_id).toBe('chunk-1'); expect(members[0].distance).toBe(0.3); - const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array>; + const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array< + Record + >; expect(vectors).toHaveLength(2); }); }); @@ -299,7 +312,9 @@ describe('archive', () => { const importResult = await importArchive({ inputPath: outputPath }); expect(importResult.vectorCount).toBe(0); - const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { count: number }; + const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { + count: number; + }; expect(vectors.count).toBe(0); }); }); @@ -317,7 +332,10 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const cluster = db.prepare('SELECT * FROM clusters WHERE id = ?').get('cluster-1') as Record; + const cluster = db.prepare('SELECT * FROM clusters WHERE id = ?').get('cluster-1') as Record< + string, + unknown + >; expect(cluster.name).toBe('Auth cluster'); expect(cluster.description).toBe('Authentication related'); expect(cluster.membership_hash).toBe('abc123'); @@ -329,7 +347,9 @@ describe('archive', () => { expect(centroid[0]).toBeCloseTo(0.1, 5); // Verify member distances - const members = db.prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? ORDER BY chunk_id').all('cluster-1') as Array>; + const members = db + .prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? 
ORDER BY chunk_id') + .all('cluster-1') as Array>; expect(members).toHaveLength(2); expect(members[0].distance).toBe(0.3); expect(members[1].distance).toBe(0.5); @@ -363,9 +383,21 @@ describe('archive', () => { describe('edge completeness', () => { it('excludes edges with one endpoint outside the export', async () => { - const chunk1 = createSampleChunk({ id: 'chunk-a', sessionSlug: 'proj-1', sessionId: 'ses-1' }); - const chunk2 = createSampleChunk({ id: 'chunk-b', sessionSlug: 'proj-1', sessionId: 'ses-1' }); - const chunk3 = createSampleChunk({ id: 'chunk-c', sessionSlug: 'proj-2', sessionId: 'ses-2' }); + const chunk1 = createSampleChunk({ + id: 'chunk-a', + sessionSlug: 'proj-1', + sessionId: 'ses-1', + }); + const chunk2 = createSampleChunk({ + id: 'chunk-b', + sessionSlug: 'proj-1', + sessionId: 'ses-1', + }); + const chunk3 = createSampleChunk({ + id: 'chunk-c', + sessionSlug: 'proj-2', + sessionId: 'ses-2', + }); insertTestChunk(db, chunk1); insertTestChunk(db, chunk2); insertTestChunk(db, chunk3); @@ -407,7 +439,9 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { content: string }; + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { + content: string; + }; expect(chunk.content).toContain('[REDACTED_PATH]'); expect(chunk.content).not.toContain('/path/to/file.ts'); }); @@ -424,7 +458,9 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { content: string }; + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { + content: string; + }; expect(chunk.content).toContain('[REDACTED_CODE]'); expect(chunk.content).not.toContain('const x = 1'); }); @@ -436,7 +472,11 @@ describe('archive', () => { await exportArchive({ outputPath }); // Add extra data before import - const extra = createSampleChunk({ id: 'chunk-extra', sessionSlug: 'project-a', sessionId: 'ses-x' }); + const extra = createSampleChunk({ + id: 'chunk-extra', + sessionSlug: 'project-a', + sessionId: 'ses-x', + }); insertTestChunk(db, extra); expect( (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, @@ -461,13 +501,18 @@ describe('archive', () => { db = createTestDb(); setupTestDb(db); createVectorsTable(db); - const other = createSampleChunk({ id: 'chunk-other', sessionSlug: 'project-c', sessionId: 'ses-c' }); + const other = createSampleChunk({ + id: 'chunk-other', + sessionSlug: 'project-c', + sessionId: 'ses-c', + }); insertTestChunk(db, other); await importArchive({ inputPath: outputPath, merge: true }); // Should have both the imported chunks AND the existing one - const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }) + .count; expect(count).toBe(3); // 2 from project-a + 1 existing }); }); @@ -487,7 +532,8 @@ describe('archive', () => { expect(result.edgeCount).toBe(1); // Database should be empty - const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }) + .count; expect(count).toBe(0); }); }); @@ -533,7 +579,8 @@ describe('archive', () => { expect(result.chunkCount).toBe(1); 
       expect(result.vectorCount).toBe(0);
 
-      const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count;
+      const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number })
+        .count;
       expect(count).toBe(1);
     });
 
@@ -584,7 +631,9 @@ describe('archive', () => {
       createVectorsTable(db);
       await importArchive({ inputPath: outputPath });
 
-      const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array<Record<string, unknown>>;
+      const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array<
+        Record<string, unknown>
+      >;
       expect(members).toHaveLength(1);
       expect(members[0].chunk_id).toBe('v1-chunk');
       expect(members[0].distance).toBe(0); // default distance for v1.0