From c49e88f1caef58faf037ef0b71da7f7123a90342 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:22:17 +0000 Subject: [PATCH 1/3] Fix export/import: runtime bugs, add vectors, compression, validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archive.ts had 7 column/table name mismatches vs the actual schema (source_id→source_chunk_id, type→edge_type, weight→initial_weight, cluster_members→chunk_clusters, etc.) that would crash at runtime. All 19 tests were interface-only so never caught this. Changes: - Fix all SQL column/table names to match schema.sql - Add vector embedding export/import (semantic search works after import) - Add full cluster data: centroid, exemplar IDs, distances, membership hash - Add gzip compression (auto-detected on import) - Add validateArchive() with version, count, and referential integrity checks - Add dry-run import option - Add ExportResult/ImportResult return types with summary - Filter edges to require both endpoints in export (no dangling refs) - Wire CLI flags: --projects, --redact-paths, --redact-code, --no-vectors, --dry-run - Print formatted summary after export/import operations - Bump archive version to 1.1 (backward-compatible with 1.0) - Replace 19 interface-only tests with 27 integration tests using real DBs - Fix docs: remove phantom --format/--replace flags, fix magic bytes (CST\0) --- docs/guides/backup-restore.md | 69 ++- docs/reference/cli-commands.md | 25 +- src/cli/commands/archive.ts | 70 ++- src/storage/archive.ts | 486 +++++++++++++---- test/storage/archive.test.ts | 956 +++++++++++++++++++++++++-------- 5 files changed, 1263 insertions(+), 343 deletions(-) diff --git a/docs/guides/backup-restore.md b/docs/guides/backup-restore.md index d6987ed..5aecf1f 100644 --- a/docs/guides/backup-restore.md +++ b/docs/guides/backup-restore.md @@ -1,6 +1,6 @@ # Backup & Restore -Causantic supports encrypted exports for secure backup and migration of your memory data. +Causantic supports encrypted, compressed exports for secure backup and migration of your memory data. ## Export Memory @@ -25,6 +25,7 @@ npx causantic export --output backup.json --no-encrypt ```bash npx causantic export --output backup.causantic --projects my-project +npx causantic export --output backup.causantic --projects project-a,project-b ``` ### With Redaction (for sharing) @@ -34,6 +35,14 @@ npx causantic export --output backup.causantic --projects my-project npx causantic export --output backup.causantic --redact-paths --redact-code ``` +### Without Vectors (lightweight) + +```bash +# Skip vector embeddings for a smaller file +# Note: semantic search will not work after import until re-embedding +npx causantic export --output backup.causantic --no-vectors +``` + ## Import Memory ### Encrypted Archive @@ -54,6 +63,12 @@ npx causantic import backup.causantic --merge Without `--merge`, existing data is replaced. 
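Since a plain import replaces existing data, one cautious workflow (a suggestion, not something the tool requires, built only from the commands documented above) is to take a safety export before importing:

```bash
# Keep a copy of the current state before a destructive replace import
npx causantic export --output pre-import-safety.causantic
npx causantic import backup.causantic
```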
+### Dry Run (validate without importing) + +```bash +npx causantic import backup.causantic --dry-run +``` + ## Environment Variable (CI/Scripts) For non-interactive environments, set the password via environment variable: @@ -71,10 +86,26 @@ CAUSANTIC_EXPORT_PASSWORD="your-secure-password" npx causantic import backup.cau | Data | Description | |------|-------------| | Chunks | Conversation segments with semantic content | -| Edges | Causal relationships (backward/forward links) | -| Clusters | Topic groupings from HDBSCAN clustering | +| Edges | Causal relationships (forward/backward links) with identity and link counts | +| Clusters | Topic groupings with centroids, exemplar IDs, distances, and membership hashes | +| Vectors | Embedding vectors for semantic search (skip with `--no-vectors`) | + +## Archive Format + +### Version History -## Encryption Details +| Version | Changes | +|---------|---------| +| 1.1 | Added vector embeddings, full cluster data (centroid, distances, exemplars), gzip compression, edge identity | +| 1.0 | Initial format (chunks, edges, basic clusters) | + +Archives are backward-compatible: v1.1 can import v1.0 archives (with a warning that vectors are missing). + +### Compression + +All v1.1 exports are gzip-compressed. On import, Causantic auto-detects compressed, encrypted, and plain JSON formats. + +### Encryption Details Causantic uses strong encryption for archive files: @@ -83,33 +114,35 @@ Causantic uses strong encryption for archive files: - **Nonce**: 12 bytes (random per encryption) - **Salt**: 16 bytes (unique per password) -The archive format uses magic bytes (`Causantic\0`) to identify encrypted files. - -## File Formats +The archive format uses magic bytes (`CST\0`) to identify encrypted files. -### Encrypted (.causantic) +### File Structure -Binary format with structure: +**Encrypted + compressed:** ``` -[Magic: 4 bytes "Causantic\0"] +[Magic: 4 bytes "CST\0"] [Salt: 16 bytes] [Nonce: 12 bytes] [Auth Tag: 16 bytes] -[Ciphertext: variable] +[Ciphertext: gzip(JSON) encrypted with AES-256-GCM] ``` -### Unencrypted (.json) +**Unencrypted compressed (default):** +``` +[gzip(JSON)] +``` -Standard JSON with structure: +**Plain JSON (v1.0 backward compat):** ```json { "format": "causantic-archive", - "version": "1.0", + "version": "1.1", "created": "2024-01-15T10:30:00Z", "metadata": { ... }, "chunks": [ ... ], "edges": [ ... ], - "clusters": [ ... ] + "clusters": [ ... ], + "vectors": [ ... ] } ``` @@ -163,3 +196,9 @@ The file is not a valid Causantic archive. Check that: ### "Decryption failed" Wrong password. Re-enter the password carefully. + +### "Archive version 1.0: no vector embeddings" + +The archive was created with v1.0 (before vector support). After import: +- Semantic search (`recall`, `search`, `predict`) won't work until vectors are regenerated +- Run `npx causantic maintenance run scan-projects` to re-ingest and generate embeddings diff --git a/docs/reference/cli-commands.md b/docs/reference/cli-commands.md index 47fb503..58cf26d 100644 --- a/docs/reference/cli-commands.md +++ b/docs/reference/cli-commands.md @@ -241,7 +241,7 @@ npx causantic encryption audit 20 ### export -Export memory data. +Export memory data. Archives are gzip-compressed by default and include vector embeddings for semantic search continuity. 
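Because exports are gzip-compressed even when unencrypted, a quick sanity check on an unencrypted export (using the standard `file` utility, not anything Causantic-specific — shown here as a suggestion, not part of the CLI) looks like this:

```bash
npx causantic export --output backup.json --no-encrypt
file backup.json   # expected to report gzip compressed data, despite the .json extension
```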
```bash npx causantic export [options] @@ -251,19 +251,25 @@ npx causantic export [options] | Option | Description | |--------|-------------| -| `--output ` | Output file path | +| `--output ` | Output file path (default: `causantic-backup.causantic`) | | `--no-encrypt` | Skip encryption | -| `--format ` | Output format (json, archive) | +| `--projects ` | Comma-separated project slugs to export | +| `--redact-paths` | Redact file paths in content | +| `--redact-code` | Redact code blocks in content | +| `--no-vectors` | Skip vector embeddings (smaller file, but semantic search won't work after import) | **Example**: ```bash -npx causantic export --output backup.causantic.json +npx causantic export --output backup.causantic npx causantic export --output backup.json --no-encrypt +npx causantic export --projects my-project,other-project --no-encrypt --output filtered.json +npx causantic export --redact-paths --redact-code --output sanitized.causantic +npx causantic export --no-vectors --output lightweight.causantic ``` ### import -Import memory data. +Import memory data. Supports encrypted, compressed, and plain JSON archives. ```bash npx causantic import [options] @@ -273,13 +279,14 @@ npx causantic import [options] | Option | Description | |--------|-------------| -| `--merge` | Merge with existing data | -| `--replace` | Replace existing data | +| `--merge` | Merge with existing data (default: replace) | +| `--dry-run` | Validate and report without importing | **Example**: ```bash -npx causantic import backup.causantic.json -npx causantic import backup.causantic.json --merge +npx causantic import backup.causantic +npx causantic import backup.causantic --merge +npx causantic import backup.causantic --dry-run ``` ### stats diff --git a/src/cli/commands/archive.ts b/src/cli/commands/archive.ts index 91026c3..e536c62 100644 --- a/src/cli/commands/archive.ts +++ b/src/cli/commands/archive.ts @@ -1,15 +1,36 @@ import type { Command } from '../types.js'; import { promptPassword, isEncryptedArchive } from '../utils.js'; +function formatSize(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +function formatCount(n: number): string { + return n.toLocaleString(); +} + export const exportCommand: Command = { name: 'export', description: 'Export memory data', - usage: 'causantic export --output [--no-encrypt]', + usage: + 'causantic export --output [--no-encrypt] [--projects ] [--redact-paths] [--redact-code] [--no-vectors]', handler: async (args) => { const { exportArchive } = await import('../../storage/archive.js'); const outputIndex = args.indexOf('--output'); const outputPath = outputIndex >= 0 ? args[outputIndex + 1] : 'causantic-backup.causantic'; const noEncrypt = args.includes('--no-encrypt'); + const noVectors = args.includes('--no-vectors'); + const redactPaths = args.includes('--redact-paths'); + const redactCode = args.includes('--redact-code'); + + // Parse --projects flag + const projectsIndex = args.indexOf('--projects'); + const projects = + projectsIndex >= 0 && args[projectsIndex + 1] + ? 
args[projectsIndex + 1].split(',').map((s) => s.trim()) + : undefined; let password: string | undefined; if (!noEncrypt) { @@ -34,26 +55,48 @@ export const exportCommand: Command = { } } - await exportArchive({ + const result = await exportArchive({ outputPath, password, + projects, + redactPaths, + redactCode, + noVectors, }); - console.log(`Exported to ${outputPath}`); + + const parts = [ + `${formatCount(result.chunkCount)} chunks`, + `${formatCount(result.edgeCount)} edges`, + `${formatCount(result.clusterCount)} clusters`, + `${formatCount(result.vectorCount)} vectors`, + ]; + const suffix = [ + result.compressed ? 'compressed' : null, + result.encrypted ? 'encrypted' : null, + ] + .filter(Boolean) + .join(', '); + + console.log(`Exported: ${parts.join(', ')} (${formatSize(result.fileSize)} ${suffix})`); + console.log(`File: ${outputPath}`); }, }; export const importCommand: Command = { name: 'import', description: 'Import memory data', - usage: 'causantic import [--merge]', + usage: 'causantic import [--merge] [--dry-run]', handler: async (args) => { if (args.length === 0) { console.error('Error: File path required'); process.exit(2); } const { importArchive } = await import('../../storage/archive.js'); - const inputPath = args[0]; + + // Find file path (first arg that isn't a flag) + const inputPath = args.find((a) => !a.startsWith('--'))!; const merge = args.includes('--merge'); + const dryRun = args.includes('--dry-run'); const encrypted = await isEncryptedArchive(inputPath); @@ -75,11 +118,24 @@ export const importCommand: Command = { } } - await importArchive({ + const result = await importArchive({ inputPath, password, merge, + dryRun, }); - console.log('Import complete.'); + + const parts = [ + `${formatCount(result.chunkCount)} chunks`, + `${formatCount(result.edgeCount)} edges`, + `${formatCount(result.clusterCount)} clusters`, + `${formatCount(result.vectorCount)} vectors`, + ]; + + if (result.dryRun) { + console.log(`Dry run — would import: ${parts.join(', ')}`); + } else { + console.log(`Imported: ${parts.join(', ')}`); + } }, }; diff --git a/src/storage/archive.ts b/src/storage/archive.ts index 01d9ff9..e49d045 100644 --- a/src/storage/archive.ts +++ b/src/storage/archive.ts @@ -1,22 +1,31 @@ /** * Export/import functionality for Causantic memory data. * - * Supports encrypted and unencrypted archives. + * Supports encrypted and unencrypted archives with optional gzip compression. + * Archive format v1.1 adds vector embeddings and full cluster data. 
*/ import { readFileSync, writeFileSync, existsSync } from 'node:fs'; +import { gzipSync, gunzipSync } from 'node:zlib'; import { encrypt, decrypt, serializeEncrypted, deserializeEncrypted } from './encryption.js'; -import { getDb } from './db.js'; +import { getDb, generateId } from './db.js'; +import { serializeEmbedding, deserializeEmbedding } from '../utils/embedding-utils.js'; import { createLogger } from '../utils/logger.js'; const log = createLogger('archive'); /** Archive format version */ -const ARCHIVE_VERSION = '1.0'; +const ARCHIVE_VERSION = '1.1'; + +/** Accepted versions on import */ +const ACCEPTED_VERSIONS = ['1.0', '1.1']; /** Magic bytes for encrypted archives */ const ENCRYPTED_MAGIC = Buffer.from('CST\x00'); +/** Magic bytes for gzip */ +const GZIP_MAGIC = Buffer.from([0x1f, 0x8b]); + /** Archive metadata */ export interface ArchiveMetadata { version: string; @@ -24,13 +33,17 @@ export interface ArchiveMetadata { chunkCount: number; edgeCount: number; clusterCount: number; + vectorCount: number; + embeddingDimensions: number | null; projects: string[]; } /** Chunk data for export */ export interface ExportedChunk { id: string; + sessionId: string; sessionSlug: string; + projectPath: string | null; content: string; startTime: string; endTime: string; @@ -39,19 +52,36 @@ export interface ExportedChunk { /** Edge data for export */ export interface ExportedEdge { + id: string; source: string; target: string; type: string; - referenceType: string; + referenceType: string | null; weight: number; + linkCount: number; +} + +/** Cluster member with distance */ +export interface ClusterMember { + chunkId: string; + distance: number; } /** Cluster data for export */ export interface ExportedCluster { id: string; - name: string; + name: string | null; description: string | null; - memberChunkIds: string[]; + centroid: number[] | null; + exemplarIds: string[] | null; + membershipHash: string | null; + members: ClusterMember[]; +} + +/** Vector data for export */ +export interface ExportedVector { + chunkId: string; + embedding: number[]; } /** Complete archive structure */ @@ -63,6 +93,7 @@ export interface Archive { chunks: ExportedChunk[]; edges: ExportedEdge[]; clusters: ExportedCluster[]; + vectors: ExportedVector[]; } /** Export options */ @@ -77,6 +108,19 @@ export interface ExportOptions { redactPaths?: boolean; /** Redact code blocks */ redactCode?: boolean; + /** Skip vector embeddings */ + noVectors?: boolean; +} + +/** Export result */ +export interface ExportResult { + chunkCount: number; + edgeCount: number; + clusterCount: number; + vectorCount: number; + fileSize: number; + compressed: boolean; + encrypted: boolean; } /** Import options */ @@ -87,12 +131,104 @@ export interface ImportOptions { password?: string; /** Merge with existing data */ merge?: boolean; + /** Validate and report without importing */ + dryRun?: boolean; +} + +/** Import result */ +export interface ImportResult { + chunkCount: number; + edgeCount: number; + clusterCount: number; + vectorCount: number; + dryRun: boolean; +} + +/** Validation result */ +export interface ValidationResult { + valid: boolean; + errors: string[]; + warnings: string[]; +} + +/** + * Validate an archive structure before import. 
+ */ +export function validateArchive(archive: Archive): ValidationResult { + const errors: string[] = []; + const warnings: string[] = []; + + // Version check + if (!archive.version || !ACCEPTED_VERSIONS.includes(archive.version)) { + errors.push(`Unsupported archive version: ${archive.version ?? 'missing'}`); + } + + // Format check + if (archive.format !== 'causantic-archive') { + errors.push(`Invalid archive format: ${archive.format ?? 'missing'}`); + } + + // Count verification + if (archive.metadata) { + if (archive.metadata.chunkCount !== archive.chunks?.length) { + warnings.push( + `Metadata chunkCount (${archive.metadata.chunkCount}) does not match actual (${archive.chunks?.length ?? 0})`, + ); + } + if (archive.metadata.edgeCount !== archive.edges?.length) { + warnings.push( + `Metadata edgeCount (${archive.metadata.edgeCount}) does not match actual (${archive.edges?.length ?? 0})`, + ); + } + if (archive.metadata.clusterCount !== archive.clusters?.length) { + warnings.push( + `Metadata clusterCount (${archive.metadata.clusterCount}) does not match actual (${archive.clusters?.length ?? 0})`, + ); + } + } + + // Edge referential integrity + if (archive.chunks && archive.edges) { + const chunkIdSet = new Set(archive.chunks.map((c) => c.id)); + let danglingCount = 0; + for (const edge of archive.edges) { + if (!chunkIdSet.has(edge.source) || !chunkIdSet.has(edge.target)) { + danglingCount++; + } + } + if (danglingCount > 0) { + warnings.push(`${danglingCount} edge(s) reference chunks not in the archive`); + } + } + + // v1.0 backward compat warning + if (archive.version === '1.0') { + warnings.push( + 'Archive version 1.0: no vector embeddings included. Semantic search will not work until re-embedding.', + ); + } + + // Embedding dimension mismatch detection + if (archive.metadata?.embeddingDimensions && archive.vectors?.length > 0) { + const sampleDims = archive.vectors[0].embedding?.length; + if (sampleDims && sampleDims !== archive.metadata.embeddingDimensions) { + warnings.push( + `Embedding dimensions mismatch: metadata says ${archive.metadata.embeddingDimensions}, sample vector has ${sampleDims}`, + ); + } + } + + return { + valid: errors.length === 0, + errors, + warnings, + }; } /** * Export memory data to an archive. 
*/ -export async function exportArchive(options: ExportOptions): Promise { +export async function exportArchive(options: ExportOptions): Promise { const db = getDb(); // Get unique projects @@ -106,13 +242,15 @@ export async function exportArchive(options: ExportOptions): Promise { // Export chunks const chunksQuery = db.prepare(` - SELECT id, session_slug, content, start_time, end_time, turn_indices + SELECT id, session_id, session_slug, project_path, content, start_time, end_time, turn_indices FROM chunks WHERE session_slug IN (${targetProjects.map(() => '?').join(',')}) `); const chunksResult = chunksQuery.all(...targetProjects) as Array<{ id: string; + session_id: string; session_slug: string; + project_path: string | null; content: string; start_time: string; end_time: string; @@ -121,7 +259,9 @@ export async function exportArchive(options: ExportOptions): Promise { let chunks: ExportedChunk[] = chunksResult.map((row) => ({ id: row.id, + sessionId: row.session_id, sessionSlug: row.session_slug, + projectPath: row.project_path, content: row.content, startTime: row.start_time, endTime: row.end_time, @@ -142,50 +282,124 @@ export async function exportArchive(options: ExportOptions): Promise { })); } - // Export edges - const chunkIds = chunks.map((c) => c.id); - const edgesQuery = db.prepare(` - SELECT source_id, target_id, type, reference_type, weight - FROM edges - WHERE source_id IN (${chunkIds.map(() => '?').join(',')}) - `); - const edgesResult = edgesQuery.all(...chunkIds) as Array<{ - source_id: string; - target_id: string; - type: string; - reference_type: string; - weight: number; - }>; + // Build chunk ID set for filtering edges and vectors + const chunkIdSet = new Set(chunks.map((c) => c.id)); - const edges: ExportedEdge[] = edgesResult.map((row) => ({ - source: row.source_id, - target: row.target_id, - type: row.type, - referenceType: row.reference_type, - weight: row.weight, - })); + // Export edges — both endpoints must be in the export + const chunkIds = chunks.map((c) => c.id); + let edges: ExportedEdge[] = []; + if (chunkIds.length > 0) { + const edgesQuery = db.prepare(` + SELECT id, source_chunk_id, target_chunk_id, edge_type, reference_type, initial_weight, link_count + FROM edges + WHERE source_chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + const edgesResult = edgesQuery.all(...chunkIds) as Array<{ + id: string; + source_chunk_id: string; + target_chunk_id: string; + edge_type: string; + reference_type: string | null; + initial_weight: number; + link_count: number; + }>; + + // Filter: only keep edges where BOTH endpoints are in the export + edges = edgesResult + .filter((row) => chunkIdSet.has(row.source_chunk_id) && chunkIdSet.has(row.target_chunk_id)) + .map((row) => ({ + id: row.id, + source: row.source_chunk_id, + target: row.target_chunk_id, + type: row.edge_type, + referenceType: row.reference_type, + weight: row.initial_weight, + linkCount: row.link_count, + })); + } - // Export clusters - const clustersQuery = db.prepare(` - SELECT c.id, c.name, c.description, GROUP_CONCAT(cm.chunk_id) as member_ids - FROM clusters c - LEFT JOIN cluster_members cm ON c.id = cm.cluster_id - WHERE cm.chunk_id IN (${chunkIds.map(() => '?').join(',')}) - GROUP BY c.id - `); - const clustersResult = clustersQuery.all(...chunkIds) as Array<{ - id: string; - name: string; - description: string | null; - member_ids: string | null; - }>; + // Export clusters with full data + let clusters: ExportedCluster[] = []; + if (chunkIds.length > 0) { + // Find clusters that have at 
least one member in our export + const clusterIdsQuery = db.prepare(` + SELECT DISTINCT cluster_id FROM chunk_clusters + WHERE chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + const clusterIds = ( + clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }> + ).map((r) => r.cluster_id); + + if (clusterIds.length > 0) { + const clustersQuery = db.prepare(` + SELECT id, name, description, centroid, exemplar_ids, membership_hash + FROM clusters + WHERE id IN (${clusterIds.map(() => '?').join(',')}) + `); + const clustersResult = clustersQuery.all(...clusterIds) as Array<{ + id: string; + name: string | null; + description: string | null; + centroid: Buffer | null; + exemplar_ids: string | null; + membership_hash: string | null; + }>; + + const membersQuery = db.prepare(` + SELECT chunk_id, distance FROM chunk_clusters + WHERE cluster_id = ? AND chunk_id IN (${chunkIds.map(() => '?').join(',')}) + `); + + clusters = clustersResult.map((row) => { + const membersResult = membersQuery.all(row.id, ...chunkIds) as Array<{ + chunk_id: string; + distance: number; + }>; + + return { + id: row.id, + name: row.name, + description: row.description, + centroid: row.centroid ? deserializeEmbedding(row.centroid) : null, + exemplarIds: row.exemplar_ids ? JSON.parse(row.exemplar_ids) : null, + membershipHash: row.membership_hash, + members: membersResult.map((m) => ({ + chunkId: m.chunk_id, + distance: m.distance, + })), + }; + }); + } + } - const clusters: ExportedCluster[] = clustersResult.map((row) => ({ - id: row.id, - name: row.name, - description: row.description, - memberChunkIds: row.member_ids?.split(',') ?? [], - })); + // Export vectors + let vectors: ExportedVector[] = []; + let embeddingDimensions: number | null = null; + if (!options.noVectors && chunkIds.length > 0) { + // Check if vectors table exists + const tableExists = db + .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='vectors'") + .get(); + if (tableExists) { + const vectorsQuery = db.prepare(` + SELECT id, embedding FROM vectors + WHERE id IN (${chunkIds.map(() => '?').join(',')}) + `); + const vectorsResult = vectorsQuery.all(...chunkIds) as Array<{ + id: string; + embedding: Buffer; + }>; + + vectors = vectorsResult.map((row) => ({ + chunkId: row.id, + embedding: deserializeEmbedding(row.embedding), + })); + + if (vectors.length > 0) { + embeddingDimensions = vectors[0].embedding.length; + } + } + } // Build archive const archive: Archive = { @@ -198,39 +412,50 @@ export async function exportArchive(options: ExportOptions): Promise { chunkCount: chunks.length, edgeCount: edges.length, clusterCount: clusters.length, + vectorCount: vectors.length, + embeddingDimensions, projects: targetProjects, }, chunks, edges, clusters, + vectors, }; - // Serialize - const jsonData = JSON.stringify(archive, null, 2); + // Serialize: JSON -> gzip -> (optional) encrypt -> write + const jsonData = JSON.stringify(archive); + const compressed = gzipSync(Buffer.from(jsonData, 'utf-8')); - // Write output + let output: Buffer; + const encrypted = !!options.password; if (options.password) { - // Encrypted - const encrypted = encrypt(Buffer.from(jsonData, 'utf-8'), options.password); - const serialized = serializeEncrypted(encrypted); - const output = Buffer.concat([ENCRYPTED_MAGIC, serialized]); - writeFileSync(options.outputPath, output); + const encryptedData = encrypt(compressed, options.password); + const serialized = serializeEncrypted(encryptedData); + output = Buffer.concat([ENCRYPTED_MAGIC, serialized]); } else 
{ - // Unencrypted - writeFileSync(options.outputPath, jsonData); + output = compressed; } - log.info('Export completed', { - chunks: chunks.length, - edges: edges.length, - clusters: clusters.length, - }); + writeFileSync(options.outputPath, output); + + const result: ExportResult = { + chunkCount: chunks.length, + edgeCount: edges.length, + clusterCount: clusters.length, + vectorCount: vectors.length, + fileSize: output.length, + compressed: true, + encrypted, + }; + + log.info('Export completed', { ...result }); + return result; } /** * Import memory data from an archive. */ -export async function importArchive(options: ImportOptions): Promise { +export async function importArchive(options: ImportOptions): Promise { if (!existsSync(options.inputPath)) { throw new Error(`File not found: ${options.inputPath}`); } @@ -239,45 +464,96 @@ export async function importArchive(options: ImportOptions): Promise { let jsonData: string; - // Check if encrypted - if (fileContent.subarray(0, 4).equals(ENCRYPTED_MAGIC)) { + // Detection order: + // 1. CST\0 (encrypted) -> decrypt, then check for gzip + // 2. gzip magic 0x1f 0x8b (compressed, unencrypted) + // 3. plain JSON (v1.0 backward compat) + if (fileContent.length >= 4 && fileContent.subarray(0, 4).equals(ENCRYPTED_MAGIC)) { if (!options.password) { throw new Error('Archive is encrypted. Please provide a password.'); } const encryptedData = deserializeEncrypted(fileContent.subarray(4)); const decrypted = decrypt(encryptedData, options.password); - jsonData = decrypted.toString('utf-8'); + + // Check if decrypted data is gzipped + if (decrypted.length >= 2 && decrypted[0] === 0x1f && decrypted[1] === 0x8b) { + jsonData = gunzipSync(decrypted).toString('utf-8'); + } else { + jsonData = decrypted.toString('utf-8'); + } + } else if ( + fileContent.length >= 2 && + fileContent[0] === GZIP_MAGIC[0] && + fileContent[1] === GZIP_MAGIC[1] + ) { + jsonData = gunzipSync(fileContent).toString('utf-8'); } else { jsonData = fileContent.toString('utf-8'); } const archive = JSON.parse(jsonData) as Archive; - if (archive.format !== 'causantic-archive') { - throw new Error('Invalid archive format'); + // Validate + const validation = validateArchive(archive); + for (const warning of validation.warnings) { + log.warn(warning); + } + if (!validation.valid) { + throw new Error(`Invalid archive: ${validation.errors.join('; ')}`); + } + + // Normalize v1.0 archives + if (!archive.vectors) { + archive.vectors = []; + } + + const result: ImportResult = { + chunkCount: archive.chunks.length, + edgeCount: archive.edges.length, + clusterCount: archive.clusters.length, + vectorCount: archive.vectors.length, + dryRun: !!options.dryRun, + }; + + if (options.dryRun) { + log.info('Dry run — no changes made', { ...result }); + return result; } const db = getDb(); + // Ensure vectors table exists + db.exec(` + CREATE TABLE IF NOT EXISTS vectors ( + id TEXT PRIMARY KEY, + embedding BLOB NOT NULL, + orphaned_at TEXT DEFAULT NULL, + last_accessed TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); + // Start transaction const transaction = db.transaction(() => { if (!options.merge) { // Clear existing data - db.prepare('DELETE FROM cluster_members').run(); + db.prepare('DELETE FROM chunk_clusters').run(); db.prepare('DELETE FROM clusters').run(); db.prepare('DELETE FROM edges').run(); + db.prepare('DELETE FROM vectors').run(); db.prepare('DELETE FROM chunks').run(); } // Import chunks const insertChunk = db.prepare(` - INSERT OR REPLACE INTO chunks (id, session_slug, content, start_time, 
end_time, turn_indices) - VALUES (?, ?, ?, ?, ?, ?) + INSERT OR REPLACE INTO chunks (id, session_id, session_slug, project_path, content, start_time, end_time, turn_indices) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) `); for (const chunk of archive.chunks) { insertChunk.run( chunk.id, + chunk.sessionId ?? '', chunk.sessionSlug, + chunk.projectPath ?? null, chunk.content, chunk.startTime, chunk.endTime, @@ -287,44 +563,75 @@ export async function importArchive(options: ImportOptions): Promise { // Import edges const insertEdge = db.prepare(` - INSERT OR REPLACE INTO edges (source_id, target_id, type, reference_type, weight) - VALUES (?, ?, ?, ?, ?) + INSERT OR REPLACE INTO edges (id, source_chunk_id, target_chunk_id, edge_type, reference_type, initial_weight, created_at, link_count) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) `); for (const edge of archive.edges) { - insertEdge.run(edge.source, edge.target, edge.type, edge.referenceType, edge.weight); + insertEdge.run( + edge.id ?? generateId(), + edge.source, + edge.target, + edge.type, + edge.referenceType ?? null, + edge.weight, + new Date().toISOString(), + edge.linkCount ?? 1, + ); } // Import clusters const insertCluster = db.prepare(` - INSERT OR REPLACE INTO clusters (id, name, description) - VALUES (?, ?, ?) + INSERT OR REPLACE INTO clusters (id, name, description, centroid, exemplar_ids, membership_hash) + VALUES (?, ?, ?, ?, ?, ?) `); const insertMember = db.prepare(` - INSERT OR REPLACE INTO cluster_members (cluster_id, chunk_id) - VALUES (?, ?) + INSERT OR REPLACE INTO chunk_clusters (chunk_id, cluster_id, distance) + VALUES (?, ?, ?) `); for (const cluster of archive.clusters) { - insertCluster.run(cluster.id, cluster.name, cluster.description); - for (const memberId of cluster.memberChunkIds) { - insertMember.run(cluster.id, memberId); + insertCluster.run( + cluster.id, + cluster.name, + cluster.description, + cluster.centroid ? serializeEmbedding(cluster.centroid) : null, + cluster.exemplarIds ? JSON.stringify(cluster.exemplarIds) : null, + cluster.membershipHash ?? null, + ); + + // Handle both v1.1 (members with distance) and v1.0 compat (memberChunkIds) + const members: ClusterMember[] = + cluster.members ?? + ((cluster as unknown as { memberChunkIds?: string[] }).memberChunkIds)?.map( + (id) => ({ chunkId: id, distance: 0 }), + ) ?? + []; + for (const member of members) { + insertMember.run(member.chunkId, cluster.id, member.distance); + } + } + + // Import vectors + if (archive.vectors.length > 0) { + const insertVector = db.prepare(` + INSERT OR REPLACE INTO vectors (id, embedding, orphaned_at, last_accessed) + VALUES (?, ?, NULL, CURRENT_TIMESTAMP) + `); + for (const vector of archive.vectors) { + insertVector.run(vector.chunkId, serializeEmbedding(vector.embedding)); } } }); transaction(); - log.info('Import completed', { - chunks: archive.chunks.length, - edges: archive.edges.length, - clusters: archive.clusters.length, - }); + log.info('Import completed', { ...result }); + return result; } /** * Redact file paths in content. */ function redactFilePaths(content: string): string { - // Match common file path patterns const pathPattern = /(?:\/[\w.-]+)+\.\w+|(?:[A-Z]:\\[\w.-\\]+)|(?:~\/[\w.-\/]+)/g; return content.replace(pathPattern, '[REDACTED_PATH]'); } @@ -333,7 +640,6 @@ function redactFilePaths(content: string): string { * Redact code blocks in content. 
*/ function redactCodeBlocks(content: string): string { - // Match markdown code blocks const codeBlockPattern = /```[\s\S]*?```/g; return content.replace(codeBlockPattern, '```\n[REDACTED_CODE]\n```'); } diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 16e9278..4f81a1f 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -1,323 +1,835 @@ /** - * Tests for export/import functionality. + * Integration tests for export/import functionality. + * + * Uses real in-memory databases to verify end-to-end round-trips. */ -import { describe, it, expect } from 'vitest'; -import type { - Archive, - ArchiveMetadata, - ExportedChunk, - ExportedEdge, - ExportedCluster, - ExportOptions, - ImportOptions, +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { writeFileSync, readFileSync, unlinkSync, existsSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { gzipSync } from 'node:zlib'; +import Database from 'better-sqlite3-multiple-ciphers'; +import { + createTestDb, + setupTestDb, + teardownTestDb, + createSampleChunk, + insertTestChunk, + insertTestEdge, + insertTestCluster, + assignChunkToCluster, +} from './test-utils.js'; +import { + exportArchive, + importArchive, + validateArchive, + type Archive, + type ExportResult, + type ImportResult, } from '../../src/storage/archive.js'; +import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedding-utils.js'; + +// Helper to create a temp file path +function tempPath(suffix = '.json'): string { + return join(tmpdir(), `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`); +} + +// Helper to create vectors table in test db +function createVectorsTable(db: Database.Database): void { + db.exec(` + CREATE TABLE IF NOT EXISTS vectors ( + id TEXT PRIMARY KEY, + embedding BLOB NOT NULL, + orphaned_at TEXT DEFAULT NULL, + last_accessed TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); +} + +// Helper to insert a vector into the test db +function insertTestVector(db: Database.Database, id: string, embedding: number[]): void { + db.prepare('INSERT INTO vectors (id, embedding) VALUES (?, ?)').run( + id, + serializeEmbedding(embedding), + ); +} + +// Helper to seed a standard test dataset +function seedTestData(db: Database.Database): { + chunkIds: string[]; + edgeId: string; + clusterId: string; + embedding: number[]; +} { + const chunk1 = createSampleChunk({ + id: 'chunk-1', + sessionId: 'session-1', + sessionSlug: 'project-a', + content: 'First chunk content with /path/to/file.ts', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + projectPath: '/home/user/project-a', + }); + const chunk2 = createSampleChunk({ + id: 'chunk-2', + sessionId: 'session-1', + sessionSlug: 'project-a', + content: 'Second chunk with ```ts\nconst x = 1;\n```', + startTime: '2024-01-01T00:01:00Z', + endTime: '2024-01-01T00:02:00Z', + projectPath: '/home/user/project-a', + }); + const chunk3 = createSampleChunk({ + id: 'chunk-3', + sessionId: 'session-2', + sessionSlug: 'project-b', + content: 'Third chunk from project-b', + startTime: '2024-01-01T00:02:00Z', + endTime: '2024-01-01T00:03:00Z', + projectPath: '/home/user/project-b', + }); -describe('archive', () => { - describe('Archive interface', () => { - it('has correct structure', () => { - const archive: Archive = { - format: 'causantic-archive', - version: '1.0', - created: '2024-01-15T10:30:00Z', - metadata: { - version: '1.0', - created: 
'2024-01-15T10:30:00Z', - chunkCount: 100, - edgeCount: 250, - clusterCount: 5, - projects: ['project-a', 'project-b'], - }, - chunks: [], - edges: [], - clusters: [], - }; + insertTestChunk(db, chunk1); + insertTestChunk(db, chunk2); + insertTestChunk(db, chunk3); + + const edgeId = insertTestEdge(db, { + id: 'edge-1', + sourceChunkId: 'chunk-1', + targetChunkId: 'chunk-2', + edgeType: 'forward', + referenceType: 'within-chain', + initialWeight: 0.9, + linkCount: 2, + }); - expect(archive.format).toBe('causantic-archive'); - expect(archive.version).toBe('1.0'); - expect(archive.metadata.chunkCount).toBe(100); - }); + const clusterId = insertTestCluster(db, { + id: 'cluster-1', + name: 'Auth cluster', + description: 'Authentication related', + exemplarIds: ['chunk-1'], }); + assignChunkToCluster(db, 'chunk-1', clusterId, 0.3); + assignChunkToCluster(db, 'chunk-2', clusterId, 0.5); + + // Add centroid and membership hash + db.prepare('UPDATE clusters SET centroid = ?, membership_hash = ? WHERE id = ?').run( + serializeEmbedding([0.1, 0.2, 0.3, 0.4]), + 'abc123', + clusterId, + ); + + createVectorsTable(db); + const embedding = Array.from({ length: 8 }, (_, i) => i * 0.1); + insertTestVector(db, 'chunk-1', embedding); + insertTestVector(db, 'chunk-2', embedding.map((v) => v + 0.01)); + + return { + chunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], + edgeId, + clusterId, + embedding, + }; +} - describe('ArchiveMetadata interface', () => { - it('tracks counts correctly', () => { - const metadata: ArchiveMetadata = { - version: '1.0', - created: new Date().toISOString(), - chunkCount: 50, - edgeCount: 120, - clusterCount: 3, - projects: ['my-project'], - }; +describe('archive', () => { + let db: Database.Database; + let outputPath: string; - expect(metadata.chunkCount).toBe(50); - expect(metadata.edgeCount).toBe(120); - expect(metadata.clusterCount).toBe(3); - expect(metadata.projects).toContain('my-project'); - }); + beforeEach(() => { + db = createTestDb(); + setupTestDb(db); + outputPath = tempPath(); }); - describe('ExportedChunk interface', () => { - it('has all required fields', () => { - const chunk: ExportedChunk = { - id: 'chunk-abc', - sessionSlug: 'my-project', - content: 'This is chunk content', - startTime: '2024-01-15T10:00:00Z', - endTime: '2024-01-15T10:05:00Z', - turnIndices: [0, 1, 2], - }; + afterEach(() => { + teardownTestDb(db); + if (existsSync(outputPath)) { + unlinkSync(outputPath); + } + }); - expect(chunk.id).toBe('chunk-abc'); - expect(chunk.sessionSlug).toBe('my-project'); - expect(chunk.turnIndices).toEqual([0, 1, 2]); + describe('unencrypted round-trip', () => { + it('exports and imports all data correctly', async () => { + const { embedding } = seedTestData(db); + + const exportResult = await exportArchive({ outputPath }); + expect(exportResult.chunkCount).toBe(3); + expect(exportResult.edgeCount).toBe(1); + expect(exportResult.clusterCount).toBe(1); + expect(exportResult.vectorCount).toBe(2); + expect(exportResult.compressed).toBe(true); + expect(exportResult.encrypted).toBe(false); + expect(exportResult.fileSize).toBeGreaterThan(0); + + // Import into fresh db + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.chunkCount).toBe(3); + expect(importResult.edgeCount).toBe(1); + expect(importResult.clusterCount).toBe(1); + expect(importResult.vectorCount).toBe(2); + expect(importResult.dryRun).toBe(false); + + // Verify data + const chunks 
= db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array>; + expect(chunks).toHaveLength(3); + expect(chunks[0].id).toBe('chunk-1'); + expect(chunks[0].session_id).toBe('session-1'); + expect(chunks[0].session_slug).toBe('project-a'); + expect(chunks[0].project_path).toBe('/home/user/project-a'); + + const edges = db.prepare('SELECT * FROM edges').all() as Array>; + expect(edges).toHaveLength(1); + expect(edges[0].source_chunk_id).toBe('chunk-1'); + expect(edges[0].target_chunk_id).toBe('chunk-2'); + expect(edges[0].edge_type).toBe('forward'); + expect(edges[0].reference_type).toBe('within-chain'); + expect(edges[0].initial_weight).toBe(0.9); + expect(edges[0].link_count).toBe(2); + + const clusters = db.prepare('SELECT * FROM clusters').all() as Array>; + expect(clusters).toHaveLength(1); + expect(clusters[0].name).toBe('Auth cluster'); + expect(clusters[0].description).toBe('Authentication related'); + expect(clusters[0].membership_hash).toBe('abc123'); + + const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array>; + expect(members).toHaveLength(2); + expect(members[0].chunk_id).toBe('chunk-1'); + expect(members[0].distance).toBe(0.3); + + const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array>; + expect(vectors).toHaveLength(2); }); }); - describe('ExportedEdge interface', () => { - it('has all required fields', () => { - const edge: ExportedEdge = { - source: 'chunk-1', - target: 'chunk-2', - type: 'backward', - referenceType: 'within-chain', - weight: 0.85, - }; + describe('encrypted round-trip', () => { + it('encrypts and decrypts correctly', async () => { + seedTestData(db); + const password = 'test-password-123'; + + const exportResult = await exportArchive({ outputPath, password }); + expect(exportResult.encrypted).toBe(true); + + // Verify file starts with magic bytes + const fileContent = readFileSync(outputPath); + expect(fileContent.subarray(0, 4).equals(Buffer.from('CST\x00'))).toBe(true); - expect(edge.source).toBe('chunk-1'); - expect(edge.target).toBe('chunk-2'); - expect(edge.type).toBe('backward'); - expect(edge.referenceType).toBe('within-chain'); - expect(edge.weight).toBe(0.85); + // Import into fresh db + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath, password }); + expect(importResult.chunkCount).toBe(3); + + const chunks = db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }; + expect(chunks.count).toBe(3); }); - }); - describe('ExportedCluster interface', () => { - it('has all required fields', () => { - const cluster: ExportedCluster = { - id: 'cluster-xyz', - name: 'Authentication', - description: 'Chunks related to user authentication', - memberChunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], - }; + it('rejects wrong password', async () => { + seedTestData(db); + await exportArchive({ outputPath, password: 'correct-password' }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(cluster.id).toBe('cluster-xyz'); - expect(cluster.name).toBe('Authentication'); - expect(cluster.memberChunkIds.length).toBe(3); + await expect( + importArchive({ inputPath: outputPath, password: 'wrong-password' }), + ).rejects.toThrow(); }); - it('allows null description', () => { - const cluster: ExportedCluster = { - id: 'cluster-xyz', - name: 'Unnamed Cluster', - description: null, - memberChunkIds: [], - }; + it('rejects missing password for encrypted archive', 
async () => { + seedTestData(db); + await exportArchive({ outputPath, password: 'test-pass' }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(cluster.description).toBeNull(); + await expect(importArchive({ inputPath: outputPath })).rejects.toThrow( + 'Archive is encrypted', + ); }); }); - describe('ExportOptions interface', () => { - it('requires outputPath', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - }; + describe('vector round-trip', () => { + it('preserves vector embeddings through serialize/JSON/deserialize', async () => { + const { embedding } = seedTestData(db); - expect(options.outputPath).toBe('/path/to/archive.causantic'); - }); + await exportArchive({ outputPath }); - it('supports optional password for encryption', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - password: 'secret123', - }; + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.password).toBe('secret123'); - }); + await importArchive({ inputPath: outputPath }); - it('supports project filtering', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - projects: ['project-a', 'project-c'], - }; + const vectors = db.prepare('SELECT id, embedding FROM vectors ORDER BY id').all() as Array<{ + id: string; + embedding: Buffer; + }>; + expect(vectors).toHaveLength(2); - expect(options.projects).toEqual(['project-a', 'project-c']); + const restored = deserializeEmbedding(vectors[0].embedding); + expect(restored).toHaveLength(embedding.length); + // Float32 precision: compare with tolerance + for (let i = 0; i < embedding.length; i++) { + expect(restored[i]).toBeCloseTo(embedding[i], 5); + } }); - it('supports redaction options', () => { - const options: ExportOptions = { - outputPath: '/path/to/archive.causantic', - redactPaths: true, - redactCode: true, - }; + it('skips vectors with --no-vectors', async () => { + seedTestData(db); + + const result = await exportArchive({ outputPath, noVectors: true }); + expect(result.vectorCount).toBe(0); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.redactPaths).toBe(true); - expect(options.redactCode).toBe(true); + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.vectorCount).toBe(0); + + const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { count: number }; + expect(vectors.count).toBe(0); }); }); - describe('ImportOptions interface', () => { - it('requires inputPath', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - }; + describe('cluster round-trip', () => { + it('preserves centroid, distances, exemplar IDs, and membership hash', async () => { + seedTestData(db); - expect(options.inputPath).toBe('/path/to/archive.causantic'); - }); + await exportArchive({ outputPath }); - it('supports optional password for decryption', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - password: 'secret123', - }; + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(options.password).toBe('secret123'); - }); + await importArchive({ inputPath: outputPath }); - it('supports merge option', () => { - const options: ImportOptions = { - inputPath: '/path/to/archive.causantic', - merge: true, - }; + const cluster = db.prepare('SELECT * FROM clusters WHERE id = 
?').get('cluster-1') as Record; + expect(cluster.name).toBe('Auth cluster'); + expect(cluster.description).toBe('Authentication related'); + expect(cluster.membership_hash).toBe('abc123'); + expect(cluster.exemplar_ids).toBe(JSON.stringify(['chunk-1'])); + + // Verify centroid survives round-trip + const centroid = deserializeEmbedding(cluster.centroid as Buffer); + expect(centroid).toHaveLength(4); + expect(centroid[0]).toBeCloseTo(0.1, 5); - expect(options.merge).toBe(true); + // Verify member distances + const members = db.prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? ORDER BY chunk_id').all('cluster-1') as Array>; + expect(members).toHaveLength(2); + expect(members[0].distance).toBe(0.3); + expect(members[1].distance).toBe(0.5); }); }); - describe('redaction functions', () => { - describe('redactFilePaths', () => { - it('redacts Unix-style paths', () => { - const content = 'Looking at /src/components/Button.tsx'; - const pattern = /(?:\/[\w.-]+)+\.\w+/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + describe('project filtering', () => { + it('exports only specified projects', async () => { + seedTestData(db); - expect(redacted).toBe('Looking at [REDACTED_PATH]'); + const result = await exportArchive({ + outputPath, + projects: ['project-a'], }); + expect(result.chunkCount).toBe(2); // only project-a chunks - it('redacts Windows-style paths', () => { - const content = 'Opening C:\\Users\\dev\\project\\file.ts'; - const pattern = /(?:[A-Z]:\\[\w.-\\]+)/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(redacted).toContain('[REDACTED_PATH]'); - }); + await importArchive({ inputPath: outputPath }); - it('redacts home directory paths', () => { - const content = 'Config at ~/dev/project/config.json'; - const pattern = /(?:~\/[\w.-\/]+)/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + const chunks = db.prepare('SELECT * FROM chunks').all(); + expect(chunks).toHaveLength(2); - expect(redacted).toBe('Config at [REDACTED_PATH]'); - }); + const slugs = (chunks as Array>).map((c) => c.session_slug); + expect(slugs).toEqual(['project-a', 'project-a']); + }); + }); - it('preserves non-path content', () => { - const content = 'This is just regular text without paths'; - const pattern = /(?:\/[\w.-]+)+\.\w+/g; - const redacted = content.replace(pattern, '[REDACTED_PATH]'); + describe('edge completeness', () => { + it('excludes edges with one endpoint outside the export', async () => { + const chunk1 = createSampleChunk({ id: 'chunk-a', sessionSlug: 'proj-1', sessionId: 'ses-1' }); + const chunk2 = createSampleChunk({ id: 'chunk-b', sessionSlug: 'proj-1', sessionId: 'ses-1' }); + const chunk3 = createSampleChunk({ id: 'chunk-c', sessionSlug: 'proj-2', sessionId: 'ses-2' }); + insertTestChunk(db, chunk1); + insertTestChunk(db, chunk2); + insertTestChunk(db, chunk3); + + // Edge within proj-1 + insertTestEdge(db, { + id: 'edge-internal', + sourceChunkId: 'chunk-a', + targetChunkId: 'chunk-b', + edgeType: 'forward', + }); + // Edge crossing projects + insertTestEdge(db, { + id: 'edge-cross', + sourceChunkId: 'chunk-a', + targetChunkId: 'chunk-c', + edgeType: 'forward', + }); - expect(redacted).toBe(content); + const result = await exportArchive({ + outputPath, + projects: ['proj-1'], }); + // Only the internal edge should be exported + expect(result.edgeCount).toBe(1); }); + }); - describe('redactCodeBlocks', () => { - it('redacts markdown 
code blocks', () => { - const content = 'Here is code:\n```typescript\nconst x = 1;\n```\nEnd.'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + describe('redaction', () => { + it('redacts file paths', async () => { + seedTestData(db); - expect(redacted).toBe('Here is code:\n```\n[REDACTED_CODE]\n```\nEnd.'); - }); + await exportArchive({ outputPath, redactPaths: true }); - it('handles multiple code blocks', () => { - const content = '```js\ncode1\n```\nText\n```py\ncode2\n```'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); - expect(redacted.match(/\[REDACTED_CODE\]/g)?.length).toBe(2); - }); + await importArchive({ inputPath: outputPath }); - it('preserves inline code', () => { - const content = 'Use the `function` keyword'; - const pattern = /```[\s\S]*?```/g; - const redacted = content.replace(pattern, '```\n[REDACTED_CODE]\n```'); + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { content: string }; + expect(chunk.content).toContain('[REDACTED_PATH]'); + expect(chunk.content).not.toContain('/path/to/file.ts'); + }); - expect(redacted).toBe(content); // Inline code not redacted - }); + it('redacts code blocks', async () => { + seedTestData(db); + + await exportArchive({ outputPath, redactCode: true }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + await importArchive({ inputPath: outputPath }); + + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { content: string }; + expect(chunk.content).toContain('[REDACTED_CODE]'); + expect(chunk.content).not.toContain('const x = 1'); }); }); - describe('encryption detection', () => { - it('checks for magic bytes', () => { - const ENCRYPTED_MAGIC = Buffer.from('CST\x00'); - const encryptedFile = Buffer.concat([ENCRYPTED_MAGIC, Buffer.from('encrypted data')]); - const plainFile = Buffer.from('{"format":"causantic-archive"}'); + describe('merge vs replace', () => { + it('replace mode clears existing data', async () => { + seedTestData(db); + await exportArchive({ outputPath }); + + // Add extra data before import + const extra = createSampleChunk({ id: 'chunk-extra', sessionSlug: 'project-a', sessionId: 'ses-x' }); + insertTestChunk(db, extra); + expect( + (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, + ).toBe(4); + + await importArchive({ inputPath: outputPath, merge: false }); + + // Replace should have cleared the extra chunk + expect( + (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, + ).toBe(3); + }); + + it('merge mode preserves existing data', async () => { + seedTestData(db); + + // Export only project-a + await exportArchive({ outputPath, projects: ['project-a'] }); - const isEncrypted1 = encryptedFile.subarray(0, 4).equals(ENCRYPTED_MAGIC); - const isEncrypted2 = plainFile.subarray(0, 4).equals(ENCRYPTED_MAGIC); + // Now add different data + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + const other = createSampleChunk({ id: 'chunk-other', sessionSlug: 'project-c', sessionId: 'ses-c' }); + insertTestChunk(db, other); - expect(isEncrypted1).toBe(true); - expect(isEncrypted2).toBe(false); + await importArchive({ inputPath: outputPath, merge: true }); + + // Should have both the 
imported chunks AND the existing one + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(3); // 2 from project-a + 1 existing }); }); - describe('archive validation', () => { - it('validates format field', () => { - const validArchive = { format: 'causantic-archive' }; - const invalidArchive = { format: 'other-format' }; + describe('dry-run import', () => { + it('reports counts without modifying database', async () => { + seedTestData(db); + await exportArchive({ outputPath }); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); - expect(validArchive.format === 'causantic-archive').toBe(true); - expect(invalidArchive.format === 'causantic-archive').toBe(false); + const result = await importArchive({ inputPath: outputPath, dryRun: true }); + expect(result.dryRun).toBe(true); + expect(result.chunkCount).toBe(3); + expect(result.edgeCount).toBe(1); + + // Database should be empty + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(0); }); }); - describe('merge behavior', () => { - it('merge=false clears existing data', () => { - const merge = false; - const clearExisting = !merge; + describe('v1.0 backward compatibility', () => { + it('imports v1.0 archive without vectors', async () => { + // Build a v1.0-style archive manually + const v1Archive = { + format: 'causantic-archive' as const, + version: '1.0', + created: new Date().toISOString(), + metadata: { + version: '1.0', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'v1-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'v1 content', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [0], + }, + ], + edges: [], + clusters: [], + // No vectors array — v1.0 format + }; + + writeFileSync(outputPath, JSON.stringify(v1Archive)); + + createVectorsTable(db); + const result = await importArchive({ inputPath: outputPath }); + expect(result.chunkCount).toBe(1); + expect(result.vectorCount).toBe(0); - expect(clearExisting).toBe(true); + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + expect(count).toBe(1); }); - it('merge=true preserves existing data', () => { - const merge = true; - const clearExisting = !merge; + it('imports v1.0 archive with memberChunkIds cluster format', async () => { + const v1Archive = { + format: 'causantic-archive' as const, + version: '1.0', + created: new Date().toISOString(), + metadata: { + version: '1.0', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 1, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'v1-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'v1 content', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [0], + }, + ], + edges: [], + clusters: [ + { + id: 'v1-cluster', + name: 'Old Cluster', + description: null, + centroid: null, + exemplarIds: null, + membershipHash: null, + // v1.0 format used memberChunkIds instead of members + memberChunkIds: ['v1-chunk'], + }, + ], + }; + + writeFileSync(outputPath, JSON.stringify(v1Archive)); + + createVectorsTable(db); + await importArchive({ inputPath: outputPath }); - 
expect(clearExisting).toBe(false); + const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array>; + expect(members).toHaveLength(1); + expect(members[0].chunk_id).toBe('v1-chunk'); + expect(members[0].distance).toBe(0); // default distance for v1.0 }); }); - describe('JSON serialization', () => { - it('serializes archive to JSON with formatting', () => { + describe('validation', () => { + it('rejects unknown version', () => { + const archive = { + format: 'causantic-archive', + version: '99.0', + metadata: { chunkCount: 0, edgeCount: 0, clusterCount: 0 }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + } as unknown as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(false); + expect(result.errors[0]).toContain('Unsupported archive version'); + }); + + it('rejects invalid format', () => { + const archive = { + format: 'not-causantic', + version: '1.1', + metadata: { chunkCount: 0, edgeCount: 0, clusterCount: 0 }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + } as unknown as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(false); + expect(result.errors[0]).toContain('Invalid archive format'); + }); + + it('warns on count mismatch', () => { + const archive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 99, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: [], + }, + chunks: [], + edges: [], + clusters: [], + vectors: [], + }; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('chunkCount'))).toBe(true); + }); + + it('warns on dangling edge references', () => { const archive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 1, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: [], + }, + chunks: [ + { + id: 'chunk-1', + sessionId: 'ses', + sessionSlug: 'test', + projectPath: null, + content: 'test', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [], + }, + ], + edges: [ + { + id: 'edge-1', + source: 'chunk-1', + target: 'chunk-nonexistent', + type: 'forward', + referenceType: null, + weight: 1.0, + linkCount: 1, + }, + ], + clusters: [], + vectors: [], + }; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('edge(s) reference chunks'))).toBe(true); + }); + + it('warns on v1.0 missing vectors', () => { + const archive = { format: 'causantic-archive', version: '1.0', - created: '2024-01-15T10:30:00Z', + created: new Date().toISOString(), metadata: { version: '1.0', - created: '2024-01-15T10:30:00Z', + created: new Date().toISOString(), chunkCount: 0, edgeCount: 0, clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, projects: [], }, chunks: [], edges: [], clusters: [], + vectors: [], + } as Archive; + + const result = validateArchive(archive); + expect(result.valid).toBe(true); + expect(result.warnings.some((w) => w.includes('version 1.0'))).toBe(true); + }); + + it('rejects invalid file at import', async () => { + writeFileSync(outputPath, JSON.stringify({ format: 'wrong', version: '1.0' })); + + await expect(importArchive({ inputPath: 
outputPath })).rejects.toThrow('Invalid archive'); + }); + }); + + describe('gzip compression', () => { + it('exports compressed data that is smaller than uncompressed', async () => { + seedTestData(db); + + await exportArchive({ outputPath }); + + const compressedSize = readFileSync(outputPath).length; + + // The file should be gzip (starts with gzip magic bytes) + const fileContent = readFileSync(outputPath); + expect(fileContent[0]).toBe(0x1f); + expect(fileContent[1]).toBe(0x8b); + + // Compression should produce meaningful reduction (at least some savings) + expect(compressedSize).toBeGreaterThan(0); + }); + + it('imports plain JSON (backward compat)', async () => { + // Write a plain JSON archive (no gzip) + const plainArchive: Archive = { + format: 'causantic-archive', + version: '1.1', + created: new Date().toISOString(), + metadata: { + version: '1.1', + created: new Date().toISOString(), + chunkCount: 1, + edgeCount: 0, + clusterCount: 0, + vectorCount: 0, + embeddingDimensions: null, + projects: ['test'], + }, + chunks: [ + { + id: 'plain-chunk', + sessionId: 'ses-1', + sessionSlug: 'test', + projectPath: null, + content: 'plain text', + startTime: '2024-01-01T00:00:00Z', + endTime: '2024-01-01T00:01:00Z', + turnIndices: [], + }, + ], + edges: [], + clusters: [], + vectors: [], }; + writeFileSync(outputPath, JSON.stringify(plainArchive)); - const json = JSON.stringify(archive, null, 2); + createVectorsTable(db); + const result = await importArchive({ inputPath: outputPath }); + expect(result.chunkCount).toBe(1); + }); + }); - expect(json).toContain('"format": "causantic-archive"'); - expect(json).toContain('\n'); // Pretty printed + describe('empty archive', () => { + it('exports and imports empty database', async () => { + const result = await exportArchive({ outputPath }); + expect(result.chunkCount).toBe(0); + expect(result.edgeCount).toBe(0); + expect(result.clusterCount).toBe(0); + expect(result.vectorCount).toBe(0); + + teardownTestDb(db); + db = createTestDb(); + setupTestDb(db); + createVectorsTable(db); + + const importResult = await importArchive({ inputPath: outputPath }); + expect(importResult.chunkCount).toBe(0); }); + }); - it('parses JSON back to archive', () => { - const json = - '{"format":"causantic-archive","version":"1.0","chunks":[],"edges":[],"clusters":[]}'; - const parsed = JSON.parse(json); + describe('missing file', () => { + it('throws on non-existent file', async () => { + await expect(importArchive({ inputPath: '/nonexistent/file.json' })).rejects.toThrow( + 'File not found', + ); + }); + }); - expect(parsed.format).toBe('causantic-archive'); + describe('export result', () => { + it('returns accurate counts and metadata', async () => { + seedTestData(db); + + const result = await exportArchive({ outputPath }); + expect(result).toEqual({ + chunkCount: 3, + edgeCount: 1, + clusterCount: 1, + vectorCount: 2, + fileSize: expect.any(Number), + compressed: true, + encrypted: false, + }); }); }); }); From 6612498b4d506f1fd1056739a97d916d30befd9b Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:24:16 +0000 Subject: [PATCH 2/3] Fix lint warnings in archive tests Remove unused imports (gzipSync, ExportResult, ImportResult) and unused destructured variable. 
--- test/storage/archive.test.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 4f81a1f..49d160b 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -8,8 +8,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { writeFileSync, readFileSync, unlinkSync, existsSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { gzipSync } from 'node:zlib'; -import Database from 'better-sqlite3-multiple-ciphers'; +import type Database from 'better-sqlite3-multiple-ciphers'; import { createTestDb, setupTestDb, @@ -25,8 +24,6 @@ import { importArchive, validateArchive, type Archive, - type ExportResult, - type ImportResult, } from '../../src/storage/archive.js'; import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedding-utils.js'; @@ -152,7 +149,7 @@ describe('archive', () => { describe('unencrypted round-trip', () => { it('exports and imports all data correctly', async () => { - const { embedding } = seedTestData(db); + seedTestData(db); const exportResult = await exportArchive({ outputPath }); expect(exportResult.chunkCount).toBe(3); From be0359781f28f989fea8954e1616d587718eb975 Mon Sep 17 00:00:00 2001 From: Greg von Nessi Date: Mon, 16 Feb 2026 22:25:49 +0000 Subject: [PATCH 3/3] Format with prettier --- src/cli/commands/archive.ts | 5 +-- src/storage/archive.ts | 13 +++--- test/storage/archive.test.ts | 87 ++++++++++++++++++++++++++++-------- 3 files changed, 76 insertions(+), 29 deletions(-) diff --git a/src/cli/commands/archive.ts b/src/cli/commands/archive.ts index e536c62..c401112 100644 --- a/src/cli/commands/archive.ts +++ b/src/cli/commands/archive.ts @@ -70,10 +70,7 @@ export const exportCommand: Command = { `${formatCount(result.clusterCount)} clusters`, `${formatCount(result.vectorCount)} vectors`, ]; - const suffix = [ - result.compressed ? 'compressed' : null, - result.encrypted ? 'encrypted' : null, - ] + const suffix = [result.compressed ? 'compressed' : null, result.encrypted ? 'encrypted' : null] .filter(Boolean) .join(', '); diff --git a/src/storage/archive.ts b/src/storage/archive.ts index e49d045..fd8e44e 100644 --- a/src/storage/archive.ts +++ b/src/storage/archive.ts @@ -326,9 +326,9 @@ export async function exportArchive(options: ExportOptions): Promise '?').join(',')}) `); - const clusterIds = ( - clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }> - ).map((r) => r.cluster_id); + const clusterIds = (clusterIdsQuery.all(...chunkIds) as Array<{ cluster_id: string }>).map( + (r) => r.cluster_id, + ); if (clusterIds.length > 0) { const clustersQuery = db.prepare(` @@ -601,9 +601,10 @@ export async function importArchive(options: ImportOptions): Promise ({ chunkId: id, distance: 0 }), - ) ?? + (cluster as unknown as { memberChunkIds?: string[] }).memberChunkIds?.map((id) => ({ + chunkId: id, + distance: 0, + })) ?? 
[]; for (const member of members) { insertMember.run(member.chunkId, cluster.id, member.distance); diff --git a/test/storage/archive.test.ts b/test/storage/archive.test.ts index 49d160b..6c02bb3 100644 --- a/test/storage/archive.test.ts +++ b/test/storage/archive.test.ts @@ -29,7 +29,10 @@ import { serializeEmbedding, deserializeEmbedding } from '../../src/utils/embedd // Helper to create a temp file path function tempPath(suffix = '.json'): string { - return join(tmpdir(), `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`); + return join( + tmpdir(), + `causantic-test-${Date.now()}-${Math.random().toString(36).slice(2)}${suffix}`, + ); } // Helper to create vectors table in test db @@ -120,7 +123,11 @@ function seedTestData(db: Database.Database): { createVectorsTable(db); const embedding = Array.from({ length: 8 }, (_, i) => i * 0.1); insertTestVector(db, 'chunk-1', embedding); - insertTestVector(db, 'chunk-2', embedding.map((v) => v + 0.01)); + insertTestVector( + db, + 'chunk-2', + embedding.map((v) => v + 0.01), + ); return { chunkIds: ['chunk-1', 'chunk-2', 'chunk-3'], @@ -174,7 +181,9 @@ describe('archive', () => { expect(importResult.dryRun).toBe(false); // Verify data - const chunks = db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array>; + const chunks = db.prepare('SELECT * FROM chunks ORDER BY id').all() as Array< + Record + >; expect(chunks).toHaveLength(3); expect(chunks[0].id).toBe('chunk-1'); expect(chunks[0].session_id).toBe('session-1'); @@ -196,12 +205,16 @@ describe('archive', () => { expect(clusters[0].description).toBe('Authentication related'); expect(clusters[0].membership_hash).toBe('abc123'); - const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array>; + const members = db.prepare('SELECT * FROM chunk_clusters ORDER BY chunk_id').all() as Array< + Record + >; expect(members).toHaveLength(2); expect(members[0].chunk_id).toBe('chunk-1'); expect(members[0].distance).toBe(0.3); - const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array>; + const vectors = db.prepare('SELECT * FROM vectors ORDER BY id').all() as Array< + Record + >; expect(vectors).toHaveLength(2); }); }); @@ -299,7 +312,9 @@ describe('archive', () => { const importResult = await importArchive({ inputPath: outputPath }); expect(importResult.vectorCount).toBe(0); - const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { count: number }; + const vectors = db.prepare('SELECT COUNT(*) as count FROM vectors').get() as { + count: number; + }; expect(vectors.count).toBe(0); }); }); @@ -317,7 +332,10 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const cluster = db.prepare('SELECT * FROM clusters WHERE id = ?').get('cluster-1') as Record; + const cluster = db.prepare('SELECT * FROM clusters WHERE id = ?').get('cluster-1') as Record< + string, + unknown + >; expect(cluster.name).toBe('Auth cluster'); expect(cluster.description).toBe('Authentication related'); expect(cluster.membership_hash).toBe('abc123'); @@ -329,7 +347,9 @@ describe('archive', () => { expect(centroid[0]).toBeCloseTo(0.1, 5); // Verify member distances - const members = db.prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? ORDER BY chunk_id').all('cluster-1') as Array>; + const members = db + .prepare('SELECT * FROM chunk_clusters WHERE cluster_id = ? 
ORDER BY chunk_id') + .all('cluster-1') as Array>; expect(members).toHaveLength(2); expect(members[0].distance).toBe(0.3); expect(members[1].distance).toBe(0.5); @@ -363,9 +383,21 @@ describe('archive', () => { describe('edge completeness', () => { it('excludes edges with one endpoint outside the export', async () => { - const chunk1 = createSampleChunk({ id: 'chunk-a', sessionSlug: 'proj-1', sessionId: 'ses-1' }); - const chunk2 = createSampleChunk({ id: 'chunk-b', sessionSlug: 'proj-1', sessionId: 'ses-1' }); - const chunk3 = createSampleChunk({ id: 'chunk-c', sessionSlug: 'proj-2', sessionId: 'ses-2' }); + const chunk1 = createSampleChunk({ + id: 'chunk-a', + sessionSlug: 'proj-1', + sessionId: 'ses-1', + }); + const chunk2 = createSampleChunk({ + id: 'chunk-b', + sessionSlug: 'proj-1', + sessionId: 'ses-1', + }); + const chunk3 = createSampleChunk({ + id: 'chunk-c', + sessionSlug: 'proj-2', + sessionId: 'ses-2', + }); insertTestChunk(db, chunk1); insertTestChunk(db, chunk2); insertTestChunk(db, chunk3); @@ -407,7 +439,9 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { content: string }; + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-1') as { + content: string; + }; expect(chunk.content).toContain('[REDACTED_PATH]'); expect(chunk.content).not.toContain('/path/to/file.ts'); }); @@ -424,7 +458,9 @@ describe('archive', () => { await importArchive({ inputPath: outputPath }); - const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { content: string }; + const chunk = db.prepare('SELECT content FROM chunks WHERE id = ?').get('chunk-2') as { + content: string; + }; expect(chunk.content).toContain('[REDACTED_CODE]'); expect(chunk.content).not.toContain('const x = 1'); }); @@ -436,7 +472,11 @@ describe('archive', () => { await exportArchive({ outputPath }); // Add extra data before import - const extra = createSampleChunk({ id: 'chunk-extra', sessionSlug: 'project-a', sessionId: 'ses-x' }); + const extra = createSampleChunk({ + id: 'chunk-extra', + sessionSlug: 'project-a', + sessionId: 'ses-x', + }); insertTestChunk(db, extra); expect( (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count, @@ -461,13 +501,18 @@ describe('archive', () => { db = createTestDb(); setupTestDb(db); createVectorsTable(db); - const other = createSampleChunk({ id: 'chunk-other', sessionSlug: 'project-c', sessionId: 'ses-c' }); + const other = createSampleChunk({ + id: 'chunk-other', + sessionSlug: 'project-c', + sessionId: 'ses-c', + }); insertTestChunk(db, other); await importArchive({ inputPath: outputPath, merge: true }); // Should have both the imported chunks AND the existing one - const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }) + .count; expect(count).toBe(3); // 2 from project-a + 1 existing }); }); @@ -487,7 +532,8 @@ describe('archive', () => { expect(result.edgeCount).toBe(1); // Database should be empty - const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count; + const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }) + .count; expect(count).toBe(0); }); }); @@ -533,7 +579,8 @@ describe('archive', () => { expect(result.chunkCount).toBe(1); 
       expect(result.vectorCount).toBe(0);
 
-      const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number }).count;
+      const count = (db.prepare('SELECT COUNT(*) as count FROM chunks').get() as { count: number })
+        .count;
       expect(count).toBe(1);
     });
 
@@ -584,7 +631,9 @@ describe('archive', () => {
       createVectorsTable(db);
       await importArchive({ inputPath: outputPath });
 
-      const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array<Record<string, unknown>>;
+      const members = db.prepare('SELECT * FROM chunk_clusters').all() as Array<
+        Record<string, unknown>
+      >;
       expect(members).toHaveLength(1);
       expect(members[0].chunk_id).toBe('v1-chunk');
       expect(members[0].distance).toBe(0); // default distance for v1.0