SpillwaveSolutions · RichardHightower · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -80,7 +80,7 @@ jobs:
           shared-key: "test-${{ matrix.os }}"
 
       - name: Run tests
-        run: cargo test --workspace --all-features
+        run: cargo test --workspace --all-features --exclude e2e-tests
 
   build:
     name: Build (${{ matrix.os }})
@@ -139,10 +139,51 @@ jobs:
           RUSTDOCFLAGS: "-D warnings"
         run: cargo doc --no-deps --workspace --all-features
 
+  e2e:  # Dedicated job for the e2e-tests crate so its pass/fail is reported apart from the other jobs.
+    name: E2E Tests
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y protobuf-compiler libclang-dev
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo registry
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: "e2e"
+
+      - name: Run E2E tests
+        id: e2e_run
+        continue-on-error: true  # let the summary step publish results; the final step re-fails the job
+        run: set -o pipefail; cargo test -p e2e-tests --all-features -- --show-output 2>&1 | tee e2e-results.txt  # pipefail: report cargo's exit status, not tee's
+
+      - name: Report E2E summary
+        if: always()
+        run: |
+          echo "## E2E Test Results" >> "$GITHUB_STEP_SUMMARY"
+          echo '```' >> "$GITHUB_STEP_SUMMARY"
+          grep -E "^test |^running |FAILED|test result:" e2e-results.txt >> "$GITHUB_STEP_SUMMARY" || true  # best-effort: no matches or a missing file must not fail the step
+          echo '```' >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Check E2E test result
+        if: always()
+        run: |
+          if [[ "${{ steps.e2e_run.outcome }}" != "success" ]]; then  # outcome = result before continue-on-error is applied
+            echo "E2E tests failed"
+            exit 1
+          fi
+          echo "E2E tests passed!"
+
   # Summary job that depends on all other jobs
   ci-success:
     name: CI Success
-    needs: [fmt, clippy, test, build, doc]
+    needs: [fmt, clippy, test, build, doc, e2e]
     runs-on: ubuntu-24.04
     if: always()
     steps:
@@ -152,7 +193,8 @@ jobs:
              [[ "${{ needs.clippy.result }}" != "success" ]] || \
              [[ "${{ needs.test.result }}" != "success" ]] || \
              [[ "${{ needs.build.result }}" != "success" ]] || \
-             [[ "${{ needs.doc.result }}" != "success" ]]; then
+             [[ "${{ needs.doc.result }}" != "success" ]] || \
+             [[ "${{ needs.e2e.result }}" != "success" ]]; then
             echo "One or more jobs failed"
             exit 1
           fi

diff --git a/.planning/MILESTONES.md b/.planning/MILESTONES.md
@@ -1,5 +1,61 @@
 # Project Milestones: Agent Memory
 
+## v2.2 Production Hardening (Shipped: 2026-02-11)
+
+**Delivered:** Production-hardened system with all stub RPCs wired, 29 E2E tests across 7 files, and a dedicated E2E CI job in GitHub Actions that is required for PR merge.
+
+**Phases completed:** 24-27 (10 plans total)
+
+**Key accomplishments:**
+
+- All gRPC stub RPCs wired (GetRankingStatus, PruneVectorIndex, PruneBm25Index)
+- ListAgents session_count fixed via event scanning (was returning 0)
+- Agent field added to TeleportResult and VectorTeleportMatch for cross-agent attribution
+- 29 E2E tests across 7 files: pipeline, BM25, vector, topic graph, multi-agent, degradation, error paths
+- Dedicated E2E CI job in GitHub Actions with separate pass/fail reporting
+- E2E tests required for PR merge via ci-success gate
+
+**Stats:**
+
+- 43,932 total LOC Rust
+- 4 phases, 10 plans, 17 commits
+- 1 day from start to ship (2026-02-11)
+
+**Git range:** `feat(24-01)` → `feat(27-01)`
+
+**What's next:** Performance benchmarks, cross-project memory, or v2.3 enhancements
+
+---
+
+## v2.1 Multi-Agent Ecosystem (Shipped: 2026-02-10)
+
+**Delivered:** Multi-agent ecosystem with 4 adapter plugins (Claude Code, OpenCode, Gemini CLI, Copilot CLI), cross-agent discovery (agent listing, activity timeline, topic-by-agent), and CLOD universal command format.
+
+**Phases completed:** 18-23 (22 plans total)
+
+**Key accomplishments:**
+
+- Agent tagging infrastructure — Event.agent field, TocNode.contributing_agents, AgentAdapter trait SDK
+- OpenCode plugin — 3 commands, 5 skills, navigator agent, TypeScript event capture plugin
+- OpenCode event capture — agent field through ingest-to-retrieval pipeline, multi-agent query results
+- Gemini CLI adapter — shell hook handler, TOML commands, skills with embedded navigator, install skill
+- Copilot CLI adapter — session ID synthesis, skills, .agent.md navigator, plugin.json manifest
+- Cross-agent discovery — ListAgents/GetAgentActivity RPCs, agent-filtered topics, CLOD spec + converter CLI
+- Comprehensive documentation — cross-agent usage guide, adapter authoring guide, UPGRADING.md
+
+**Stats:**
+
+- 155 files created/modified
+- 31,544 lines added (40,817 total LOC Rust)
+- 6 phases, 22 plans, 76 commits
+- 2 days from start to ship (2026-02-09 → 2026-02-10)
+
+**Git range:** `feat(18-01)` → `docs(phase-23)`
+
+**What's next:** E2E automated tests, performance benchmarks, or v2.2 enhancements
+
+---
+
 ## v2.0.0 Scheduler+Teleport (Shipped: 2026-02-07)
 
 **Delivered:** Full cognitive architecture with layered search (Agentic TOC → BM25 → Vector → Topics), ranking policy (salience, usage, novelty), and retrieval brainstem (intent routing, tier detection, fallback chains).
@@ -54,35 +110,3 @@
 **Git range:** `feat(01-00)` → `feat(08-01)`
 
 **What's next:** Teleport indexes (BM25/vector search), additional hook adapters (OpenCode, Gemini CLI), or production hardening
-
----
-
-## v2.1 Multi-Agent Ecosystem (Shipped: 2026-02-10)
-
-**Delivered:** Multi-agent ecosystem with 4 adapter plugins (Claude Code, OpenCode, Gemini CLI, Copilot CLI), cross-agent discovery (agent listing, activity timeline, topic-by-agent), and CLOD universal command format.
-
-**Phases completed:** 18-23 (22 plans total)
-
-**Key accomplishments:**
-
-- Agent tagging infrastructure — Event.agent field, TocNode.contributing_agents, AgentAdapter trait SDK
-- OpenCode plugin — 3 commands, 5 skills, navigator agent, TypeScript event capture plugin
-- OpenCode event capture — agent field through ingest-to-retrieval pipeline, multi-agent query results
-- Gemini CLI adapter — shell hook handler, TOML commands, skills with embedded navigator, install skill
-- Copilot CLI adapter — session ID synthesis, skills, .agent.md navigator, plugin.json manifest
-- Cross-agent discovery — ListAgents/GetAgentActivity RPCs, agent-filtered topics, CLOD spec + converter CLI
-- Comprehensive documentation — cross-agent usage guide, adapter authoring guide, UPGRADING.md
-
-**Stats:**
-
-- 155 files created/modified
-- 31,544 lines added (40,817 total LOC Rust)
-- 6 phases, 22 plans, 76 commits
-- 2 days from start to ship (2026-02-09 → 2026-02-10)
-
-**Git range:** `feat(18-01)` → `docs(phase-23)`
-
-**What's next:** E2E automated tests, performance benchmarks, or v2.2 enhancements
-
----
-
diff --git a/.planning/PROJECT.md b/.planning/PROJECT.md
@@ -2,10 +2,10 @@
 
 ## Current State
 
-**Version:** v2.1 (Shipped 2026-02-10)
-**Status:** Multi-agent ecosystem complete — 4 adapters, cross-agent discovery, CLOD format
+**Version:** v2.2 (Shipped 2026-02-11)
+**Status:** Production-hardened — all RPCs wired, 29 E2E tests, CI/CD with dedicated E2E job
 
-The system implements a complete 6-layer cognitive stack with control plane and multi-agent support:
+The system implements a complete 6-layer cognitive stack with control plane, multi-agent support, and production verification:
 - Layer 0: Raw Events (RocksDB) — agent-tagged
 - Layer 1: TOC Hierarchy (time-based navigation) — contributing_agents tracking
 - Layer 2: Agentic TOC Search (index-free, always works)
@@ -16,17 +16,10 @@ The system implements a complete 6-layer cognitive stack with control plane and
 - Control: Retrieval Policy (intent routing, tier detection, fallbacks)
 - Adapters: Claude Code, OpenCode, Gemini CLI, Copilot CLI
 - Discovery: ListAgents, GetAgentActivity, agent-filtered topics
+- Testing: 29 E2E tests covering all layers + multi-agent + degradation + error paths
+- CI/CD: Dedicated E2E job in GitHub Actions, required for PR merge
 
-40,817 LOC Rust across 14 crates. 4 adapter plugins. 3 documentation guides.
-
-## Current Milestone: v2.2 Production Hardening
-
-**Goal:** Make Agent Memory CI-verified and production-ready by closing all tech debt, adding E2E pipeline tests, and strengthening CI/CD.
-
-**Target features:**
-- E2E test suite (ingest → TOC build → grip creation → query route → results)
-- Tech debt cleanup (wire stub RPCs, fix session_count, agent field on teleport results)
-- CI/CD improvements (E2E tests in GitHub Actions)
+43,932 LOC Rust across 14 crates. 4 adapter plugins. 3 documentation guides. 29 E2E tests.
 
 ## What This Is
 
@@ -167,20 +160,20 @@ Agent Memory implements a layered cognitive architecture:
 
 </details>
 
-### Active (v2.2 Production Hardening)
+### Validated (v2.2 - Shipped 2026-02-11)
 
-**E2E Testing**
-- [ ] Full pipeline E2E tests (ingest → TOC → grips → query → results)
-- [ ] E2E tests run in CI (GitHub Actions)
+**Production Hardening (v2.2)**
+- [x] All gRPC stub RPCs wired (GetRankingStatus, PruneVectorIndex, PruneBm25Index) — v2.2
+- [x] ListAgents session_count fixed via event scanning — v2.2
+- [x] Agent field on TeleportResult and VectorTeleportMatch — v2.2
+- [x] 29 E2E tests across 7 files (pipeline, BM25, vector, topic graph, multi-agent, degradation, error paths) — v2.2
+- [x] Dedicated E2E CI job in GitHub Actions with separate pass/fail reporting — v2.2
+- [x] E2E tests run on every PR, required for merge via ci-success gate — v2.2
 
-**Tech Debt Cleanup**
-- [ ] Wire GetRankingStatus, PruneVectorIndex, PruneBm25Index stub RPCs
-- [ ] Fix session_count in ListAgents (event scanning, not TOC-only)
-- [ ] Add agent field to TeleportResult and VectorTeleportMatch
-- [ ] CI/CD pipeline improvements
+### Active (future)
 
-**Deferred (future)**
-- Performance benchmarks
+**Deferred**
+- Performance benchmarks (ingest throughput, query latency)
 - Cross-project unified memory
 
 ### Out of Scope
@@ -257,6 +250,10 @@ CLI client and agent skill query the daemon. Agent receives TOC navigation tools
 | O(k) agent discovery | Aggregate from TocNode.contributing_agents, not O(n) events | ✓ Validated v2.1 |
 | CLOD as internal format | TOML-based portable command definition, not external standard | ✓ Validated v2.1 |
 | Skills portable across agents | Same SKILL.md works in Claude/OpenCode/Copilot | ✓ Validated v2.1 |
+| E2E tests via cargo test | Standard test infra, no separate framework | ✓ Validated v2.2 |
+| Direct handler testing | tonic::Request without gRPC server; faster, simpler | ✓ Validated v2.2 |
+| Dedicated E2E CI job | Separate from unit tests; clear reporting per CI-03 | ✓ Validated v2.2 |
+| BM25 prune report-only | TeleportSearcher is read-only; deletion needs SearchIndexer | — Design decision v2.2 |
 
 ---
-*Last updated: 2026-02-10 after v2.2 milestone initialization*
+*Last updated: 2026-02-11 after v2.2 milestone completion*
diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md
@@ -5,7 +5,7 @@
 - ✅ **v1.0 MVP** — Phases 1-9 (shipped 2026-01-30)
 - ✅ **v2.0 Scheduler+Teleport** — Phases 10-17 (shipped 2026-02-07)
 - ✅ **v2.1 Multi-Agent Ecosystem** — Phases 18-23 (shipped 2026-02-10)
-- **v2.2 Production Hardening** — Phases 24-27 (in progress)
+- ✅ **v2.2 Production Hardening** — Phases 24-27 (shipped 2026-02-11)
 
 ## Phases
 
@@ -56,82 +56,27 @@ See: `.planning/milestones/v2.1-ROADMAP.md`
 
 </details>
 
-### v2.2 Production Hardening (In Progress)
-
-**Milestone Goal:** Make Agent Memory CI-verified and production-ready by closing all tech debt, adding E2E pipeline tests, and strengthening CI/CD.
-
-- [x] **Phase 24: Proto & Service Debt Cleanup** (3/3 plans) -- completed 2026-02-11
-- [x] **Phase 25: E2E Core Pipeline Tests** (3/3 plans) -- completed 2026-02-11
-- [ ] **Phase 26: E2E Advanced Scenario Tests** - Multi-agent, graceful degradation, and error path tests
-- [ ] **Phase 27: CI/CD E2E Integration** - E2E tests running in GitHub Actions on every PR
-
-## Phase Details
-
-### Phase 24: Proto & Service Debt Cleanup
-**Goal**: All gRPC RPCs are fully wired and return real data; teleport results include agent attribution
-**Depends on**: Nothing (standalone tech debt work)
-**Requirements**: DEBT-01, DEBT-02, DEBT-03, DEBT-04, DEBT-05, DEBT-06
-**Success Criteria** (what must be TRUE):
-  1. GetRankingStatus RPC returns the current ranking configuration (salience weights, decay settings) instead of an unimplemented error
-  2. PruneVectorIndex and PruneBm25Index RPCs trigger actual index cleanup and return a status indicating what was pruned
-  3. ListAgents RPC returns accurate session_count by scanning events, not just TOC nodes
-  4. TeleportResult and VectorTeleportMatch proto messages include an agent field populated from event metadata
-**Plans:** 3 plans
-Plans:
-- [ ] 24-01-PLAN.md -- Wire GetRankingStatus RPC + fix ListAgents session_count
-- [ ] 24-02-PLAN.md -- Add agent field to teleport and vector search results
-- [ ] 24-03-PLAN.md -- Wire PruneVectorIndex and PruneBm25Index RPCs
-
-### Phase 25: E2E Core Pipeline Tests
-**Goal**: The core ingest-to-query pipeline is verified end-to-end by automated tests covering every search layer
-**Depends on**: Phase 24 (agent fields and wired RPCs needed for complete assertions)
-**Requirements**: E2E-01, E2E-02, E2E-03, E2E-04, E2E-07
-**Success Criteria** (what must be TRUE):
-  1. A test ingests events, triggers TOC segment build with grips, and verifies route_query returns results with correct provenance
-  2. A test ingests events, builds BM25 index, and verifies bm25_search returns matching events ranked by relevance
-  3. A test ingests events, builds vector index, and verifies vector_search returns semantically similar events
-  4. A test ingests events, runs topic clustering, and verifies get_top_topics returns relevant topics
-  5. A test ingests events with grips, calls expand_grip, and verifies source events with surrounding context are returned
-**Plans:** 3 plans
-Plans:
-- [ ] 25-01-PLAN.md -- E2E crate setup + full pipeline test + grip provenance test
-- [ ] 25-02-PLAN.md -- BM25 teleport search E2E test with relevance ranking
-- [ ] 25-03-PLAN.md -- Vector semantic search + topic graph E2E tests
-
-### Phase 26: E2E Advanced Scenario Tests
-**Goal**: Edge cases and multi-agent scenarios are verified: cross-agent queries, fallback chains, and error handling all work correctly
-**Depends on**: Phase 25 (builds on core test infrastructure and helpers)
-**Requirements**: E2E-05, E2E-06, E2E-08
-**Success Criteria** (what must be TRUE):
-  1. A test ingests events from multiple agents, verifies cross-agent query returns all results, and filtered query returns only the specified agent's results
-  2. A test queries with missing indexes and verifies the system degrades gracefully to TOC-based fallback, still returning useful results
-  3. A test sends malformed events and invalid queries, verifying graceful error responses (no panics, useful error messages)
-**Plans**: TBD
-
-### Phase 27: CI/CD E2E Integration
-**Goal**: E2E tests run automatically in GitHub Actions on every PR, with clear pass/fail reporting
-**Depends on**: Phase 25, Phase 26 (E2E tests must exist before CI can run them)
-**Requirements**: CI-01, CI-02, CI-03
-**Success Criteria** (what must be TRUE):
-  1. GitHub Actions CI pipeline includes an E2E test job that runs the full E2E suite
-  2. The E2E job triggers on pull requests to main (not just pushes to main)
-  3. CI output shows E2E test count and individual pass/fail status separately from unit/integration tests
-**Plans**: TBD
+<details>
+<summary>v2.2 Production Hardening (Phases 24-27) -- SHIPPED 2026-02-11</summary>
 
-## Progress
+- [x] Phase 24: Proto & Service Debt Cleanup (3/3 plans) -- completed 2026-02-11
+- [x] Phase 25: E2E Core Pipeline Tests (3/3 plans) -- completed 2026-02-11
+- [x] Phase 26: E2E Advanced Scenario Tests (3/3 plans) -- completed 2026-02-11
+- [x] Phase 27: CI/CD E2E Integration (1/1 plan) -- completed 2026-02-11
+
+See: `.planning/milestones/v2.2-ROADMAP.md`
 
-**Execution Order:** 24 -> 25 -> 26 -> 27
+</details>
+
+## Progress
 
 | Phase | Milestone | Plans | Status | Completed |
 |-------|-----------|-------|--------|-----------|
 | 1-9 | v1.0 | 20/20 | Complete | 2026-01-30 |
 | 10-17 | v2.0 | 42/42 | Complete | 2026-02-07 |
 | 18-23 | v2.1 | 22/22 | Complete | 2026-02-10 |
-| 24. Proto & Service Debt Cleanup | v2.2 | 3/3 | Complete | 2026-02-11 |
-| 25. E2E Core Pipeline Tests | v2.2 | 3/3 | Complete | 2026-02-11 |
-| 26. E2E Advanced Scenario Tests | v2.2 | 0/TBD | Not started | - |
-| 27. CI/CD E2E Integration | v2.2 | 0/TBD | Not started | - |
+| 24-27 | v2.2 | 10/10 | Complete | 2026-02-11 |
 
 ---
 
-*Updated: 2026-02-10 after v2.2 roadmap creation*
+*Updated: 2026-02-11 after v2.2 milestone completion*