From 93bd03da4b1bd48a329bbe36894b64beed2c4fb7 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Sat, 6 Dec 2025 05:18:21 +0800 Subject: [PATCH 01/15] docs: add PydanticAI as agent framework choice --- DEVELOPMENT_PLAN.md | 6 +++--- README.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index 5762910..5188305 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -130,9 +130,9 @@ agent/src/ ### Python Module (`memory_service/`) Dependencies: +- `pydantic-ai` (agent framework) +- `litellm` (multi-provider LLM support) - `onnxruntime` (embeddings) -- `sentence-transformers` (model loading) -- `anthropic` (agent LLM) - `pydantic` (schemas) Directory structure: @@ -308,7 +308,7 @@ async def handle_connection(reader, writer): ### C2. Squirrel Agent (`agent.py`) -Single LLM-powered agent with tools. Uses small fast model (Haiku/GPT-4o-mini). +Single LLM-powered agent with tools using PydanticAI framework. Uses cheap fast model (Gemini Flash/DeepSeek). ```python class SquirrelAgent: diff --git a/README.md b/README.md index 2b23cfb..43aeee3 100644 --- a/README.md +++ b/README.md @@ -224,9 +224,9 @@ sqrl config set llm.model claude-sonnet ```toml # ~/.sqrl/config.toml [llm] -api_key = "sk-ant-..." -model = "claude-sonnet-4-20250514" -small_model = "claude-haiku" # For agent operations +provider = "gemini" # gemini | deepseek | openai | anthropic +api_key = "..." 
+model = "gemini-2.5-flash" # Primary model for agent [daemon] idle_timeout_hours = 2 # Stop after N hours inactive From d5840eb0f0bc78e614104f16912327eb185dadaf Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Sat, 6 Dec 2025 12:58:37 +0800 Subject: [PATCH 02/15] docs: add 2-tier LLM design and context compose flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- DEVELOPMENT_PLAN.md | 23 +++++++++-- EXAMPLE.md | 93 +++++++++++++++++++++++++++------------------ README.md | 29 +++++++++++--- 3 files changed, 97 insertions(+), 48 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index 5188305..a1516d7 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -50,7 +50,7 @@ Modular development plan with Rust daemon + Python Agent communicating via Unix β”‚ └── IPC: CLI command β†’ agent executes β”‚ β”‚ β”‚ β”‚ ONNX Embeddings (all-MiniLM-L6-v2, 384-dim) β”‚ -β”‚ Retrieval (similarity + importance + recency scoring) β”‚ +β”‚ 2-tier LLM: strong (ingest) + fast (compose, CLI, dedup) β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` @@ -241,7 +241,8 @@ Paths: - `/.sqrl/` (project) Config fields: -- llm.api_key, llm.model, llm.small_model +- llm.provider, llm.api_key, llm.base_url +- llm.strong_model, llm.fast_model (2-tier design) - daemon.idle_timeout_hours (default: 2) - daemon.socket_path @@ -308,7 +309,14 @@ async def handle_connection(reader, writer): ### C2. Squirrel Agent (`agent.py`) -Single LLM-powered agent with tools using PydanticAI framework. Uses cheap fast model (Gemini Flash/DeepSeek). +Single LLM-powered agent with tools using PydanticAI framework. 
Uses 2-tier LLM design: + +| Task | Model Tier | Default | +|------|------------|---------| +| Episode Ingestion | strong_model | gemini-2.5-pro | +| Context Compose | fast_model | gemini-3-flash | +| CLI Interpretation | fast_model | gemini-3-flash | +| Near-duplicate Check | fast_model | gemini-3-flash | ```python class SquirrelAgent: @@ -388,7 +396,14 @@ UserProfile schema: **search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results -**get_task_context(task, budget):** Search + score (similarity + importance + recency) + select within token budget + generate "why" explanations +**get_task_context(task, budget):** +1. Vector search retrieves top 20 candidates +2. LLM (fast_model) reranks + composes context prompt: + - Selects relevant memories + - Resolves conflicts + - Merges related memories + - Generates structured prompt with memory IDs +3. Returns ready-to-inject context prompt within token budget **forget_memory(id):** Soft-delete (set state='deleted') diff --git a/EXAMPLE.md b/EXAMPLE.md index 12b75be..b94851e 100644 --- a/EXAMPLE.md +++ b/EXAMPLE.md @@ -295,64 +295,79 @@ Claude Code (or other AI tool) calls Squirrel via MCP: } ``` -### Step 4.2: Agent Retrieves Context +### Step 4.2: Vector Search (Candidate Retrieval) -The agent receives the MCP request via IPC and uses its retrieval tools: +The agent receives the MCP request via IPC and retrieves candidates: ```python async def get_task_context(project_root: str, task: str, budget: int) -> dict: - # For trivial queries, return empty fast + # For trivial queries, return empty fast (<20ms) if is_trivial_task(task): # "fix typo", "add comment" - return {"memories": [], "tokens_used": 0} + return {"context_prompt": "", "memory_ids": [], "tokens_used": 0} - # Retrieve candidates from both DBs + # Vector search retrieves top 20 candidates from both DBs candidates = await retrieval.search( task=task, project_root=project_root, - include_global=True # user_style from global DB + 
include_global=True, # user_style from global DB + top_k=20 ) + # candidates now contains ~20 memories ranked by embedding similarity +``` + +### Step 4.3: LLM Rerank + Compose (fast_model) - # Score by: similarity + importance + recency - scored = score_candidates(candidates, task) - selected = select_within_budget(scored, budget) +LLM reranks candidates and composes a context prompt in ONE call: + +```python + # LLM reranks + composes context prompt (uses fast_model) + response = await llm.call( + model=config.fast_model, # gemini-3-flash + prompt=COMPOSE_PROMPT.format( + task=task, + candidates=format_candidates(candidates), + budget=budget + ) + ) return { - "memories": [ - { - "type": m.memory_type, - "content": m.content, - "importance": m.importance, - "why": generate_explanation(m, task) - } - for m in selected - ], - "tokens_used": count_tokens(selected) + "context_prompt": response.prompt, # Ready-to-inject text + "memory_ids": response.selected_ids, # For tracing + "tokens_used": count_tokens(response.prompt) } ``` -### Step 4.3: Response +COMPOSE_PROMPT: +``` +Task: {task} + +Candidate memories (ranked by similarity): +{candidates} + +Select the most relevant memories for this task. Then compose a context prompt that: +1. Prioritizes pitfalls (what NOT to do) first +2. Includes relevant recipes and project_facts +3. Resolves conflicts between memories (newer wins) +4. Merges related memories to save tokens +5. 
Stays within {budget} tokens + +Return: +- selected_ids: list of memory IDs you selected +- prompt: the composed context prompt for the AI tool +``` + +### Step 4.4: Response ```json { - "task": "Add a delete endpoint for inventory items", - "memories": [ - { - "type": "user_style", - "content": "Prefers async/await with type hints for all handlers", - "importance": "high", - "why": "Relevant because you're adding an HTTP endpoint" - }, - { - "type": "project_fact", - "content": "Uses FastAPI with Pydantic models", - "importance": "medium", - "why": "Relevant because delete endpoint needs proper response model" - } - ], - "tokens_used": 156 + "context_prompt": "## Context from Squirrel\n\n**Style Preferences:**\n- Use async/await with type hints for all handlers [mem_abc123]\n\n**Project Facts:**\n- This project uses FastAPI with Pydantic models [mem_def456]\n\n**Relevant for this task:** You're adding an HTTP endpoint, so follow the async pattern and define a Pydantic response model.", + "memory_ids": ["mem_abc123", "mem_def456"], + "tokens_used": 89 } ``` +The AI tool injects this `context_prompt` directly into its system prompt for better responses. + --- ## Phase 5: Daemon Lifecycle @@ -445,7 +460,7 @@ CREATE TABLE user_profile ( | **Success Detection** | Agent segments Tasks, classifies SUCCESS/FAILURE/UNCERTAIN | | Extraction | SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, UNCERTAINβ†’skip | | Dedup | Near-duplicate check (0.9 similarity) before ADD | -| Retrieval | MCP β†’ Agent scores by similarity+importance+recency β†’ returns within budget | +| Retrieval | MCP β†’ Vector search (top 20) β†’ LLM reranks + composes context prompt | | Idle | 2hr no activity β†’ daemon stops, next command restarts | ### Why Success Detection Matters @@ -464,11 +479,13 @@ This is the core insight: passive learning REQUIRES outcome classification. 
| Decision | Choice | Why | |----------|--------|-----| | **Unified Agent** | Single Python agent with tools | One LLM brain for all operations | +| **2-tier LLM** | strong_model + fast_model | Pro for complex reasoning, Flash for quick tasks | | **Lazy Daemon** | Start on command, stop after 2hr idle | No system service complexity | | Episode trigger | 4-hour window OR 50 events | Balance context vs LLM cost | -| Success detection | LLM classifies outcomes | Core insight for passive learning | +| Success detection | LLM classifies outcomes (strong_model) | Core insight for passive learning | | Task segmentation | LLM decides, no rules engine | Simple, semantic understanding | | Memory extraction | Outcome-based | Learn from both success and failure | +| **Context compose** | LLM reranks + generates prompt (fast_model) | Better than math scoring, one call | | **Natural language CLI** | Thin shell passes to agent | "By the way" - agent handles all | | **Retroactive ingestion** | Token-limited, not time-limited | Fair for all project sizes | | User profile | Separate table from user_style | Structured vs unstructured | diff --git a/README.md b/README.md index 43aeee3..cb4d513 100644 --- a/README.md +++ b/README.md @@ -151,11 +151,15 @@ Near-duplicate check (0.9 threshold) β†’ store or merge ``` Claude Code calls MCP: squirrel_get_task_context ↓ -Agent retrieves relevant memories +Vector search retrieves candidate memories (top 20) ↓ -Scores by: similarity + importance + recency +LLM reranks candidates + composes context prompt: + - Selects relevant memories + - Resolves conflicts between memories + - Merges related memories + - Generates structured prompt with memory IDs ↓ -Returns within token budget + "why" explanations +Returns ready-to-inject context prompt ↓ Claude Code uses context for better response ``` @@ -224,14 +228,27 @@ sqrl config set llm.model claude-sonnet ```toml # ~/.sqrl/config.toml [llm] -provider = "gemini" # gemini | deepseek | openai | 
anthropic +provider = "gemini" # gemini | openai | anthropic | ollama | ... api_key = "..." -model = "gemini-2.5-flash" # Primary model for agent +base_url = "" # Optional, for local models (Ollama, LMStudio) + +# 2-tier model design +strong_model = "gemini-2.5-pro" # Complex reasoning (episode ingestion) +fast_model = "gemini-3-flash" # Fast tasks (context compose, CLI, dedup) [daemon] idle_timeout_hours = 2 # Stop after N hours inactive ``` +### LLM Usage + +| Task | Model | Why | +|------|-------|-----| +| Episode Ingestion | strong_model | Multi-step reasoning over ~10K tokens | +| Context Compose | fast_model | Rerank + generate structured prompt | +| CLI Interpretation | fast_model | Parse natural language commands | +| Near-duplicate Check | fast_model | Simple comparison | + ## Project Structure ``` @@ -300,7 +317,7 @@ source .venv/bin/activate && pytest - Cross-platform (Mac, Linux, Windows) - `sqrl update` command -**v1.1:** Auto-update, LLM-based retrieval reranking, memory consolidation +**v1.1:** Auto-update, memory consolidation, retrieval debugging tools **v2:** Hooks output, file injection (AGENTS.md/GEMINI.md), cloud sync, team sharing From eee485af33f291ca27a0045accffe0c8dd974cb6 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Sun, 7 Dec 2025 23:27:14 +0800 Subject: [PATCH 03/15] docs: add 3-layer storage architecture and team features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add individual vs team memory separation (squirrel.db vs group.db) - Add 6 new team memory types (team_style, team_profile, shared_*, team_process) - Add process memory type for audit/export - Add team commands (sqrl share, sqrl export, sqrl team) - Add team DB options (cloud/self-hosted/local) - Merge v1.1 features into v1 scope - Split v1 into Individual (free) and Team (paid) features πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 120 
++++++++++++++++++++++++++++++++++------- EXAMPLE.md | 128 ++++++++++++++++++++++++++++++++++++++++---- README.md | 112 +++++++++++++++++++++++++++++--------- 3 files changed, 306 insertions(+), 54 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index a1516d7..bb50579 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -49,7 +49,7 @@ Modular development plan with Rust daemon + Python Agent communicating via Unix β”‚ β”œβ”€β”€ IPC: MCP tool call β†’ agent retrieves β”‚ β”‚ └── IPC: CLI command β†’ agent executes β”‚ β”‚ β”‚ -β”‚ ONNX Embeddings (all-MiniLM-L6-v2, 384-dim) β”‚ +β”‚ API Embeddings (text-embedding-3-small, 1536-dim) β”‚ β”‚ 2-tier LLM: strong (ingest) + fast (compose, CLI, dedup) β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` @@ -132,7 +132,7 @@ agent/src/ Dependencies: - `pydantic-ai` (agent framework) - `litellm` (multi-provider LLM support) -- `onnxruntime` (embeddings) +- `openai` (embeddings API client) - `pydantic` (schemas) Directory structure: @@ -148,7 +148,7 @@ memory_service/ β”‚ β”‚ β”œβ”€β”€ filesystem.py # find_cli_configs, read/write_file β”‚ β”‚ β”œβ”€β”€ config.py # init_project, mcp_config, user_profile β”‚ β”‚ └── db.py # query, add, update memories -β”‚ β”œβ”€β”€ embeddings.py # ONNX embeddings +β”‚ β”œβ”€β”€ embeddings.py # API embeddings (OpenAI, etc.) 
β”‚ β”œβ”€β”€ retrieval.py # Similarity search β”‚ └── schemas/ └── tests/ @@ -162,14 +162,14 @@ memory_service/ SQLite + sqlite-vec initialization: ```sql --- memories table +-- memories table (individual DB: squirrel.db) CREATE TABLE memories ( id TEXT PRIMARY KEY, content_hash TEXT NOT NULL UNIQUE, content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- user_style | project_fact | pitfall | recipe + memory_type TEXT NOT NULL, -- user_style | user_profile | process | pitfall | recipe | project_fact repo TEXT NOT NULL, -- repo path OR 'global' - embedding BLOB, + embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low state TEXT NOT NULL DEFAULT 'active', -- active | deleted @@ -180,6 +180,25 @@ CREATE TABLE memories ( deleted_at TEXT ); +-- team memories table (group DB: group.db) - paid tier +CREATE TABLE team_memories ( + id TEXT PRIMARY KEY, + content_hash TEXT NOT NULL UNIQUE, + content TEXT NOT NULL, + memory_type TEXT NOT NULL, -- team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact + repo TEXT NOT NULL, -- repo path OR 'global' + embedding BLOB, -- 1536-dim float32 + confidence REAL NOT NULL, + importance TEXT NOT NULL DEFAULT 'medium', + state TEXT NOT NULL DEFAULT 'active', + team_id TEXT NOT NULL, -- team identifier + contributed_by TEXT NOT NULL, -- user who shared this + source_memory_id TEXT, -- original individual memory ID (if promoted) + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + deleted_at TEXT +); + -- events table CREATE TABLE events ( id TEXT PRIMARY KEY, @@ -240,11 +259,20 @@ Paths: - `~/.sqrl/` (global) - `/.sqrl/` (project) +Files: +- `squirrel.db` - individual memories (free) +- `group.db` - team memories (paid, synced) +- `config.toml` - settings + Config fields: - llm.provider, llm.api_key, llm.base_url - llm.strong_model, llm.fast_model (2-tier design) +- embedding.provider, 
embedding.model (default: openai/text-embedding-3-small) - daemon.idle_timeout_hours (default: 2) - daemon.socket_path +- team.enabled, team.team_id (paid tier) +- team.sync_mode (cloud | self-hosted | local) +- team.sync_url (for self-hosted) --- @@ -378,10 +406,17 @@ UUIDβ†’integer mapping when showing existing memories to LLM (prevents hallucina ### C4. Schemas (`schemas/`) -Memory schema: +Memory schema (individual): - id, content_hash, content, memory_type, repo, embedding - confidence, importance, state, user_id, assistant_id - created_at, updated_at, deleted_at +- memory_type: user_style | user_profile | process | pitfall | recipe | project_fact + +TeamMemory schema (group, paid): +- id, content_hash, content, memory_type, repo, embedding +- confidence, importance, state, team_id, contributed_by, source_memory_id +- created_at, updated_at, deleted_at +- memory_type: team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact UserProfile schema: - key, value, source (explicit|inferred), confidence, updated_at @@ -394,19 +429,28 @@ UserProfile schema: **ingest_episode(events):** LLM analysis, task segmentation, outcome classification, memory extraction -**search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results +**search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results (searches both individual and team DBs) **get_task_context(task, budget):** -1. Vector search retrieves top 20 candidates +1. Vector search retrieves top 20 candidates (from both individual and team DBs) 2. LLM (fast_model) reranks + composes context prompt: - Selects relevant memories - - Resolves conflicts + - Resolves conflicts (team memories may override individual) - Merges related memories - Generates structured prompt with memory IDs 3. 
Returns ready-to-inject context prompt within token budget **forget_memory(id):** Soft-delete (set state='deleted') +**share_memory(id, target_type):** Promote individual memory to team DB (manual, opt-in) +- Copy memory from squirrel.db to group.db +- Set contributed_by, source_memory_id +- Optional type conversion (e.g., pitfall β†’ shared_pitfall) + +**export_memories(filters, format):** Export memories as JSON for sharing/backup + +**import_memories(data):** Import memories from JSON + ### D2. Filesystem Tools (`tools/filesystem.py`) **find_cli_configs():** Scan for ~/.claude, ~/.codex-cli, ~/.gemini, etc. @@ -426,6 +470,16 @@ UserProfile schema: **get_user_profile(), set_user_profile(key, value):** Manage user_profile table +### D3.1 Team Tools (`tools/team.py`) - Paid Tier + +**team_join(team_id):** Join a team, enable sync + +**team_create(name):** Create new team, get team_id + +**team_sync():** Force sync with cloud/self-hosted server + +**team_leave():** Leave team, keep local copies of team memories + ### D4. DB Tools (`tools/db.py`) **query_memories(filters):** Direct DB query with filtering @@ -438,9 +492,9 @@ UserProfile schema: ### D5. Embeddings (`embeddings.py`) -ONNX runtime with all-MiniLM-L6-v2 (384-dim). +API-based embeddings via OpenAI (text-embedding-3-small, 1536-dim). -Batch embedding, model cached in memory. +Supports multiple providers via config. Batch embedding with retry logic. ### D6. 
Retrieval (`retrieval.py`) @@ -497,6 +551,14 @@ Supports both natural language and direct commands: - `sqrl init --skip-history` β†’ agent interprets - `sqrl update` β†’ self-update binary +Team commands (paid tier): +- `sqrl share ` β†’ promote individual memory to team +- `sqrl export ` β†’ export memories as JSON +- `sqrl import ` β†’ import memories +- `sqrl team join ` β†’ join team +- `sqrl team create ` β†’ create team +- `sqrl team sync` β†’ force sync + --- ## Phase X – Hardening @@ -554,19 +616,37 @@ Supports both natural language and direct commands: --- -## v1.1 Scope (Future) - -- Auto-update (background check + apply on restart) -- LLM-based retrieval reranking for complex queries -- Memory consolidation (periodic merging of similar memories) -- `sqrl debug` command for retrieval debugging +## v1 Scope + +**Individual Features (Free):** +- Passive log watching (4 CLIs) +- Success detection (SUCCESS/FAILURE/UNCERTAIN) +- Unified Python agent with tools +- Natural language CLI +- MCP integration (2 tools) +- Lazy daemon (start on demand, stop after 2hr idle) +- Retroactive log ingestion on init (token-limited) +- 6 memory types (user_style, user_profile, process, pitfall, recipe, project_fact) +- Near-duplicate deduplication (0.9 threshold) +- Cross-platform (Mac, Linux, Windows) +- Export/import memories (JSON) +- Auto-update (`sqrl update`) +- Memory consolidation +- Retrieval debugging tools + +**Team Features (Paid):** +- Cloud sync for group.db +- Team memory types (team_style, team_profile, shared_*, team_process) +- `sqrl share` command (promote individual to team) +- Team management (create, join, sync) +- Self-hosted option for enterprise ## v2 Scope (Future) - Hooks output for Claude Code / Gemini CLI - File injection for AGENTS.md / GEMINI.md -- Cloud sync (user_id/assistant_id fields prepared) -- Team memory sharing +- Team analytics dashboard +- Memory marketplace (export/sell recipe packs) - Web dashboard --- diff --git a/EXAMPLE.md 
b/EXAMPLE.md index b94851e..7528ecc 100644 --- a/EXAMPLE.md +++ b/EXAMPLE.md @@ -8,7 +8,7 @@ Squirrel watches AI tool logs, groups events into **Episodes** (4-hour time wind 1. **Segment Tasks** - Identify distinct user goals within the episode 2. **Classify Outcomes** - SUCCESS | FAILURE | UNCERTAIN for each task -3. **Extract Memories** - SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, UNCERTAINβ†’skip +3. **Extract Memories** - SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, ALLβ†’process, UNCERTAINβ†’skip Episode = batch of events from same repo within 4-hour window (internal batching, not a product concept). @@ -60,11 +60,13 @@ File structure after init: ``` ~/.sqrl/ β”œβ”€β”€ config.toml # API keys, settings -β”œβ”€β”€ squirrel.db # Global SQLite (user_style, user_profile) +β”œβ”€β”€ squirrel.db # Global individual (user_style, user_profile) +β”œβ”€β”€ group.db # Global team (team_style, team_profile) - synced, paid └── logs/ # Daemon logs ~/projects/inventory-api/.sqrl/ -└── squirrel.db # Project SQLite (project_fact, pitfall, recipe) +β”œβ”€β”€ squirrel.db # Project individual (process, pitfall, recipe, project_fact) +└── group.db # Project team (shared_*, team_process) - synced, paid ``` ### Step 1.3: Natural Language CLI @@ -199,6 +201,12 @@ async def ingest_episode(episode: dict) -> dict: "content": "Prefers async/await with type hints for all handlers", "importance": "high", "repo": "global", + }, + { + "type": "process", # Always recorded for export + "content": "Added GET /items/category endpoint with async handler, type hints, pytest fixture", + "importance": "medium", + "repo": "/Users/alice/projects/inventory-api", } ] }, @@ -218,6 +226,12 @@ async def ingest_episode(episode: dict) -> dict: "content": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on token refresh", "importance": "high", "repo": "/Users/alice/projects/inventory-api", + }, + { + "type": "process", # Always recorded for export + "content": "Tried 
localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", + "importance": "medium", + "repo": "/Users/alice/projects/inventory-api", } ] } @@ -251,8 +265,9 @@ Analyze this session: 3. For SUCCESS tasks: extract recipe (reusable pattern) or project_fact memories 4. For FAILURE tasks: extract pitfall memories (what NOT to do) 5. For tasks with failed attempts before success: extract BOTH pitfall AND recipe +6. For ALL tasks: extract process memory (what happened, for export/audit) -Return only high-confidence memories. When in doubt, skip. +Return only high-confidence memories. When in doubt, skip (except process - always record). ``` ### Step 3.2: Near-Duplicate Check + Save Memory @@ -414,16 +429,16 @@ CREATE TABLE events ( ); ``` -### Memory +### Individual Memory (squirrel.db - Free) ```sql CREATE TABLE memories ( id TEXT PRIMARY KEY, content_hash TEXT NOT NULL UNIQUE, content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- user_style | project_fact | pitfall | recipe + memory_type TEXT NOT NULL, -- user_style | user_profile | process | pitfall | recipe | project_fact repo TEXT NOT NULL, -- repo path OR 'global' - embedding BLOB, -- 384-dim float32 + embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low state TEXT NOT NULL DEFAULT 'active', -- active | deleted @@ -435,6 +450,28 @@ CREATE TABLE memories ( ); ``` +### Team Memory (group.db - Paid) + +```sql +CREATE TABLE team_memories ( + id TEXT PRIMARY KEY, + content_hash TEXT NOT NULL UNIQUE, + content TEXT NOT NULL, + memory_type TEXT NOT NULL, -- team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact + repo TEXT NOT NULL, -- repo path OR 'global' + embedding BLOB, -- 1536-dim float32 + confidence REAL NOT NULL, + importance TEXT NOT NULL DEFAULT 'medium', + state TEXT NOT NULL DEFAULT 'active', + team_id TEXT NOT NULL, + contributed_by TEXT 
NOT NULL, + source_memory_id TEXT, -- original individual memory (if promoted) + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + deleted_at TEXT +); +``` + ### User Profile (separate from memories) ```sql @@ -458,9 +495,11 @@ CREATE TABLE user_profile ( | Learning | Daemon watches CLI logs, parses to Events | | Batching | Groups events into Episodes (4hr OR 50 events) | | **Success Detection** | Agent segments Tasks, classifies SUCCESS/FAILURE/UNCERTAIN | -| Extraction | SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, UNCERTAINβ†’skip | +| Extraction | SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, ALLβ†’process, UNCERTAINβ†’skip | | Dedup | Near-duplicate check (0.9 similarity) before ADD | -| Retrieval | MCP β†’ Vector search (top 20) β†’ LLM reranks + composes context prompt | +| Retrieval | MCP β†’ Vector search (top 20 from both DBs) β†’ LLM reranks + composes context prompt | +| Share (opt-in) | `sqrl share` promotes individual memory to team DB | +| Team Sync | group.db syncs with cloud (paid) or self-hosted | | Idle | 2hr no activity β†’ daemon stops, next command restarts | ### Why Success Detection Matters @@ -485,12 +524,81 @@ This is the core insight: passive learning REQUIRES outcome classification. 
| Success detection | LLM classifies outcomes (strong_model) | Core insight for passive learning | | Task segmentation | LLM decides, no rules engine | Simple, semantic understanding | | Memory extraction | Outcome-based | Learn from both success and failure | +| **Process memory** | Always recorded | Audit trail, exportable for sharing | | **Context compose** | LLM reranks + generates prompt (fast_model) | Better than math scoring, one call | | **Natural language CLI** | Thin shell passes to agent | "By the way" - agent handles all | | **Retroactive ingestion** | Token-limited, not time-limited | Fair for all project sizes | | User profile | Separate table from user_style | Structured vs unstructured | -| Database layers | Global + Project SQLite | user_style/profile global, rest per-project | +| **3-layer DB** | Individual (squirrel.db) + Team (group.db) | Free vs Paid, local vs cloud | +| **Team sharing** | Manual opt-in via `sqrl share` | User controls what's shared | +| **Team DB location** | Cloud (default) / Self-hosted / Local | Flexibility for enterprise | | Near-duplicate threshold | 0.9 similarity | Avoid redundant memories | | Trivial query fast-path | Return empty <20ms | No wasted LLM calls | | **Cross-platform** | Mac, Linux, Windows from v1 | All platforms supported | | 100% passive | No user prompts during coding | Invisible during use | + +--- + +## Phase 6: Team Sharing (Paid) + +### Step 6.1: Alice Shares a Pitfall + +Alice finds a critical pitfall that should help the team: + +```bash +sqrl share mem_abc123 --as shared_pitfall +``` + +What happens: +1. Agent reads memory from `squirrel.db` +2. Copies to `group.db` with type `shared_pitfall` +3. Sets `contributed_by: alice`, `source_memory_id: mem_abc123` +4. Syncs `group.db` to cloud + +### Step 6.2: Bob Gets Team Context + +Bob joins Alice's team and works on the same project: + +```bash +sqrl team join abc-team-id +``` + +When Bob asks Claude Code to help with auth: +1. 
MCP calls `squirrel_get_task_context` +2. Vector search queries BOTH: + - Bob's `squirrel.db` (his individual memories) + - Team's `group.db` (shared team memories including Alice's pitfall) +3. LLM composes context with both individual and team memories +4. Bob gets Alice's auth pitfall in context without ever experiencing it himself + +### Step 6.3: Export for Onboarding + +Team lead exports all shared recipes for new developers: + +```bash +sqrl export shared_recipe --project --format json > onboarding.json +``` + +New developer imports: +```bash +sqrl import onboarding.json +``` + +--- + +## Memory Type Reference + +| Type | Scope | DB | Sync | Description | +|------|-------|-----|------|-------------| +| `user_style` | Global | squirrel.db | Local | Your coding preferences | +| `user_profile` | Global | squirrel.db | Local | Your info (name, role) | +| `process` | Project | squirrel.db | Local | What happened (exportable) | +| `pitfall` | Project | squirrel.db | Local | Issues you encountered | +| `recipe` | Project | squirrel.db | Local | Patterns that worked | +| `project_fact` | Project | squirrel.db | Local | Project knowledge | +| `team_style` | Global | group.db | Cloud | Team coding standards | +| `team_profile` | Global | group.db | Cloud | Team info | +| `team_process` | Project | group.db | Cloud | Shared what-happened | +| `shared_pitfall` | Project | group.db | Cloud | Team-wide issues | +| `shared_recipe` | Project | group.db | Cloud | Team-approved patterns | +| `shared_fact` | Project | group.db | Cloud | Team project knowledge | diff --git a/README.md b/README.md index cb4d513..194b1ba 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Squirrel -Local-first memory system for AI coding tools. Learns from your successes AND failures, providing personalized, task-aware context via MCP. +Local-first memory system for AI coding tools. Learns from your successes AND failures, providing personalized, task-aware context via MCP. 
Supports individual and team memory layers. ## What It Does @@ -13,6 +13,7 @@ You code with Claude Code / Codex / Cursor / Gemini CLI ↓ SUCCESS β†’ recipe/project_fact memories FAILURE β†’ pitfall memories (what NOT to do) + ALL β†’ process memories (what happened, exportable) ↓ AI tools call MCP β†’ get personalized context ↓ @@ -50,7 +51,7 @@ You code with Claude Code / Codex / Cursor / Gemini CLI β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”‚ Embeddings β”‚ β”‚ Retrieval β”‚ β”‚ -β”‚ β”‚ (ONNX model) β”‚ β”‚ (similarity) β”‚ β”‚ +β”‚ β”‚ (API-based) β”‚ β”‚ (similarity) β”‚ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` @@ -58,7 +59,7 @@ You code with Claude Code / Codex / Cursor / Gemini CLI | Component | Language | Role | |-----------|----------|------| | **Rust Daemon** | Rust | Log watcher, SQLite + sqlite-vec, MCP server, thin CLI | -| **Python Agent** | Python | Unified agent with tools, ONNX embeddings, retrieval | +| **Python Agent** | Python | Unified agent with tools, API embeddings, retrieval | ## Quick Start @@ -187,6 +188,24 @@ sqrl forget sqrl config set llm.model claude-sonnet ``` +### Team Commands + +```bash +# Share individual memory to team (manual, opt-in) +sqrl share # Promote to team DB +sqrl share --as pitfall # Share with type conversion + +# Export/Import memories +sqrl export pitfall # Export all pitfalls as JSON +sqrl export recipe --project # Export project recipes +sqrl import memories.json # Import memories + +# 
Team management (paid) +sqrl team join # Join a team +sqrl team create "Backend Team" # Create team +sqrl team sync # Force sync with cloud +``` + ## MCP Tools | Tool | Purpose | @@ -196,32 +215,59 @@ sqrl config set llm.model claude-sonnet ## Memory Types -| Type | Description | Example | -|------|-------------|---------| -| `user_style` | Coding preferences | "Prefers async/await" | -| `project_fact` | Project knowledge | "Uses PostgreSQL 15" | -| `pitfall` | Known issues | "API returns 500 on null user_id" | -| `recipe` | Successful patterns | "Use repository pattern for DB" | +### Individual Memories (Free) + +| Type | Scope | Description | Example | +|------|-------|-------------|---------| +| `user_style` | Global | Your coding preferences | "Prefers async/await" | +| `user_profile` | Global | Your info (name, role, skills) | "Backend dev, 5yr Python" | +| `process` | Project | What happened (exportable) | "Tried X, failed, then Y worked" | +| `pitfall` | Project | Issues you encountered | "API returns 500 on null user_id" | +| `recipe` | Project | Patterns that worked for you | "Use repository pattern for DB" | +| `project_fact` | Project | Project knowledge you learned | "Uses PostgreSQL 15" | + +### Team Memories (Paid - Cloud Sync) + +| Type | Scope | Description | Example | +|------|-------|-------------|---------| +| `team_style` | Global | Team coding standards | "Team uses ESLint + Prettier" | +| `team_profile` | Global | Team info | "Backend team, 5 devs" | +| `team_process` | Project | Shared what-happened logs | "Sprint 12: migrated to Redis" | +| `shared_pitfall` | Project | Team-wide known issues | "Never use ORM for bulk inserts" | +| `shared_recipe` | Project | Team-approved patterns | "Use factory pattern for tests" | +| `shared_fact` | Project | Team project knowledge | "Prod DB is on AWS RDS" | ## Storage Layout ``` ~/.sqrl/ β”œβ”€β”€ config.toml # User settings, API keys -β”œβ”€β”€ squirrel.db # Global SQLite (user_style, user_profile) 
+β”œβ”€β”€ squirrel.db # Global individual (user_style, user_profile) +β”œβ”€β”€ group.db # Global team (team_style, team_profile) - synced └── logs/ # Daemon logs /.sqrl/ -β”œβ”€β”€ squirrel.db # Project SQLite (project memories) +β”œβ”€β”€ squirrel.db # Project individual (process, pitfall, recipe, project_fact) +β”œβ”€β”€ group.db # Project team (shared_*, team_process) - synced └── config.toml # Project overrides (optional) ``` -### Database Layers +### 3-Layer Database Architecture -| Layer | Location | Contents | -|-------|----------|----------| -| **User** | `~/.sqrl/squirrel.db` | user_style, user_profile | -| **Project** | `/.sqrl/squirrel.db` | project_fact, pitfall, recipe | +| Layer | DB File | Contents | Sync | +|-------|---------|----------|------| +| **Global Individual** | `~/.sqrl/squirrel.db` | user_style, user_profile | Local only | +| **Global Team** | `~/.sqrl/group.db` | team_style, team_profile | Cloud (paid) | +| **Project Individual** | `/.sqrl/squirrel.db` | process, pitfall, recipe, project_fact | Local only | +| **Project Team** | `/.sqrl/group.db` | shared_pitfall, shared_recipe, shared_fact, team_process | Cloud (paid) | + +### Team Database Options + +| Mode | Location | Use Case | +|------|----------|----------| +| **Cloud** (default) | Squirrel Cloud | Teams, auto-sync, paid tier | +| **Self-hosted** | Your server | Enterprise, data sovereignty | +| **Local file** | `group.db` file | Offline, manual export/import | ## Configuration @@ -236,8 +282,18 @@ base_url = "" # Optional, for local models (Ollama, LMStudio strong_model = "gemini-2.5-pro" # Complex reasoning (episode ingestion) fast_model = "gemini-3-flash" # Fast tasks (context compose, CLI, dedup) +[embedding] +provider = "openai" # openai | gemini | cohere | ... 
+model = "text-embedding-3-small" # 1536-dim, $0.10/M tokens + [daemon] idle_timeout_hours = 2 # Stop after N hours inactive + +[team] # Paid tier +enabled = false # Enable team features +team_id = "" # Your team ID +sync_mode = "cloud" # cloud | self-hosted | local +sync_url = "" # Custom sync URL (self-hosted only) ``` ### LLM Usage @@ -267,7 +323,7 @@ Squirrel/ β”‚ β”œβ”€β”€ server.py # Unix socket IPC server β”‚ β”œβ”€β”€ agent.py # Unified agent with tools β”‚ β”œβ”€β”€ tools/ # Tool implementations -β”‚ β”œβ”€β”€ embeddings.py # ONNX embeddings +β”‚ β”œβ”€β”€ embeddings.py # API embeddings (OpenAI, etc.) β”‚ └── retrieval.py # Similarity search β”‚ └── docs/ @@ -304,7 +360,7 @@ source .venv/bin/activate && pytest ## v1 Scope -**In:** +**Individual Features (Free):** - Passive log watching (4 CLIs) - Success detection (SUCCESS/FAILURE/UNCERTAIN classification) - Unified Python agent with tools @@ -312,14 +368,22 @@ source .venv/bin/activate && pytest - MCP integration (2 tools) - Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) -- 4 memory types + user_profile +- 6 memory types (user_style, user_profile, process, pitfall, recipe, project_fact) - Near-duplicate deduplication (0.9 threshold) - Cross-platform (Mac, Linux, Windows) -- `sqrl update` command - -**v1.1:** Auto-update, memory consolidation, retrieval debugging tools - -**v2:** Hooks output, file injection (AGENTS.md/GEMINI.md), cloud sync, team sharing +- Export/import memories (JSON) +- Auto-update (`sqrl update`) +- Memory consolidation +- Retrieval debugging tools + +**Team Features (Paid):** +- Cloud sync for group.db +- Team memory types (team_style, team_profile, shared_*, team_process) +- `sqrl share` command (promote individual to team) +- Team management (create, join, sync) +- Self-hosted option for enterprise + +**v2:** Hooks output, file injection (AGENTS.md/GEMINI.md), team analytics, memory marketplace ## Contributing From 
c6c9d4badedf5843178a1b5aa136c18d9b386397 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 02:55:52 +0800 Subject: [PATCH 04/15] docs: add complete technology stack decisions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Technology Stack table to README.md and DEVELOPMENT_PLAN.md - IPC: JSON-RPC 2.0 (MCP-compatible) - MCP SDK: rmcp (official Rust SDK) - Build/Release: dist (cargo-dist) with Homebrew, MSI, installers - Auto-update: axoupdater (dist's official updater) - Python packaging: PyInstaller (bundled, zero user deps) - Cloud sync: SQLite Session Extension + server_seq cursor - Update installation section with MSI for Windows πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 68 ++++++++++++++++++++++++++++++++++++--------- README.md | 30 +++++++++++++++++--- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index bb50579..a2cbeb1 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -2,6 +2,23 @@ Modular development plan with Rust daemon + Python Agent communicating via Unix socket IPC. 
+## Technology Stack + +| Category | Technology | Notes | +|----------|------------|-------| +| **Storage** | SQLite + sqlite-vec | Local-first, vector search | +| **IPC Protocol** | JSON-RPC 2.0 | MCP-compatible, over Unix socket | +| **MCP SDK** | rmcp | Official Rust SDK (modelcontextprotocol/rust-sdk) | +| **CLI Framework** | clap | Rust CLI parsing | +| **Agent Framework** | PydanticAI | Python agent with tools | +| **LLM Client** | LiteLLM | Multi-provider support | +| **Embeddings** | OpenAI text-embedding-3-small | 1536-dim, API-based | +| **Cloud Sync** | SQLite Session Extension | Changeset-based sync for team DB | +| **Build/Release** | dist (cargo-dist) | Generates Homebrew, MSI, installers | +| **Auto-update** | axoupdater | dist's official updater | +| **Python Packaging** | PyInstaller | Bundled, zero user deps | +| **Logging** | tracing (Rust), structlog (Python) | Structured logging | + ## Architecture Overview ``` @@ -108,9 +125,11 @@ Storage) Daemon) Agent) Tools) CLI) Dependencies: - `tokio` (async runtime) - `rusqlite` + `sqlite-vec` (storage) -- `serde`, `serde_json` (serialization) +- `serde`, `serde_json` (serialization, JSON-RPC 2.0) - `notify` (file watching) -- `clap` (CLI) +- `clap` (CLI framework) +- `rmcp` (official MCP SDK - modelcontextprotocol/rust-sdk) +- `tracing` (structured logging) - `uuid`, `chrono` (ID, timestamps) Directory structure: @@ -134,6 +153,7 @@ Dependencies: - `litellm` (multi-provider LLM support) - `openai` (embeddings API client) - `pydantic` (schemas) +- `structlog` (structured logging) Directory structure: ``` @@ -313,12 +333,18 @@ On flush: create Episode, send to Python via IPC, mark events processed ### B4. 
IPC Client (`ipc.rs`) -Unix socket client with JSON-RPC style protocol: +Unix socket client with JSON-RPC 2.0 protocol (MCP-compatible): ```json -{"method": "agent_execute", "params": {...}, "id": 123} -{"result": {...}, "id": 123} +{"jsonrpc": "2.0", "method": "ingest_episode", "params": {"episode": {...}}, "id": 1} +{"jsonrpc": "2.0", "result": {"memories_created": 3}, "id": 1} ``` +Methods: +- `ingest_episode` - Process episode, extract memories +- `get_task_context` - MCP tool call +- `search_memories` - MCP tool call +- `execute_command` - CLI natural language/direct command + --- ## Track C – Python: Unified Agent @@ -511,6 +537,8 @@ Access logging to memory_access_log table. ### E1. MCP Server (`mcp.rs`) +Uses `rmcp` (official MCP SDK from modelcontextprotocol/rust-sdk). + 2 tools: ``` squirrel_get_task_context @@ -549,7 +577,7 @@ fn main() { Supports both natural language and direct commands: - `sqrl "setup this project"` β†’ agent interprets - `sqrl init --skip-history` β†’ agent interprets -- `sqrl update` β†’ self-update binary +- `sqrl update` β†’ auto-update via axoupdater Team commands (paid tier): - `sqrl share ` β†’ promote individual memory to team @@ -571,14 +599,28 @@ Team commands (paid tier): - Unit tests: storage, events, agent tools - Integration tests: full flow from log to memory to retrieval -### Cross-Platform -- Mac: brew + install script -- Linux: install script + AUR + nixpkg -- Windows: install script + winget + scoop +### Build & Release (dist) + +Uses `dist` (cargo-dist) as single release orchestrator: +- Builds Rust daemon for Mac/Linux/Windows +- Builds Python agent via PyInstaller (as dist workspace member) +- Generates installers: Homebrew, MSI, shell/powershell scripts + +### Cross-Platform Installation + +| Platform | Primary | Fallback | +|----------|---------|----------| +| Mac | `brew install sqrl` | install script | +| Linux | `brew install sqrl` | install script, AUR, nixpkg | +| Windows | MSI installer | winget, install 
script | + +Windows note: MSI recommended over raw .exe to reduce SmartScreen/AV friction. + +### Auto-Update (axoupdater) -### Update Mechanism -- `sqrl update`: download latest binary, replace self -- Auto-update in v1.1 +- `sqrl update` uses axoupdater (dist's official updater) +- Updates both Rust daemon and Python agent together +- Reads dist install receipt to determine installed version/source --- diff --git a/README.md b/README.md index 194b1ba..cf68e5a 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,19 @@ You code with Claude Code / Codex / Cursor / Gemini CLI | **Rust Daemon** | Rust | Log watcher, SQLite + sqlite-vec, MCP server, thin CLI | | **Python Agent** | Python | Unified agent with tools, API embeddings, retrieval | +### Technology Stack + +| Layer | Technology | Notes | +|-------|------------|-------| +| **Storage** | SQLite + sqlite-vec | Local-first, vector search | +| **IPC** | JSON-RPC 2.0 over Unix socket | MCP-compatible protocol | +| **MCP SDK** | rmcp (official Rust SDK) | modelcontextprotocol/rust-sdk | +| **Agent Framework** | PydanticAI + LiteLLM | Multi-provider LLM support | +| **Embeddings** | OpenAI text-embedding-3-small | 1536-dim, API-based | +| **Cloud Sync** | SQLite Session Extension | Changeset-based sync for team DB | +| **Build/Release** | dist (cargo-dist) | Generates Homebrew, MSI, installers | +| **Auto-update** | axoupdater | dist's official updater | + ## Quick Start ```bash @@ -82,13 +95,22 @@ sqrl status | Platform | Method | |----------|--------| -| **Mac** | `brew install sqrl` or install script | -| **Linux** | Install script, AUR (Arch), nixpkg (NixOS) | -| **Windows** | `winget install sqrl` or `scoop install sqrl` | +| **Mac** | `brew install sqrl` (recommended) or install script | +| **Linux** | `brew install sqrl`, install script, AUR (Arch), nixpkg (NixOS) | +| **Windows** | MSI installer (recommended), `winget install sqrl`, or install script | -Universal install script works on all platforms: 
+Universal install script (fallback): ```bash +# Mac/Linux curl -sSL https://sqrl.dev/install.sh | sh + +# Windows (PowerShell) +irm https://sqrl.dev/install.ps1 | iex +``` + +Auto-update: +```bash +sqrl update # Updates both Rust daemon and Python agent ``` ## How It Works From f876a305db7b9479a403bfafc03223f0d4dbe49e Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 12:09:19 +0800 Subject: [PATCH 05/15] docs: add episode segmentation and memory lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Episode segmentation: EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION - SUCCESS/FAILURE only for EXECUTION_TASK with evidence requirement - Memory lifecycle: status (active/inactive/invalidated) + validity tracking - Fact contradiction detection via semantic_key + LLM - Soft delete mechanism (sqrl forget) - Updated v1/v2 scope with limitations πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 453 +++++++++++++++++++++++++++++++++++--------- EXAMPLE.md | 401 +++++++++++++++++++++------------------ README.md | 243 +++++++++++++++--------- 3 files changed, 740 insertions(+), 357 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index a2cbeb1..28b997c 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -13,7 +13,6 @@ Modular development plan with Rust daemon + Python Agent communicating via Unix | **Agent Framework** | PydanticAI | Python agent with tools | | **LLM Client** | LiteLLM | Multi-provider support | | **Embeddings** | OpenAI text-embedding-3-small | 1536-dim, API-based | -| **Cloud Sync** | SQLite Session Extension | Changeset-based sync for team DB | | **Build/Release** | dist (cargo-dist) | Generates Homebrew, MSI, installers | | **Auto-update** | axoupdater | dist's official updater | | **Python Packaging** | PyInstaller | Bundled, zero user deps | @@ -71,23 +70,91 @@ Modular 
development plan with Rust daemon + Python Agent communicating via Unix β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` -## Core Insight: Success Detection +## Core Insight: Episode Segmentation -Passive learning from logs requires knowing WHAT succeeded and WHAT failed. Unlike explicit APIs where users call `memory.add()`, we infer success from conversation patterns. +Not all sessions are coding tasks with success/failure outcomes. Sessions include architecture discussions, research, brainstorming - these produce valuable memories but don't fit the success/failure model. -**Success signals (implicit):** +**Episode β†’ Segments β†’ Memories (single LLM call):** + +1. **Segment the episode** by kind (not by task success/failure) +2. **Extract memories** based on segment kind +3. **Only EXECUTION_TASK segments** get SUCCESS/FAILURE classification + +### Segment Kinds + +| Kind | Description | Outcome Field | Memory Output | +|------|-------------|---------------|---------------| +| `EXECUTION_TASK` | Coding, fixing bugs, running commands | `outcome`: SUCCESS / FAILURE / UNCERTAIN | lesson (with outcome), fact | +| `PLANNING_DECISION` | Architecture, design, tech choices | `resolution`: DECIDED / OPEN | fact, lesson (rationale), profile | +| `RESEARCH_LEARNING` | Learning, exploring docs, asking questions | `resolution`: ANSWERED / PARTIAL | fact, lesson | +| `DISCUSSION` | Brainstorming, market research, chat | (none) | profile, lesson (insights) | + +**Key rule:** SUCCESS/FAILURE only allowed on EXECUTION_TASK. Other kinds never output FAILURE. 
+ +### Success/Failure Detection (EXECUTION_TASK only) + +**Success signals (require evidence):** - AI says "done" / "complete" + User moves to next task β†’ SUCCESS - Tests pass, build succeeds β†’ SUCCESS - User says "thanks", "perfect", "works" β†’ SUCCESS -**Failure signals:** +**Failure signals (require evidence):** - Same error reappears after attempted fix β†’ FAILURE - User says "still broken", "that didn't work" β†’ FAILURE -- User abandons task mid-conversation β†’ UNCERTAIN + +**No evidence β†’ UNCERTAIN** (conservative default) + +### Episode Ingestion Output Schema + +```json +{ + "episode_summary": "...", + "segments": [ + { + "id": "seg_1", + "kind": "EXECUTION_TASK", + "title": "Fix auth 500 on null user_id", + "event_range": [12, 33], + "outcome": { + "status": "SUCCESS", + "evidence": ["event#31 tests passed", "event#33 user confirmed"] + } + }, + { + "id": "seg_2", + "kind": "PLANNING_DECISION", + "title": "Choose sync conflict strategy", + "event_range": [34, 44], + "resolution": "DECIDED", + "decision": { + "choice": "server-wins", + "rationale": ["shared team DB", "simplicity"] + } + } + ], + "memories": [ + { + "memory_type": "FACT", + "scope": "PROJECT", + "content": "Project uses PostgreSQL 15 via Prisma.", + "source_segments": ["seg_1"], + "confidence": 0.86 + }, + { + "memory_type": "LESSON", + "scope": "PROJECT", + "outcome": "failure", + "content": "Validate user_id before DB insert to avoid 500s.", + "source_segments": ["seg_1"], + "confidence": 0.9 + } + ] +} +``` **The LLM-decides-everything approach:** - One LLM call per Episode (4-hour window) -- LLM segments tasks, classifies outcomes, extracts memories +- LLM segments by kind first, then extracts memories per segment - No rules engine, no heuristics for task detection - 100% passive - no user prompts or confirmations @@ -182,41 +249,27 @@ memory_service/ SQLite + sqlite-vec initialization: ```sql --- memories table (individual DB: squirrel.db) +-- memories table (squirrel.db) 
CREATE TABLE memories ( id TEXT PRIMARY KEY, content_hash TEXT NOT NULL UNIQUE, content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- user_style | user_profile | process | pitfall | recipe | project_fact + memory_type TEXT NOT NULL, -- lesson | fact | profile + outcome TEXT, -- success | failure (for lesson type) + fact_type TEXT, -- knowledge | process (for fact type) + scope TEXT NOT NULL, -- global | project repo TEXT NOT NULL, -- repo path OR 'global' embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low - state TEXT NOT NULL DEFAULT 'active', -- active | deleted + status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated + valid_from TEXT NOT NULL, -- when this became true + valid_to TEXT, -- when it stopped being true (null = still valid) + superseded_by TEXT, -- memory_id that replaced this (for invalidated) + semantic_key TEXT, -- for fact contradiction detection (e.g., db.engine) user_id TEXT NOT NULL DEFAULT 'local', - assistant_id TEXT NOT NULL DEFAULT 'squirrel', created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - deleted_at TEXT -); - --- team memories table (group DB: group.db) - paid tier -CREATE TABLE team_memories ( - id TEXT PRIMARY KEY, - content_hash TEXT NOT NULL UNIQUE, - content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact - repo TEXT NOT NULL, -- repo path OR 'global' - embedding BLOB, -- 1536-dim float32 - confidence REAL NOT NULL, - importance TEXT NOT NULL DEFAULT 'medium', - state TEXT NOT NULL DEFAULT 'active', - team_id TEXT NOT NULL, -- team identifier - contributed_by TEXT NOT NULL, -- user who shared this - source_memory_id TEXT, -- original individual memory ID (if promoted) - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - deleted_at TEXT + updated_at TEXT NOT NULL ); -- events table @@ -263,6 +316,52 @@ 
CREATE TABLE memory_access_log ( ); ``` +### A1.1 Memory Lifecycle (Forget Mechanism) + +**Status values:** +- `active` - Normal, appears in retrieval +- `inactive` - Soft deleted by user (`sqrl forget`), recoverable, hidden from retrieval +- `invalidated` - Superseded by newer fact, keeps history, hidden from retrieval + +**Validity fields (for fact/profile):** +- `valid_from` - When this became true (default: created_at) +- `valid_to` - When it stopped being true (null = still valid) +- `superseded_by` - ID of memory that replaced this + +**Retrieval filter:** +```sql +WHERE status = 'active' + AND (valid_to IS NULL OR valid_to > datetime('now')) +``` + +**Contradiction detection (during ingestion):** + +For facts with `semantic_key`: +``` +semantic_key examples: db.engine, db.version, api.framework, + auth.method, package_manager, orm +``` +- Same key + different value β†’ invalidate old (status='invalidated', valid_to=now, superseded_by=new_id) +- LLM extracts semantic_key when possible during ingestion + +For free-text facts without clear key: +- LLM judges semantic conflict between new fact and similar existing facts +- High confidence conflict β†’ invalidate old +- Low confidence β†’ keep both, let retrieval handle via recency weighting + +**No cascade delete:** +- Invalidating a fact does NOT delete related lessons +- Related lessons get flagged in retrieval output: `dependency_changed: true` + +**CLI behavior:** +- `sqrl forget ` β†’ status='inactive' (soft delete, recoverable) +- `sqrl forget "deprecated API"` β†’ search + confirm + soft delete matching memories + +**v1 limitations (documented for users):** +- No TTL/auto-expiration (manual forget only) +- No hard delete/purge (data remains in SQLite file) +- Free-text contradiction detection depends on LLM, may have false positives + ### A2. 
Event model (`events.rs`) Event struct (normalized, CLI-agnostic): @@ -280,19 +379,19 @@ Paths: - `/.sqrl/` (project) Files: -- `squirrel.db` - individual memories (free) -- `group.db` - team memories (paid, synced) +- `squirrel.db` - memories - `config.toml` - settings Config fields: +- agents.claude_code, agents.codex_cli, agents.gemini_cli, agents.cursor (CLI selection) - llm.provider, llm.api_key, llm.base_url - llm.strong_model, llm.fast_model (2-tier design) - embedding.provider, embedding.model (default: openai/text-embedding-3-small) - daemon.idle_timeout_hours (default: 2) - daemon.socket_path -- team.enabled, team.team_id (paid tier) -- team.sync_mode (cloud | self-hosted | local) -- team.sync_url (for self-hosted) + +Projects registry (`~/.sqrl/projects.json`): +- List of initialized project paths for `sqrl sync` --- @@ -418,31 +517,52 @@ class SquirrelAgent: ### C3. Episode Ingestion (via ingest_episode tool) -LLM analyzes entire Episode in ONE call: -1. Segment into Tasks (user goals) -2. Classify each: SUCCESS | FAILURE | UNCERTAIN -3. Extract memories based on outcome: - - SUCCESS β†’ recipe or project_fact - - FAILURE β†’ pitfall - - UNCERTAIN β†’ skip +LLM analyzes entire Episode in ONE call (segment-first approach): + +1. **Segment by kind** (not by success/failure): + - EXECUTION_TASK - coding, fixing, running commands + - PLANNING_DECISION - architecture, design choices + - RESEARCH_LEARNING - learning, exploring docs + - DISCUSSION - brainstorming, chat + +2. **For EXECUTION_TASK segments only**, classify outcome: + - SUCCESS (with evidence: tests passed, user confirmed, etc.) + - FAILURE (with evidence: error persists, user says "didn't work") + - UNCERTAIN (no clear evidence - conservative default) + +3. 
**Extract memories based on segment kind:** + - EXECUTION_TASK β†’ lesson (with outcome), fact (knowledge discovered) + - PLANNING_DECISION β†’ fact (decisions), lesson (rationale for rejected options), profile + - RESEARCH_LEARNING β†’ fact (knowledge), lesson (key learnings) + - DISCUSSION β†’ profile (user preferences), lesson (insights) + +4. **Contradiction check for facts:** + - Extract semantic_key if possible (db.engine, api.framework, etc.) + - Check existing facts with same key β†’ invalidate old if conflict + - Free-text facts β†’ LLM judges semantic conflict -Near-duplicate check before ADD (0.9 similarity threshold). +5. **Near-duplicate check** before ADD (0.9 similarity threshold) UUIDβ†’integer mapping when showing existing memories to LLM (prevents hallucination). ### C4. Schemas (`schemas/`) -Memory schema (individual): +Memory schema: - id, content_hash, content, memory_type, repo, embedding -- confidence, importance, state, user_id, assistant_id -- created_at, updated_at, deleted_at -- memory_type: user_style | user_profile | process | pitfall | recipe | project_fact - -TeamMemory schema (group, paid): -- id, content_hash, content, memory_type, repo, embedding -- confidence, importance, state, team_id, contributed_by, source_memory_id -- created_at, updated_at, deleted_at -- memory_type: team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact +- outcome (for lesson), fact_type (for fact), scope +- confidence, importance, user_id +- created_at, updated_at +- memory_type: lesson | fact | profile +- outcome (lesson only): success | failure +- fact_type (fact only): knowledge | process +- scope: global | project + +Lifecycle fields: +- status: active | inactive | invalidated +- valid_from: timestamp (when this became true) +- valid_to: timestamp | null (when it stopped being true) +- superseded_by: memory_id | null (for invalidated facts) +- semantic_key: string | null (for fact contradiction detection) UserProfile 
schema: - key, value, source (explicit|inferred), confidence, updated_at @@ -455,23 +575,20 @@ UserProfile schema: **ingest_episode(events):** LLM analysis, task segmentation, outcome classification, memory extraction -**search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results (searches both individual and team DBs) +**search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results **get_task_context(task, budget):** -1. Vector search retrieves top 20 candidates (from both individual and team DBs) +1. Vector search retrieves top 20 candidates 2. LLM (fast_model) reranks + composes context prompt: - Selects relevant memories - - Resolves conflicts (team memories may override individual) + - Resolves conflicts between memories - Merges related memories - Generates structured prompt with memory IDs 3. Returns ready-to-inject context prompt within token budget -**forget_memory(id):** Soft-delete (set state='deleted') - -**share_memory(id, target_type):** Promote individual memory to team DB (manual, opt-in) -- Copy memory from squirrel.db to group.db -- Set contributed_by, source_memory_id -- Optional type conversion (e.g., pitfall β†’ shared_pitfall) +**forget_memory(id_or_query):** +- If ID: set status='inactive' (soft delete, recoverable) +- If natural language query: search β†’ confirm with user β†’ soft delete matches **export_memories(filters, format):** Export memories as JSON for sharing/backup @@ -490,21 +607,72 @@ UserProfile schema: **init_project(path, skip_history):** 1. Create `/.sqrl/squirrel.db` 2. If not skip_history: scan_project_logs β†’ ingest -3. find_cli_configs β†’ offer to set_mcp_config +3. For each enabled CLI in `config.agents`: + - Configure MCP (add Squirrel server to CLI's MCP config) + - Inject instruction text to agent file (CLAUDE.md, AGENTS.md, GEMINI.md, .cursor/rules/) +4. Register project in `~/.sqrl/projects.json` + +**sync_projects():** +1. 
Read enabled CLIs from `config.agents` +2. For each project in `~/.sqrl/projects.json`: + - For each enabled CLI not yet configured in that project: + - Configure MCP + - Inject instruction text **get_mcp_config(cli), set_mcp_config(cli, server, config):** Read/write MCP config files +**get_agent_instructions(cli), set_agent_instructions(cli, content):** Read/write agent instruction files + **get_user_profile(), set_user_profile(key, value):** Manage user_profile table -### D3.1 Team Tools (`tools/team.py`) - Paid Tier +### D3.1 MCP Config Locations + +| CLI | MCP Config Location | +|-----|---------------------| +| Claude Code | `~/.claude.json` or `/.mcp.json` | +| Codex CLI | `codex mcp add-server` command | +| Gemini CLI | `/.gemini/settings.json` | +| Cursor | `~/.cursor/mcp.json` or `/.cursor/mcp.json` | -**team_join(team_id):** Join a team, enable sync +MCP server definition: +```json +{ + "mcpServers": { + "squirrel": { + "command": "sqrl-daemon", + "args": ["--mcp"], + "disabled": false + } + } +} +``` + +### D3.2 Agent Instruction Files + +| CLI | Instruction File | +|-----|------------------| +| Claude Code | `/CLAUDE.md` | +| Codex CLI | `/AGENTS.md` | +| Gemini CLI | `/GEMINI.md` | +| Cursor | `/.cursor/rules/squirrel.mdc` | + +Instruction text to inject: +```markdown +## Squirrel Memory System -**team_create(name):** Create new team, get team_id +This project uses Squirrel for persistent memory across sessions. -**team_sync():** Force sync with cloud/self-hosted server +ALWAYS call `squirrel_get_task_context` BEFORE: +- Fixing bugs (to check if this bug was seen before) +- Refactoring code (to get patterns that worked/failed) +- Adding features touching existing modules +- Debugging errors that seem familiar -**team_leave():** Leave team, keep local copies of team memories +DO NOT call for: +- Simple typo fixes +- Adding comments +- Formatting changes +``` ### D4. 
DB Tools (`tools/db.py`) @@ -577,15 +745,11 @@ fn main() { Supports both natural language and direct commands: - `sqrl "setup this project"` β†’ agent interprets - `sqrl init --skip-history` β†’ agent interprets +- `sqrl config` β†’ interactive CLI selection +- `sqrl sync` β†’ update all projects with new CLI configs - `sqrl update` β†’ auto-update via axoupdater - -Team commands (paid tier): -- `sqrl share ` β†’ promote individual memory to team - `sqrl export ` β†’ export memories as JSON - `sqrl import ` β†’ import memories -- `sqrl team join ` β†’ join team -- `sqrl team create ` β†’ create team -- `sqrl team sync` β†’ force sync --- @@ -660,36 +824,147 @@ Windows note: MSI recommended over raw .exe to reduce SmartScreen/AV friction. ## v1 Scope -**Individual Features (Free):** - Passive log watching (4 CLIs) -- Success detection (SUCCESS/FAILURE/UNCERTAIN) +- Episode segmentation (EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION) +- Success detection for EXECUTION_TASK only (SUCCESS/FAILURE/UNCERTAIN with evidence) - Unified Python agent with tools - Natural language CLI - MCP integration (2 tools) - Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) -- 6 memory types (user_style, user_profile, process, pitfall, recipe, project_fact) +- 3 memory types (lesson, fact, profile) with scope flag +- Memory lifecycle: status (active/inactive/invalidated) + validity (valid_from/valid_to) +- Fact contradiction detection (semantic_key + LLM for free-text) +- Soft delete (`sqrl forget`) - no hard purge - Near-duplicate deduplication (0.9 threshold) - Cross-platform (Mac, Linux, Windows) - Export/import memories (JSON) - Auto-update (`sqrl update`) - Memory consolidation - Retrieval debugging tools +- CLI selection (`sqrl config`) + MCP wiring + agent instruction injection +- `sqrl sync` for updating existing projects with new CLIs -**Team Features (Paid):** -- Cloud sync for group.db -- Team memory 
types (team_style, team_profile, shared_*, team_process) -- `sqrl share` command (promote individual to team) -- Team management (create, join, sync) -- Self-hosted option for enterprise +**v1 limitations:** +- No TTL/auto-expiration (manual forget only) +- No hard delete (soft delete only, data remains in SQLite) +- Free-text contradiction detection may have false positives ## v2 Scope (Future) -- Hooks output for Claude Code / Gemini CLI -- File injection for AGENTS.md / GEMINI.md +- Team/cloud sync (group.db, share command, team management) +- Deep CLI integrations (Claude Code hooks, Cursor extension) - Team analytics dashboard -- Memory marketplace (export/sell recipe packs) -- Web dashboard +- Memory marketplace +- TTL / temporary memory (auto-expiration) +- Hard purge for privacy/compliance +- Memory linking + evolution (A-MEM style) +- Richer conflict detection with schema/key registry +- `get_memory_history` API for debugging invalidation chains + +--- + +## v2 Architecture: Team/Cloud + +### Overview + +v2 adds team memory sharing via `group.db` - a separate database that syncs with cloud. Individual memories stay in `squirrel.db` (local-only), team memories go to `group.db` (synced). + +### 3-Layer Database Architecture (v2) + +| Layer | DB File | Contents | Sync | +|-------|---------|----------|------| +| **Global** | `~/.sqrl/squirrel.db` | lesson, fact, profile (scope=global) | Local only | +| **Project** | `/.sqrl/squirrel.db` | lesson, fact (scope=project) | Local only | +| **Team** | `~/.sqrl/group.db` + `/.sqrl/group.db` | Shared memories (owner=team) | Cloud | + +### Memory Schema (v2) + +Additional fields for team support: + +```sql +CREATE TABLE memories ( + -- ... all v1 fields ... 
+ owner TEXT NOT NULL DEFAULT 'individual', -- individual | team + team_id TEXT, -- team identifier (for owner=team) + contributed_by TEXT, -- user who shared (for owner=team) + source_memory_id TEXT -- original memory ID (if promoted to team) +); +``` + +### Team Tools (v2) + +**share_memory(memory_id):** Promote individual memory to team +1. Read from `squirrel.db` +2. Copy to `group.db` with `owner: team` +3. Set `contributed_by`, `source_memory_id` +4. Sync triggers cloud upload + +**team_join(team_id), team_leave():** Team membership management + +**team_export(filters):** Export team memories for offline/backup + +### Sync Architecture + +**Local-first with background sync:** +- `group.db` is local copy, always available +- Background process syncs with cloud +- Users never wait for network +- Conflict resolution: last-write-wins with vector clocks + +**Scaling considerations (from research):** +- Individual user (6 months): ~6MB (900 memories) +- Team (10,000 users): ~6GB if full sync - NOT viable + +**Hybrid approach for large teams:** +| Team Size | Strategy | +|-----------|----------| +| Small (<100) | Full sync - all team memories in local group.db | +| Medium (100-1000) | Partial sync - recent + relevant memories locally | +| Large (1000+) | Cloud-primary - query cloud, cache locally | + +**Reference:** Figma, Notion, Linear all use server-first or partial sync. Nobody syncs everything locally at scale. 
+
+### Team Commands (v2)
+
+```bash
+sqrl team join <team-id>    # Join team, start syncing group.db
+sqrl team leave             # Leave team, remove group.db
+sqrl share <memory-id>      # Promote individual memory to team
+sqrl share --all            # Share all individual memories to team
+sqrl team export            # Export team memories to local
+```
+
+### Migration Paths
+
+**Local β†’ Cloud (user subscribes):**
+```bash
+sqrl share --all # Promotes all individual memories to team
+```
+
+**Cloud β†’ Local (team exports):**
+```bash
+sqrl team export --project # Downloads team memories to local squirrel.db
+```
+
+### Config (v2)
+
+```toml
+# ~/.sqrl/config.toml
+[team]
+id = "abc-team-id"
+sync_interval_seconds = 300 # 5 min background sync
+sync_strategy = "full" # full | partial | cloud-primary
+```
+
+### Retrieval (v2)
+
+Context retrieval queries BOTH databases:
+1. `squirrel.db` (individual memories)
+2. `group.db` (team memories)
+3. LLM reranks combined results
+
+Team memories get attribution: "From team member Alice"
 
 ---
 
diff --git a/EXAMPLE.md b/EXAMPLE.md
index 7528ecc..cad641b 100644
--- a/EXAMPLE.md
+++ b/EXAMPLE.md
@@ -6,13 +6,17 @@ Detailed example demonstrating the entire Squirrel data flow from installation t
 Squirrel watches AI tool logs, groups events into **Episodes** (4-hour time windows), and sends them to a unified Python Agent for analysis:
 
-1. **Segment Tasks** - Identify distinct user goals within the episode
-2. **Classify Outcomes** - SUCCESS | FAILURE | UNCERTAIN for each task
-3. **Extract Memories** - SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, ALLβ†’process, UNCERTAINβ†’skip
+1. **Segment by Kind** - Not all sessions are coding tasks. Identify segment type first:
+   - `EXECUTION_TASK` - coding, fixing bugs, running commands
+   - `PLANNING_DECISION` - architecture, design, tech choices
+   - `RESEARCH_LEARNING` - learning, exploring docs
+   - `DISCUSSION` - brainstorming, chat
+2. **Classify Outcomes** - Only for EXECUTION_TASK: SUCCESS | FAILURE | UNCERTAIN (with evidence)
+3. 
**Extract Memories** - Based on segment kind, not just success/failure Episode = batch of events from same repo within 4-hour window (internal batching, not a product concept). -**The key insight:** Passive learning requires knowing WHAT succeeded before extracting patterns. We don't ask users to confirm - we infer from conversation flow. +**The key insight:** Not every session is a "task" with success/failure. Architecture discussions, research, and chat produce valuable memories without outcomes. We segment first, then extract appropriately. --- @@ -40,7 +44,27 @@ brew install sqrl # Mac winget install sqrl # Windows ``` -### Step 1.2: First Command Starts Daemon +### Step 1.2: CLI Selection (First Run) + +```bash +sqrl config +# Interactive prompt: select which CLIs you use +# β†’ Claude Code: yes +# β†’ Codex CLI: yes +# β†’ Gemini CLI: no +# β†’ Cursor: yes +``` + +This stores CLI selection in `~/.sqrl/config.toml`: +```toml +[agents] +claude_code = true +codex_cli = true +gemini_cli = false +cursor = true +``` + +### Step 1.3: Project Initialization ```bash cd ~/projects/inventory-api @@ -50,26 +74,65 @@ sqrl init What happens: 1. First `sqrl` command auto-starts daemon (lazy start) 2. `sqrl init` triggers agent via IPC -3. Agent scans for CLI log folders containing this project -4. Agent asks: ingest historical logs? (token-limited, not time-limited) -5. Creates `.sqrl/squirrel.db` for project memories -6. Detects installed CLIs (Claude Code, Codex, Gemini CLI, Cursor) -7. Offers to configure MCP for each detected CLI +3. Creates `.sqrl/squirrel.db` for project memories +4. Agent scans for CLI log folders containing this project +5. Agent asks: ingest historical logs? (token-limited, not time-limited) +6. For each enabled CLI (from `config.agents`): + - Configures MCP (adds Squirrel server to CLI's MCP config) + - Injects instruction text to agent file (CLAUDE.md, AGENTS.md, .cursor/rules/) +7. 
Registers project in `~/.sqrl/projects.json` File structure after init: ``` ~/.sqrl/ -β”œβ”€β”€ config.toml # API keys, settings -β”œβ”€β”€ squirrel.db # Global individual (user_style, user_profile) -β”œβ”€β”€ group.db # Global team (team_style, team_profile) - synced, paid +β”œβ”€β”€ config.toml # API keys, settings, CLI selection +β”œβ”€β”€ squirrel.db # Global memories (lesson, fact, profile with scope=global) +β”œβ”€β”€ projects.json # List of initialized projects (for sqrl sync) └── logs/ # Daemon logs -~/projects/inventory-api/.sqrl/ -β”œβ”€β”€ squirrel.db # Project individual (process, pitfall, recipe, project_fact) -└── group.db # Project team (shared_*, team_process) - synced, paid +~/projects/inventory-api/ +β”œβ”€β”€ .sqrl/ +β”‚ └── squirrel.db # Project memories (lesson, fact with scope=project) +β”œβ”€β”€ CLAUDE.md # ← Squirrel instructions injected +└── AGENTS.md # ← Squirrel instructions injected +``` + +### Step 1.4: Agent Instruction Injection + +For each enabled CLI, Squirrel adds this block to the agent instruction file: + +```markdown +## Squirrel Memory System + +This project uses Squirrel for persistent memory across sessions. + +ALWAYS call `squirrel_get_task_context` BEFORE: +- Fixing bugs (to check if this bug was seen before) +- Refactoring code (to get patterns that worked/failed) +- Adding features touching existing modules +- Debugging errors that seem familiar + +DO NOT call for: +- Simple typo fixes +- Adding comments +- Formatting changes ``` -### Step 1.3: Natural Language CLI +This increases the probability that AI tools will call Squirrel MCP tools at the right moments. 
+ +### Step 1.5: Syncing New CLIs + +Weeks later, Alice enables Cursor globally: + +```bash +sqrl config # select Cursor + +# Update all existing projects +sqrl sync +# β†’ Adds MCP config + instructions for Cursor to all registered projects +``` + +### Step 1.6: Natural Language CLI The agent handles all CLI commands: @@ -86,6 +149,7 @@ Or direct commands: sqrl search "database patterns" sqrl status sqrl config set llm.model claude-sonnet +sqrl sync # Update all projects with new CLI configs ``` --- @@ -174,9 +238,9 @@ fn flush_episode(repo: &str, events: Vec) { ## Phase 3: Memory Extraction (Python Agent) -### Step 3.1: Agent Analyzes Episode +### Step 3.1: Agent Analyzes Episode (Segment-First Approach) -The unified agent receives the Episode and uses its tools to analyze and store memories: +The unified agent receives the Episode and uses segment-first analysis: ```python async def ingest_episode(episode: dict) -> dict: @@ -186,57 +250,66 @@ async def ingest_episode(episode: dict) -> dict: for e in episode["events"] ]) - # LLM analyzes: tasks, outcomes, and memories in ONE call + # LLM analyzes: segments first, then memories in ONE call response = await llm.call(INGEST_PROMPT.format(context=context)) return { - "tasks": [ + "segments": [ { - "task": "Add category endpoint", - "outcome": "SUCCESS", - "evidence": "User said 'Perfect, tests pass!'", - "memories": [ - { - "type": "user_style", - "content": "Prefers async/await with type hints for all handlers", - "importance": "high", - "repo": "global", - }, - { - "type": "process", # Always recorded for export - "content": "Added GET /items/category endpoint with async handler, type hints, pytest fixture", - "importance": "medium", - "repo": "/Users/alice/projects/inventory-api", - } - ] + "id": "seg_1", + "kind": "EXECUTION_TASK", + "title": "Add category endpoint", + "event_range": [0, 4], + "outcome": { + "status": "SUCCESS", + "evidence": ["User said 'Perfect, tests pass!'"] + } }, { - "task": "Fix auth loop 
bug", - "outcome": "SUCCESS", # Eventually succeeded after failures - "evidence": "User said 'That fixed it, thanks!'", - "memories": [ - { - "type": "pitfall", - "content": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", - "importance": "high", - "repo": "/Users/alice/projects/inventory-api", - }, - { - "type": "recipe", - "content": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on token refresh", - "importance": "high", - "repo": "/Users/alice/projects/inventory-api", - }, - { - "type": "process", # Always recorded for export - "content": "Tried localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", - "importance": "medium", - "repo": "/Users/alice/projects/inventory-api", - } - ] + "id": "seg_2", + "kind": "EXECUTION_TASK", + "title": "Fix auth loop bug", + "event_range": [5, 10], + "outcome": { + "status": "SUCCESS", + "evidence": ["User said 'That fixed it, thanks!'"] + } } ], - "confidence": 0.9 + "memories": [ + { + "memory_type": "lesson", + "outcome": "success", + "scope": "global", + "content": "Prefers async/await with type hints for all handlers", + "source_segments": ["seg_1"], + "confidence": 0.9 + }, + { + "memory_type": "lesson", + "outcome": "failure", + "scope": "project", + "content": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", + "source_segments": ["seg_2"], + "confidence": 0.9 + }, + { + "memory_type": "lesson", + "outcome": "success", + "scope": "project", + "content": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on token refresh", + "source_segments": ["seg_2"], + "confidence": 0.9 + }, + { + "memory_type": "fact", + "fact_type": "process", + "scope": "project", + "content": "Tried localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", + "source_segments": ["seg_2"], + "confidence": 0.85 + } + ] } ``` @@ -257,17 
+330,28 @@ Analyze this coding session (~4 hours of activity): [assistant] I think the issue is in useEffect cleanup... [user] That fixed it, thanks! -Analyze this session: -1. Identify distinct Tasks (user goals like "add endpoint", "fix bug") -2. For each Task, determine: - - outcome: SUCCESS | FAILURE | UNCERTAIN - - evidence: why you classified it this way (quote user if possible) -3. For SUCCESS tasks: extract recipe (reusable pattern) or project_fact memories -4. For FAILURE tasks: extract pitfall memories (what NOT to do) -5. For tasks with failed attempts before success: extract BOTH pitfall AND recipe -6. For ALL tasks: extract process memory (what happened, for export/audit) +Analyze this session using SEGMENT-FIRST approach: + +1. SEGMENT the episode by kind (each segment = 5-20 events): + - EXECUTION_TASK: coding, fixing, running commands (CAN have outcome) + - PLANNING_DECISION: architecture, design (has resolution: DECIDED/OPEN) + - RESEARCH_LEARNING: learning, exploring (has resolution: ANSWERED/PARTIAL) + - DISCUSSION: brainstorming, chat (no outcome) + +2. For EXECUTION_TASK segments ONLY, classify outcome: + - SUCCESS: with evidence (tests passed, user confirmed, etc.) + - FAILURE: with evidence (error persists, user says "didn't work") + - UNCERTAIN: no clear evidence (conservative default) + + IMPORTANT: Other segment kinds NEVER have SUCCESS/FAILURE. -Return only high-confidence memories. When in doubt, skip (except process - always record). +3. Extract memories based on segment kind: + - EXECUTION_TASK: lesson (with outcome), fact (knowledge discovered) + - PLANNING_DECISION: fact (decisions), lesson (rationale), profile + - RESEARCH_LEARNING: fact (knowledge), lesson (learnings) + - DISCUSSION: profile (preferences), lesson (insights) + +Return segments[] and memories[] with source_segment references. 
``` ### Step 3.2: Near-Duplicate Check + Save Memory @@ -360,8 +444,8 @@ Candidate memories (ranked by similarity): {candidates} Select the most relevant memories for this task. Then compose a context prompt that: -1. Prioritizes pitfalls (what NOT to do) first -2. Includes relevant recipes and project_facts +1. Prioritizes lessons with outcome=failure (what NOT to do) first +2. Includes relevant lessons (outcome=success) and facts 3. Resolves conflicts between memories (newer wins) 4. Merges related memories to save tokens 5. Stays within {budget} tokens @@ -429,48 +513,37 @@ CREATE TABLE events ( ); ``` -### Individual Memory (squirrel.db - Free) +### Memory (squirrel.db) ```sql CREATE TABLE memories ( id TEXT PRIMARY KEY, content_hash TEXT NOT NULL UNIQUE, content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- user_style | user_profile | process | pitfall | recipe | project_fact + memory_type TEXT NOT NULL, -- lesson | fact | profile + outcome TEXT, -- success | failure (for lesson type) + fact_type TEXT, -- knowledge | process (for fact type) + scope TEXT NOT NULL, -- global | project repo TEXT NOT NULL, -- repo path OR 'global' embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low - state TEXT NOT NULL DEFAULT 'active', -- active | deleted + -- Lifecycle fields + status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated + valid_from TEXT NOT NULL, -- when this became true + valid_to TEXT, -- when it stopped being true + superseded_by TEXT, -- memory_id that replaced this + semantic_key TEXT, -- for fact contradiction (e.g., db.engine) user_id TEXT NOT NULL DEFAULT 'local', - assistant_id TEXT NOT NULL DEFAULT 'squirrel', created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - deleted_at TEXT + updated_at TEXT NOT NULL ); ``` -### Team Memory (group.db - Paid) - -```sql -CREATE TABLE team_memories ( - id TEXT PRIMARY KEY, - content_hash 
TEXT NOT NULL UNIQUE, - content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- team_style | team_profile | team_process | shared_pitfall | shared_recipe | shared_fact - repo TEXT NOT NULL, -- repo path OR 'global' - embedding BLOB, -- 1536-dim float32 - confidence REAL NOT NULL, - importance TEXT NOT NULL DEFAULT 'medium', - state TEXT NOT NULL DEFAULT 'active', - team_id TEXT NOT NULL, - contributed_by TEXT NOT NULL, - source_memory_id TEXT, -- original individual memory (if promoted) - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - deleted_at TEXT -); -``` +**Status values:** +- `active` - Normal, appears in retrieval +- `inactive` - Soft deleted via `sqrl forget`, hidden but recoverable +- `invalidated` - Superseded by newer fact, hidden but keeps history ### User Profile (separate from memories) @@ -490,26 +563,36 @@ CREATE TABLE user_profile ( | Phase | What Happens | |-------|--------------| | Install | Universal script or package manager | -| First Command | Lazy daemon start, no system service | -| Init | Agent scans logs, optional history ingestion, configures MCP | +| CLI Selection | `sqrl config` - select which CLIs you use (Claude Code, Codex, etc.) 
| +| Init | Creates DB, ingests history, configures MCP + injects agent instructions for enabled CLIs | +| Sync | `sqrl sync` - updates all projects when new CLIs enabled | | Learning | Daemon watches CLI logs, parses to Events | | Batching | Groups events into Episodes (4hr OR 50 events) | -| **Success Detection** | Agent segments Tasks, classifies SUCCESS/FAILURE/UNCERTAIN | -| Extraction | SUCCESSβ†’recipe/project_fact, FAILUREβ†’pitfall, ALLβ†’process, UNCERTAINβ†’skip | +| **Segmentation** | Agent segments by kind: EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION | +| **Outcome** | For EXECUTION_TASK only: SUCCESS/FAILURE/UNCERTAIN (with evidence) | +| Extraction | Based on segment kind: lesson, fact, profile | +| **Contradiction** | New fact conflicts with old β†’ old marked `invalidated` | | Dedup | Near-duplicate check (0.9 similarity) before ADD | -| Retrieval | MCP β†’ Vector search (top 20 from both DBs) β†’ LLM reranks + composes context prompt | -| Share (opt-in) | `sqrl share` promotes individual memory to team DB | -| Team Sync | group.db syncs with cloud (paid) or self-hosted | +| Retrieval | MCP β†’ Vector search (top 20) β†’ LLM reranks + composes context prompt | +| Forget | `sqrl forget` β†’ soft delete (status=inactive), recoverable | | Idle | 2hr no activity β†’ daemon stops, next command restarts | -### Why Success Detection Matters +### Why Segment-First Matters + +Not all sessions are coding tasks with success/failure: +- Architecture discussions β†’ produce decisions (fact), not outcomes +- Research sessions β†’ produce knowledge (fact), not outcomes +- Brainstorming β†’ produces insights (lesson) and preferences (profile) -Without success detection, we'd blindly store patterns without knowing if they worked: -- User tries 5 approaches, only #5 works -- Old approach: Store all 5 as "patterns" (4 are wrong!) 
-- With success detection: Store #1-4 as pitfalls, #5 as recipe +Segment-first ensures we extract appropriate memories from each session type, and only apply SUCCESS/FAILURE to actual execution tasks. -This is the core insight: passive learning REQUIRES outcome classification. +### Why Contradiction Detection Matters + +Facts change over time: +- Day 1: "Project uses PostgreSQL" (fact) +- Day 30: "Migrated to MySQL" (new fact) + +Contradiction detection auto-invalidates old facts when new conflicting facts arrive, keeping retrieval clean and accurate. --- @@ -521,17 +604,22 @@ This is the core insight: passive learning REQUIRES outcome classification. | **2-tier LLM** | strong_model + fast_model | Pro for complex reasoning, Flash for quick tasks | | **Lazy Daemon** | Start on command, stop after 2hr idle | No system service complexity | | Episode trigger | 4-hour window OR 50 events | Balance context vs LLM cost | -| Success detection | LLM classifies outcomes (strong_model) | Core insight for passive learning | -| Task segmentation | LLM decides, no rules engine | Simple, semantic understanding | -| Memory extraction | Outcome-based | Learn from both success and failure | -| **Process memory** | Always recorded | Audit trail, exportable for sharing | +| **Segment-first** | Segment by kind before outcome classification | Not all sessions are tasks with outcomes | +| **Segment kinds** | EXECUTION_TASK / PLANNING / RESEARCH / DISCUSSION | Different session types produce different memories | +| **Outcome only for EXECUTION_TASK** | SUCCESS/FAILURE/UNCERTAIN with evidence | Avoid classifying discussions as "failures" | +| Memory extraction | Based on segment kind | Architecture produces facts, coding produces lessons | +| **Fact memory** | Always recorded | Audit trail, exportable for sharing | +| **Memory lifecycle** | status (active/inactive/invalidated) + validity | Soft delete + contradiction handling | +| **Fact contradiction** | semantic_key + LLM for free-text | 
Auto-invalidate old when new conflicts | +| **Soft delete only (v1)** | `sqrl forget` β†’ status=inactive | Recoverable, no hard purge until v2 | | **Context compose** | LLM reranks + generates prompt (fast_model) | Better than math scoring, one call | | **Natural language CLI** | Thin shell passes to agent | "By the way" - agent handles all | | **Retroactive ingestion** | Token-limited, not time-limited | Fair for all project sizes | | User profile | Separate table from user_style | Structured vs unstructured | -| **3-layer DB** | Individual (squirrel.db) + Team (group.db) | Free vs Paid, local vs cloud | -| **Team sharing** | Manual opt-in via `sqrl share` | User controls what's shared | -| **Team DB location** | Cloud (default) / Self-hosted / Local | Flexibility for enterprise | +| **2-layer DB** | Global (squirrel.db) + Project (squirrel.db) | Scope-based separation | +| **CLI selection** | User picks CLIs in `sqrl config` | Only configure what user actually uses | +| **Agent instruction injection** | Add Squirrel block to CLAUDE.md, AGENTS.md, etc. | Increase MCP call success rate | +| **sqrl sync** | Update all projects when new CLI enabled | User stays in control, no magic patching | | Near-duplicate threshold | 0.9 similarity | Avoid redundant memories | | Trivial query fast-path | Return empty <20ms | No wasted LLM calls | | **Cross-platform** | Mac, Linux, Windows from v1 | All platforms supported | @@ -539,66 +627,19 @@ This is the core insight: passive learning REQUIRES outcome classification. --- -## Phase 6: Team Sharing (Paid) - -### Step 6.1: Alice Shares a Pitfall - -Alice finds a critical pitfall that should help the team: - -```bash -sqrl share mem_abc123 --as shared_pitfall -``` - -What happens: -1. Agent reads memory from `squirrel.db` -2. Copies to `group.db` with type `shared_pitfall` -3. Sets `contributed_by: alice`, `source_memory_id: mem_abc123` -4. 
Syncs `group.db` to cloud - -### Step 6.2: Bob Gets Team Context - -Bob joins Alice's team and works on the same project: - -```bash -sqrl team join abc-team-id -``` - -When Bob asks Claude Code to help with auth: -1. MCP calls `squirrel_get_task_context` -2. Vector search queries BOTH: - - Bob's `squirrel.db` (his individual memories) - - Team's `group.db` (shared team memories including Alice's pitfall) -3. LLM composes context with both individual and team memories -4. Bob gets Alice's auth pitfall in context without ever experiencing it himself - -### Step 6.3: Export for Onboarding - -Team lead exports all shared recipes for new developers: - -```bash -sqrl export shared_recipe --project --format json > onboarding.json -``` +## Memory Type Reference -New developer imports: -```bash -sqrl import onboarding.json -``` +3 memory types with scope flag: ---- +| Type | Fields | Description | Example | +|------|--------|-------------|---------| +| `lesson` | outcome (success/failure), scope | What worked or failed | "async/await preferred", "API 500 on null user_id" | +| `fact` | fact_type (knowledge/process), scope | Project knowledge or what happened | "Uses PostgreSQL 15", "Tried X, then Y worked" | +| `profile` | scope | User info | "Backend dev, 5yr Python" | -## Memory Type Reference +### Scope Matrix -| Type | Scope | DB | Sync | Description | -|------|-------|-----|------|-------------| -| `user_style` | Global | squirrel.db | Local | Your coding preferences | -| `user_profile` | Global | squirrel.db | Local | Your info (name, role) | -| `process` | Project | squirrel.db | Local | What happened (exportable) | -| `pitfall` | Project | squirrel.db | Local | Issues you encountered | -| `recipe` | Project | squirrel.db | Local | Patterns that worked | -| `project_fact` | Project | squirrel.db | Local | Project knowledge | -| `team_style` | Global | group.db | Cloud | Team coding standards | -| `team_profile` | Global | group.db | Cloud | Team info | -| 
`team_process` | Project | group.db | Cloud | Shared what-happened | -| `shared_pitfall` | Project | group.db | Cloud | Team-wide issues | -| `shared_recipe` | Project | group.db | Cloud | Team-approved patterns | -| `shared_fact` | Project | group.db | Cloud | Team project knowledge | +| Scope | DB File | Description | +|-------|---------|-------------| +| Global | `~/.sqrl/squirrel.db` | User preferences, profile (applies to all projects) | +| Project | `/.sqrl/squirrel.db` | Project-specific lessons and facts | diff --git a/README.md b/README.md index cf68e5a..8420e06 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Squirrel -Local-first memory system for AI coding tools. Learns from your successes AND failures, providing personalized, task-aware context via MCP. Supports individual and team memory layers. +Local-first memory system for AI coding tools. Learns from your successes AND failures, providing personalized, task-aware context via MCP. ## What It Does @@ -9,11 +9,16 @@ You code with Claude Code / Codex / Cursor / Gemini CLI ↓ Squirrel watches logs (100% passive, invisible) ↓ - LLM analyzes: What succeeded? What failed? 
+ LLM segments session by type: + EXECUTION_TASK β†’ SUCCESS/FAILURE/UNCERTAIN + PLANNING_DECISION β†’ decisions, rationale + RESEARCH_LEARNING β†’ knowledge gained + DISCUSSION β†’ insights, preferences ↓ - SUCCESS β†’ recipe/project_fact memories - FAILURE β†’ pitfall memories (what NOT to do) - ALL β†’ process memories (what happened, exportable) + Extracts memories: + lesson (what worked/failed) + fact (project knowledge) + profile (user preferences) ↓ AI tools call MCP β†’ get personalized context ↓ @@ -70,7 +75,6 @@ You code with Claude Code / Codex / Cursor / Gemini CLI | **MCP SDK** | rmcp (official Rust SDK) | modelcontextprotocol/rust-sdk | | **Agent Framework** | PydanticAI + LiteLLM | Multi-provider LLM support | | **Embeddings** | OpenAI text-embedding-3-small | 1536-dim, API-based | -| **Cloud Sync** | SQLite Session Extension | Changeset-based sync for team DB | | **Build/Release** | dist (cargo-dist) | Generates Homebrew, MSI, installers | | **Auto-update** | axoupdater | dist's official updater | @@ -80,13 +84,22 @@ You code with Claude Code / Codex / Cursor / Gemini CLI # Install (detects OS automatically) curl -sSL https://sqrl.dev/install.sh | sh +# First run: select which CLIs you use +sqrl config +# β†’ Select: Claude Code, Codex CLI, Gemini CLI, Cursor + +# Initialize a project +cd ~/my-project +sqrl init +# β†’ Configures MCP for selected CLIs +# β†’ Adds Squirrel instructions to CLAUDE.md, AGENTS.md, etc. +# β†’ Ingests recent history + # Natural language - just talk to it -sqrl "setup for this project" sqrl "what do you know about auth here" sqrl "show my coding style" -# Or direct commands if you prefer -sqrl init +# Or direct commands sqrl search "database patterns" sqrl status ``` @@ -129,6 +142,26 @@ Next sqrl command β†’ daemon starts again No manual daemon management. No system services. Just works. 
+### CLI Selection (Global) + +First run (or `sqrl config`) lets you select which CLIs you use: + +```bash +sqrl config +# Interactive: select Claude Code, Codex CLI, Gemini CLI, Cursor +``` + +```toml +# ~/.sqrl/config.toml +[agents] +claude_code = true +codex_cli = true +gemini_cli = false +cursor = true +``` + +Only selected CLIs get configured during `sqrl init`. + ### Project Initialization ```bash @@ -137,16 +170,31 @@ sqrl init ``` This: -1. Scans CLI log folders for logs mentioning this project -2. Ingests recent history (token-limited, small projects get all, large projects get recent) -3. Creates `.sqrl/squirrel.db` for project memories -4. Detects which CLIs you use and offers to configure MCP +1. Creates `.sqrl/squirrel.db` for project memories +2. Scans CLI log folders for logs mentioning this project +3. Ingests recent history (token-limited) +4. For each enabled CLI: + - Configures MCP (adds Squirrel server to CLI's MCP config) + - Adds instruction text to agent file (CLAUDE.md, AGENTS.md, GEMINI.md, .cursor/rules/) Skip history ingestion: ```bash sqrl init --skip-history ``` +### Syncing New CLIs + +If you enable a new CLI after initializing projects: + +```bash +# Enable Cursor globally +sqrl config # select Cursor + +# Update all existing projects +sqrl sync +# β†’ Adds MCP config + instructions for Cursor to all registered projects +``` + ### Passive Learning (Write Path) ``` @@ -158,13 +206,15 @@ Rust Daemon tails JSONL files β†’ normalized Events ↓ Buffers events, flushes as Episode (4hr window OR 50 events) ↓ -Python Agent analyzes Episode: - - Segments into Tasks ("fix auth bug", "add endpoint") - - Classifies: SUCCESS | FAILURE | UNCERTAIN - - Extracts memories: - SUCCESS β†’ recipe or project_fact - FAILURE β†’ pitfall - UNCERTAIN β†’ skip +Python Agent analyzes Episode (segment-first approach): + 1. 
Segments by kind: + - EXECUTION_TASK (coding, fixing) + - PLANNING_DECISION (architecture, design) + - RESEARCH_LEARNING (learning, exploring) + - DISCUSSION (brainstorming, chat) + 2. For EXECUTION_TASK only: SUCCESS | FAILURE | UNCERTAIN + 3. Extracts memories based on segment kind + 4. Checks for fact contradictions β†’ invalidates old facts ↓ Near-duplicate check (0.9 threshold) β†’ store or merge ``` @@ -208,24 +258,16 @@ sqrl init --skip-history sqrl search "postgres" sqrl forget sqrl config set llm.model claude-sonnet +sqrl sync # Update all projects with new CLI configs ``` -### Team Commands +### Export/Import ```bash -# Share individual memory to team (manual, opt-in) -sqrl share # Promote to team DB -sqrl share --as pitfall # Share with type conversion - -# Export/Import memories -sqrl export pitfall # Export all pitfalls as JSON -sqrl export recipe --project # Export project recipes +# Export/Import memories (for backup or sharing) +sqrl export lesson # Export all lessons as JSON +sqrl export fact --project # Export project facts sqrl import memories.json # Import memories - -# Team management (paid) -sqrl team join # Join a team -sqrl team create "Backend Team" # Create team -sqrl team sync # Force sync with cloud ``` ## MCP Tools @@ -237,64 +279,95 @@ sqrl team sync # Force sync with cloud ## Memory Types -### Individual Memories (Free) +3 memory types, each with scope (global/project): + +| Type | Fields | Description | Example | +|------|--------|-------------|---------| +| `lesson` | outcome (success/failure), scope | What worked or failed | "async/await preferred", "API 500 on null user_id" | +| `fact` | fact_type (knowledge/process), scope | Project knowledge or what happened | "Uses PostgreSQL 15", "Tried X, then Y worked" | +| `profile` | scope | User info | "Backend dev, 5yr Python" | + +### Scope + +| Scope | DB Location | Description | +|-------|-------------|-------------| +| Global | `~/.sqrl/squirrel.db` | User preferences, profile 
(applies to all projects) | +| Project | `/.sqrl/squirrel.db` | Project-specific lessons and facts | + +### Examples by Type + +**lesson (outcome=success):** coding preferences, patterns that worked +- "Prefers async/await over callbacks" +- "Use repository pattern for DB access" + +**lesson (outcome=failure):** issues encountered, things to avoid +- "API returns 500 on null user_id" +- "Never use ORM for bulk inserts" + +**fact (fact_type=knowledge):** project facts, tech stack info +- "Uses PostgreSQL 15" +- "Auth via JWT tokens" + +**fact (fact_type=process):** what happened, decision history +- "Tried Redis, failed due to memory, switched to PostgreSQL" +- "Sprint 12: migrated to Redis" -| Type | Scope | Description | Example | -|------|-------|-------------|---------| -| `user_style` | Global | Your coding preferences | "Prefers async/await" | -| `user_profile` | Global | Your info (name, role, skills) | "Backend dev, 5yr Python" | -| `process` | Project | What happened (exportable) | "Tried X, failed, then Y worked" | -| `pitfall` | Project | Issues you encountered | "API returns 500 on null user_id" | -| `recipe` | Project | Patterns that worked for you | "Use repository pattern for DB" | -| `project_fact` | Project | Project knowledge you learned | "Uses PostgreSQL 15" | +**profile:** user info +- "Backend dev, 5yr Python experience" +- "Prefers detailed code comments" -### Team Memories (Paid - Cloud Sync) +### Memory Lifecycle -| Type | Scope | Description | Example | -|------|-------|-------------|---------| -| `team_style` | Global | Team coding standards | "Team uses ESLint + Prettier" | -| `team_profile` | Global | Team info | "Backend team, 5 devs" | -| `team_process` | Project | Shared what-happened logs | "Sprint 12: migrated to Redis" | -| `shared_pitfall` | Project | Team-wide known issues | "Never use ORM for bulk inserts" | -| `shared_recipe` | Project | Team-approved patterns | "Use factory pattern for tests" | -| `shared_fact` | Project | 
Team project knowledge | "Prod DB is on AWS RDS" | +Memories have status and validity tracking: + +| Status | Description | Retrieval | +|--------|-------------|-----------| +| `active` | Normal memory | Included | +| `inactive` | Soft deleted via `sqrl forget` | Hidden (recoverable) | +| `invalidated` | Superseded by newer fact | Hidden (keeps history) | + +**Fact contradiction handling:** +- When new fact contradicts old (e.g., "uses PostgreSQL" vs "uses MySQL") +- Old fact marked `invalidated`, `valid_to` set, `superseded_by` points to new +- History preserved for debugging + +**Forget command:** +```bash +sqrl forget # Soft delete by ID +sqrl forget "deprecated API" # Search + confirm + soft delete +``` ## Storage Layout ``` ~/.sqrl/ β”œβ”€β”€ config.toml # User settings, API keys -β”œβ”€β”€ squirrel.db # Global individual (user_style, user_profile) -β”œβ”€β”€ group.db # Global team (team_style, team_profile) - synced +β”œβ”€β”€ squirrel.db # Global memories (lesson, fact, profile with scope=global) └── logs/ # Daemon logs /.sqrl/ -β”œβ”€β”€ squirrel.db # Project individual (process, pitfall, recipe, project_fact) -β”œβ”€β”€ group.db # Project team (shared_*, team_process) - synced +β”œβ”€β”€ squirrel.db # Project memories (lesson, fact with scope=project) └── config.toml # Project overrides (optional) ``` -### 3-Layer Database Architecture +### 2-Layer Database Architecture -| Layer | DB File | Contents | Sync | -|-------|---------|----------|------| -| **Global Individual** | `~/.sqrl/squirrel.db` | user_style, user_profile | Local only | -| **Global Team** | `~/.sqrl/group.db` | team_style, team_profile | Cloud (paid) | -| **Project Individual** | `/.sqrl/squirrel.db` | process, pitfall, recipe, project_fact | Local only | -| **Project Team** | `/.sqrl/group.db` | shared_pitfall, shared_recipe, shared_fact, team_process | Cloud (paid) | - -### Team Database Options - -| Mode | Location | Use Case | -|------|----------|----------| -| **Cloud** (default) | 
Squirrel Cloud | Teams, auto-sync, paid tier | -| **Self-hosted** | Your server | Enterprise, data sovereignty | -| **Local file** | `group.db` file | Offline, manual export/import | +| Layer | DB File | Contents | +|-------|---------|----------| +| **Global** | `~/.sqrl/squirrel.db` | lesson, fact, profile (scope=global) | +| **Project** | `/.sqrl/squirrel.db` | lesson, fact (scope=project) | ## Configuration ```toml # ~/.sqrl/config.toml + +[agents] +claude_code = true # Enable Claude Code integration +codex_cli = true # Enable Codex CLI integration +gemini_cli = false # Enable Gemini CLI integration +cursor = true # Enable Cursor integration + [llm] provider = "gemini" # gemini | openai | anthropic | ollama | ... api_key = "..." @@ -310,12 +383,6 @@ model = "text-embedding-3-small" # 1536-dim, $0.10/M tokens [daemon] idle_timeout_hours = 2 # Stop after N hours inactive - -[team] # Paid tier -enabled = false # Enable team features -team_id = "" # Your team ID -sync_mode = "cloud" # cloud | self-hosted | local -sync_url = "" # Custom sync URL (self-hosted only) ``` ### LLM Usage @@ -382,30 +449,30 @@ source .venv/bin/activate && pytest ## v1 Scope -**Individual Features (Free):** - Passive log watching (4 CLIs) -- Success detection (SUCCESS/FAILURE/UNCERTAIN classification) +- Episode segmentation (EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION) +- Success detection for EXECUTION_TASK only (with evidence requirement) +- Memory lifecycle: status (active/inactive/invalidated) + validity tracking +- Fact contradiction detection + auto-invalidation +- Soft delete (`sqrl forget`) - recoverable - Unified Python agent with tools - Natural language CLI - MCP integration (2 tools) - Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) -- 6 memory types (user_style, user_profile, process, pitfall, recipe, project_fact) +- 3 memory types (lesson, fact, profile) with scope flag - Near-duplicate 
deduplication (0.9 threshold) - Cross-platform (Mac, Linux, Windows) - Export/import memories (JSON) - Auto-update (`sqrl update`) -- Memory consolidation -- Retrieval debugging tools +- CLI selection + MCP wiring + agent instruction injection +- `sqrl sync` for updating existing projects with new CLIs -**Team Features (Paid):** -- Cloud sync for group.db -- Team memory types (team_style, team_profile, shared_*, team_process) -- `sqrl share` command (promote individual to team) -- Team management (create, join, sync) -- Self-hosted option for enterprise +**v1 limitations:** +- No TTL/auto-expiration +- No hard delete (soft delete only) -**v2:** Hooks output, file injection (AGENTS.md/GEMINI.md), team analytics, memory marketplace +**v2:** Team/cloud sync, deep CLI integrations, TTL/temporary memory, hard purge, memory linking ## Contributing From 83f0752af556f120dbeb09c62839464edadd6c2a Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 13:31:25 +0800 Subject: [PATCH 06/15] docs: add declarative keys and evidence source to v1 spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add declarative key registry (project.* and user.* keys) - Add evidence_source field for facts (success/failure/neutral/manual) - Update memories table schema with key, value, support_count, last_seen_at - Update episode ingestion output to include keyed facts - Add context output structure for retrieval - Update user_profile to structured identity table πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 179 ++++++++++++++++++++++++++++++++++---------- EXAMPLE.md | 121 +++++++++++++++++++++--------- README.md | 68 ++++++++++++----- 3 files changed, 276 insertions(+), 92 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index 28b997c..bdcfcbe 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -134,19 +134,40 @@ Not all sessions are 
coding tasks with success/failure outcomes. Sessions includ ], "memories": [ { - "memory_type": "FACT", - "scope": "PROJECT", - "content": "Project uses PostgreSQL 15 via Prisma.", + "memory_type": "fact", + "scope": "project", + "key": "project.db.engine", + "value": "PostgreSQL", + "text": "Project uses PostgreSQL 15 via Prisma.", + "evidence_source": "neutral", "source_segments": ["seg_1"], "confidence": 0.86 }, { - "memory_type": "LESSON", - "scope": "PROJECT", + "memory_type": "fact", + "scope": "global", + "key": "user.preferred_style", + "value": "async_await", + "text": "User prefers async/await over callbacks.", + "evidence_source": "success", + "source_segments": ["seg_1"], + "confidence": 0.85 + }, + { + "memory_type": "lesson", + "scope": "project", "outcome": "failure", - "content": "Validate user_id before DB insert to avoid 500s.", + "text": "Validate user_id before DB insert to avoid 500s.", "source_segments": ["seg_1"], "confidence": 0.9 + }, + { + "memory_type": "fact", + "scope": "project", + "text": "Auth module handles JWT validation in middleware.", + "evidence_source": "neutral", + "source_segments": ["seg_1"], + "confidence": 0.75 } ] } @@ -252,21 +273,36 @@ SQLite + sqlite-vec initialization: -- memories table (squirrel.db) CREATE TABLE memories ( id TEXT PRIMARY KEY, - content_hash TEXT NOT NULL UNIQUE, - content TEXT NOT NULL, + project_id TEXT, -- NULL for global/user-scope memories memory_type TEXT NOT NULL, -- lesson | fact | profile - outcome TEXT, -- success | failure (for lesson type) - fact_type TEXT, -- knowledge | process (for fact type) - scope TEXT NOT NULL, -- global | project - repo TEXT NOT NULL, -- repo path OR 'global' + + -- For lessons (task-level patterns/pitfalls) + outcome TEXT, -- success | failure | uncertain (lesson only) + + -- For facts + fact_type TEXT, -- knowledge | process (fact only, optional) + key TEXT, -- declarative key: project.db.engine, user.preferred_style + value TEXT, -- declarative value: 
PostgreSQL, async_await + evidence_source TEXT, -- success | failure | neutral | manual (fact only) + support_count INTEGER, -- approx episodes that support this fact + last_seen_at TEXT, -- last episode where this was seen + + -- Content + text TEXT NOT NULL, -- human-readable content embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) + metadata TEXT, -- JSON: anchors (files, components, endpoints) + + -- Confidence confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low + + -- Lifecycle status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated - valid_from TEXT NOT NULL, -- when this became true - valid_to TEXT, -- when it stopped being true (null = still valid) - superseded_by TEXT, -- memory_id that replaced this (for invalidated) - semantic_key TEXT, -- for fact contradiction detection (e.g., db.engine) + valid_from TEXT NOT NULL, + valid_to TEXT, + superseded_by TEXT, + + -- Audit user_id TEXT NOT NULL DEFAULT 'local', created_at TEXT NOT NULL, updated_at TEXT NOT NULL @@ -316,14 +352,47 @@ CREATE TABLE memory_access_log ( ); ``` -### A1.1 Memory Lifecycle (Forget Mechanism) +### A1.1 Declarative Key Registry + +Declarative keys are the "rigid backbone" for critical facts. Same key + different value triggers deterministic invalidation (no LLM needed). 
+ +**Project-scoped keys** (project_id set): +``` +project.db.engine # PostgreSQL, MySQL, SQLite +project.db.version # 15, 8.0, 3.x +project.api.framework # FastAPI, Express, Rails +project.ui.framework # React, Vue, Svelte +project.language.main # Python, TypeScript, Go +project.test.command # pytest, npm test, go test +project.build.command # npm run build, cargo build +project.auth.method # JWT, session, OAuth +project.package_manager # npm, pnpm, yarn, pip, uv +project.orm # Prisma, SQLAlchemy, TypeORM +``` + +**User-scoped keys** (project_id = NULL, stored in global db): +``` +user.preferred_style # async_await, callbacks, sync +user.preferred_language # Python, TypeScript, Go +user.strict_null_checks # true, false +user.comment_style # minimal, detailed, jsdoc +user.error_handling # exceptions, result_types, errors +``` + +**Key behaviors:** +- Keys are optional - most facts remain free-text +- LLM extracts key during ingestion when pattern matches registry +- Same key + different value β†’ deterministic invalidation of old fact +- Keys enable fast lookup without vector search + +### A1.2 Memory Lifecycle (Forget Mechanism) **Status values:** - `active` - Normal, appears in retrieval - `inactive` - Soft deleted by user (`sqrl forget`), recoverable, hidden from retrieval - `invalidated` - Superseded by newer fact, keeps history, hidden from retrieval -**Validity fields (for fact/profile):** +**Validity fields (for facts):** - `valid_from` - When this became true (default: created_at) - `valid_to` - When it stopped being true (null = still valid) - `superseded_by` - ID of memory that replaced this @@ -336,15 +405,12 @@ WHERE status = 'active' **Contradiction detection (during ingestion):** -For facts with `semantic_key`: -``` -semantic_key examples: db.engine, db.version, api.framework, - auth.method, package_manager, orm -``` +For facts with declarative `key`: - Same key + different value β†’ invalidate old (status='invalidated', valid_to=now, 
superseded_by=new_id) -- LLM extracts semantic_key when possible during ingestion +- Deterministic, no LLM needed +- Example: key=project.db.engine, old value=MySQL, new value=PostgreSQL β†’ invalidate old -For free-text facts without clear key: +For free-text facts without key: - LLM judges semantic conflict between new fact and similar existing facts - High confidence conflict β†’ invalidate old - Low confidence β†’ keep both, let retrieval handle via recency weighting @@ -547,25 +613,33 @@ UUIDβ†’integer mapping when showing existing memories to LLM (prevents hallucina ### C4. Schemas (`schemas/`) -Memory schema: -- id, content_hash, content, memory_type, repo, embedding -- outcome (for lesson), fact_type (for fact), scope -- confidence, importance, user_id -- created_at, updated_at -- memory_type: lesson | fact | profile -- outcome (lesson only): success | failure -- fact_type (fact only): knowledge | process -- scope: global | project - -Lifecycle fields: +**Memory schema:** +- id, project_id (NULL for global), memory_type (lesson | fact | profile) +- text (human-readable content), embedding (1536-dim) +- metadata (JSON: anchors - files, components, endpoints) +- confidence, importance (critical | high | medium | low) +- user_id, created_at, updated_at + +**Lesson-specific fields:** +- outcome: success | failure | uncertain + +**Fact-specific fields:** +- fact_type: knowledge | process (optional) +- key: declarative key (project.db.engine, user.preferred_style, etc.) +- value: declarative value (PostgreSQL, async_await, etc.) 
+- evidence_source: success | failure | neutral | manual +- support_count: number of episodes that support this fact +- last_seen_at: timestamp of last episode where seen + +**Lifecycle fields (all types):** - status: active | inactive | invalidated - valid_from: timestamp (when this became true) - valid_to: timestamp | null (when it stopped being true) - superseded_by: memory_id | null (for invalidated facts) -- semantic_key: string | null (for fact contradiction detection) -UserProfile schema: +**UserProfile schema (structured identity, not memories):** - key, value, source (explicit|inferred), confidence, updated_at +- Examples: name, role, experience_level, company, primary_use_case --- @@ -586,6 +660,31 @@ UserProfile schema: - Generates structured prompt with memory IDs 3. Returns ready-to-inject context prompt within token budget +**Context output structure:** +```json +{ + "project_facts": [ + {"key": "project.db.engine", "value": "PostgreSQL", "text": "..."}, + {"key": "project.api.framework", "value": "FastAPI", "text": "..."} + ], + "user_prefs": [ + {"key": "user.preferred_style", "value": "async_await", "text": "..."} + ], + "lessons": [ + {"outcome": "failure", "text": "Validate user_id before DB insert...", "id": "mem_123"}, + {"outcome": "success", "text": "Use repository pattern for DB access...", "id": "mem_456"} + ], + "process_facts": [ + {"text": "Auth module handles JWT validation in middleware.", "id": "mem_789"} + ], + "profile": { + "name": "Alice", + "role": "Backend Developer", + "experience_level": "Senior" + } +} +``` + **forget_memory(id_or_query):** - If ID: set status='inactive' (soft delete, recoverable) - If natural language query: search β†’ confirm with user β†’ soft delete matches @@ -833,8 +932,10 @@ Windows note: MSI recommended over raw .exe to reduce SmartScreen/AV friction. 
- Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) - 3 memory types (lesson, fact, profile) with scope flag +- Declarative keys for facts (project.* and user.*) with deterministic conflict detection +- Evidence source tracking for facts (success/failure/neutral/manual) - Memory lifecycle: status (active/inactive/invalidated) + validity (valid_from/valid_to) -- Fact contradiction detection (semantic_key + LLM for free-text) +- Fact contradiction detection (declarative key match + LLM for free-text) - Soft delete (`sqrl forget`) - no hard purge - Near-duplicate deduplication (0.9 threshold) - Cross-platform (Mac, Linux, Windows) diff --git a/EXAMPLE.md b/EXAMPLE.md index cad641b..d99e34e 100644 --- a/EXAMPLE.md +++ b/EXAMPLE.md @@ -278,10 +278,12 @@ async def ingest_episode(episode: dict) -> dict: ], "memories": [ { - "memory_type": "lesson", - "outcome": "success", + "memory_type": "fact", "scope": "global", - "content": "Prefers async/await with type hints for all handlers", + "key": "user.preferred_style", + "value": "async_await", + "text": "Prefers async/await with type hints for all handlers", + "evidence_source": "success", "source_segments": ["seg_1"], "confidence": 0.9 }, @@ -289,7 +291,7 @@ async def ingest_episode(episode: dict) -> dict: "memory_type": "lesson", "outcome": "failure", "scope": "project", - "content": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", + "text": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", "source_segments": ["seg_2"], "confidence": 0.9 }, @@ -297,15 +299,15 @@ async def ingest_episode(episode: dict) -> dict: "memory_type": "lesson", "outcome": "success", "scope": "project", - "content": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on token refresh", + "text": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on 
token refresh", "source_segments": ["seg_2"], "confidence": 0.9 }, { "memory_type": "fact", - "fact_type": "process", "scope": "project", - "content": "Tried localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", + "text": "Tried localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", + "evidence_source": "neutral", "source_segments": ["seg_2"], "confidence": 0.85 } @@ -518,22 +520,36 @@ CREATE TABLE events ( ```sql CREATE TABLE memories ( id TEXT PRIMARY KEY, - content_hash TEXT NOT NULL UNIQUE, - content TEXT NOT NULL, - memory_type TEXT NOT NULL, -- lesson | fact | profile - outcome TEXT, -- success | failure (for lesson type) - fact_type TEXT, -- knowledge | process (for fact type) - scope TEXT NOT NULL, -- global | project - repo TEXT NOT NULL, -- repo path OR 'global' - embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) + project_id TEXT, -- NULL for global/user-scope memories + memory_type TEXT NOT NULL, -- lesson | fact | profile + + -- For lessons (task-level patterns/pitfalls) + outcome TEXT, -- success | failure | uncertain (lesson only) + + -- For facts + fact_type TEXT, -- knowledge | process (optional) + key TEXT, -- declarative key: project.db.engine, user.preferred_style + value TEXT, -- declarative value: PostgreSQL, async_await + evidence_source TEXT, -- success | failure | neutral | manual (fact only) + support_count INTEGER, -- approx episodes that support this fact + last_seen_at TEXT, -- last episode where this was seen + + -- Content + text TEXT NOT NULL, -- human-readable content + embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) + metadata TEXT, -- JSON: anchors (files, components, endpoints) + + -- Confidence confidence REAL NOT NULL, importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low - -- Lifecycle fields + + -- Lifecycle status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated - valid_from TEXT NOT NULL, -- when 
this became true - valid_to TEXT, -- when it stopped being true - superseded_by TEXT, -- memory_id that replaced this - semantic_key TEXT, -- for fact contradiction (e.g., db.engine) + valid_from TEXT NOT NULL, + valid_to TEXT, + superseded_by TEXT, + + -- Audit user_id TEXT NOT NULL DEFAULT 'local', created_at TEXT NOT NULL, updated_at TEXT NOT NULL @@ -545,15 +561,27 @@ CREATE TABLE memories ( - `inactive` - Soft deleted via `sqrl forget`, hidden but recoverable - `invalidated` - Superseded by newer fact, hidden but keeps history -### User Profile (separate from memories) +**Declarative key examples:** +- `project.db.engine` β†’ PostgreSQL, MySQL, SQLite +- `project.api.framework` β†’ FastAPI, Express, Rails +- `user.preferred_style` β†’ async_await, callbacks, sync +- `user.comment_style` β†’ minimal, detailed, jsdoc + +Same key + different value β†’ deterministic invalidation of old fact. + +### User Profile (structured identity) ```sql CREATE TABLE user_profile ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL, -- JSON value + user_id TEXT PRIMARY KEY, + name TEXT, + role TEXT, + experience_level TEXT, + company TEXT, + primary_use_case TEXT, + created_at TEXT NOT NULL, updated_at TEXT NOT NULL ); --- Keys: name, role, preferred_languages, common_frameworks, etc. 
``` --- @@ -570,8 +598,9 @@ CREATE TABLE user_profile ( | Batching | Groups events into Episodes (4hr OR 50 events) | | **Segmentation** | Agent segments by kind: EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION | | **Outcome** | For EXECUTION_TASK only: SUCCESS/FAILURE/UNCERTAIN (with evidence) | -| Extraction | Based on segment kind: lesson, fact, profile | -| **Contradiction** | New fact conflicts with old β†’ old marked `invalidated` | +| Extraction | Based on segment kind: lesson (with outcome), fact (with key/evidence_source), profile | +| **Declarative Keys** | Facts with project.* or user.* keys enable deterministic conflict detection | +| **Contradiction** | Same key + different value β†’ old fact invalidated (no LLM); free-text β†’ LLM judges | | Dedup | Near-duplicate check (0.9 similarity) before ADD | | Retrieval | MCP β†’ Vector search (top 20) β†’ LLM reranks + composes context prompt | | Forget | `sqrl forget` β†’ soft delete (status=inactive), recoverable | @@ -608,14 +637,15 @@ Contradiction detection auto-invalidates old facts when new conflicting facts ar | **Segment kinds** | EXECUTION_TASK / PLANNING / RESEARCH / DISCUSSION | Different session types produce different memories | | **Outcome only for EXECUTION_TASK** | SUCCESS/FAILURE/UNCERTAIN with evidence | Avoid classifying discussions as "failures" | | Memory extraction | Based on segment kind | Architecture produces facts, coding produces lessons | -| **Fact memory** | Always recorded | Audit trail, exportable for sharing | +| **Declarative keys** | project.* and user.* keys for facts | Deterministic conflict detection (no LLM) | +| **Evidence source** | success/failure/neutral/manual on facts | Track how a fact was learned | | **Memory lifecycle** | status (active/inactive/invalidated) + validity | Soft delete + contradiction handling | -| **Fact contradiction** | semantic_key + LLM for free-text | Auto-invalidate old when new conflicts | +| **Fact contradiction** | 
Declarative key match + LLM for free-text | Auto-invalidate old when new conflicts | | **Soft delete only (v1)** | `sqrl forget` β†’ status=inactive | Recoverable, no hard purge until v2 | | **Context compose** | LLM reranks + generates prompt (fast_model) | Better than math scoring, one call | | **Natural language CLI** | Thin shell passes to agent | "By the way" - agent handles all | | **Retroactive ingestion** | Token-limited, not time-limited | Fair for all project sizes | -| User profile | Separate table from user_style | Structured vs unstructured | +| User profile | Separate table (structured identity) | name, role, experience_level - not learned | | **2-layer DB** | Global (squirrel.db) + Project (squirrel.db) | Scope-based separation | | **CLI selection** | User picks CLIs in `sqrl config` | Only configure what user actually uses | | **Agent instruction injection** | Add Squirrel block to CLAUDE.md, AGENTS.md, etc. | Increase MCP call success rate | @@ -631,11 +661,34 @@ Contradiction detection auto-invalidates old facts when new conflicting facts ar 3 memory types with scope flag: -| Type | Fields | Description | Example | -|------|--------|-------------|---------| -| `lesson` | outcome (success/failure), scope | What worked or failed | "async/await preferred", "API 500 on null user_id" | -| `fact` | fact_type (knowledge/process), scope | Project knowledge or what happened | "Uses PostgreSQL 15", "Tried X, then Y worked" | -| `profile` | scope | User info | "Backend dev, 5yr Python" | +| Type | Key Fields | Description | Example | +|------|------------|-------------|---------| +| `lesson` | outcome (success/failure/uncertain) | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | +| `fact` | key, value, evidence_source | Project/user knowledge | key=project.db.engine, value=PostgreSQL | +| `profile` | (structured identity) | User background info | name, role, experience_level | + +### Declarative Keys + +Critical facts use 
declarative keys for deterministic conflict detection: + +**Project-scoped keys:** +- `project.db.engine` - PostgreSQL, MySQL, SQLite +- `project.api.framework` - FastAPI, Express, Rails +- `project.language.main` - Python, TypeScript, Go +- `project.auth.method` - JWT, session, OAuth + +**User-scoped keys (global):** +- `user.preferred_style` - async_await, callbacks, sync +- `user.preferred_language` - Python, TypeScript, Go +- `user.comment_style` - minimal, detailed, jsdoc + +### Evidence Source (Facts Only) + +How a fact was learned: +- `success` - Learned from successful task (high confidence) +- `failure` - Learned from failed task (valuable pitfall) +- `neutral` - Observed in planning/research/discussion +- `manual` - User explicitly stated via CLI ### Scope Matrix diff --git a/README.md b/README.md index 8420e06..0d0891c 100644 --- a/README.md +++ b/README.md @@ -281,11 +281,11 @@ sqrl import memories.json # Import memories 3 memory types, each with scope (global/project): -| Type | Fields | Description | Example | -|------|--------|-------------|---------| -| `lesson` | outcome (success/failure), scope | What worked or failed | "async/await preferred", "API 500 on null user_id" | -| `fact` | fact_type (knowledge/process), scope | Project knowledge or what happened | "Uses PostgreSQL 15", "Tried X, then Y worked" | -| `profile` | scope | User info | "Backend dev, 5yr Python" | +| Type | Key Fields | Description | Example | +|------|------------|-------------|---------| +| `lesson` | outcome (success/failure/uncertain) | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | +| `fact` | key, value, evidence_source | Project/user knowledge | key=project.db.engine, value=PostgreSQL | +| `profile` | (structured identity) | User background info | name, role, experience_level | ### Scope @@ -294,27 +294,55 @@ sqrl import memories.json # Import memories | Global | `~/.sqrl/squirrel.db` | User preferences, profile (applies to all 
projects) | | Project | `/.sqrl/squirrel.db` | Project-specific lessons and facts | +### Declarative Keys (Facts) + +Critical facts use declarative keys for deterministic conflict detection: + +**Project-scoped keys:** +``` +project.db.engine # PostgreSQL, MySQL, SQLite +project.api.framework # FastAPI, Express, Rails +project.language.main # Python, TypeScript, Go +project.auth.method # JWT, session, OAuth +``` + +**User-scoped keys (global):** +``` +user.preferred_style # async_await, callbacks, sync +user.preferred_language # Python, TypeScript, Go +user.comment_style # minimal, detailed, jsdoc +``` + +Same key + different value β†’ old fact automatically invalidated (no LLM needed). + +### Evidence Source (Facts) + +How a fact was learned: +- `success` - Learned from successful task (high confidence) +- `failure` - Learned from failed task (valuable pitfall) +- `neutral` - Observed in planning/research/discussion +- `manual` - User explicitly stated via CLI + ### Examples by Type -**lesson (outcome=success):** coding preferences, patterns that worked -- "Prefers async/await over callbacks" -- "Use repository pattern for DB access" +**lesson (outcome=success):** patterns that worked +- "Repository pattern works well for DB access" +- "Batch inserts 10x faster than individual" -**lesson (outcome=failure):** issues encountered, things to avoid +**lesson (outcome=failure):** issues to avoid - "API returns 500 on null user_id" - "Never use ORM for bulk inserts" -**fact (fact_type=knowledge):** project facts, tech stack info -- "Uses PostgreSQL 15" -- "Auth via JWT tokens" +**fact (with key):** declarative project/user knowledge +- key=project.db.engine, value=PostgreSQL, text="Uses PostgreSQL 15 via Prisma" +- key=user.preferred_style, value=async_await, text="Prefers async/await over callbacks" -**fact (fact_type=process):** what happened, decision history +**fact (free-text):** process history, decisions - "Tried Redis, failed due to memory, switched to 
PostgreSQL" -- "Sprint 12: migrated to Redis" +- "Auth module handles JWT validation in middleware" -**profile:** user info -- "Backend dev, 5yr Python experience" -- "Prefers detailed code comments" +**profile:** structured user identity +- name, role, experience_level, company, primary_use_case ### Memory Lifecycle @@ -452,15 +480,17 @@ source .venv/bin/activate && pytest - Passive log watching (4 CLIs) - Episode segmentation (EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION) - Success detection for EXECUTION_TASK only (with evidence requirement) +- 3 memory types (lesson, fact, profile) with scope flag +- Declarative keys for facts (project.* and user.*) with deterministic conflict detection +- Evidence source tracking for facts (success/failure/neutral/manual) - Memory lifecycle: status (active/inactive/invalidated) + validity tracking -- Fact contradiction detection + auto-invalidation +- Fact contradiction detection (declarative key match + LLM for free-text) - Soft delete (`sqrl forget`) - recoverable - Unified Python agent with tools - Natural language CLI - MCP integration (2 tools) - Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) -- 3 memory types (lesson, fact, profile) with scope flag - Near-duplicate deduplication (0.9 threshold) - Cross-platform (Mac, Linux, Windows) - Export/import memories (JSON) From a3b1dac419c53fbcab30d01383ffa036d5e790b1 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 18:53:20 +0800 Subject: [PATCH 07/15] docs: add spec-driven development structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add specs/ folder with Spec Kit pattern: - CONSTITUTION.md: project governance, principles P1-P5, rules DR1-DR5 - ARCHITECTURE.md: system boundaries ARCH-001/002/003, data flows - SCHEMAS.md: database schemas SCHEMA-001 to SCHEMA-006 - INTERFACES.md: IPC, MCP, CLI contracts with stable IDs - KEYS.md: 
declarative key registry KEY-P-*, KEY-U-* - PROMPTS.md: LLM prompts PROMPT-001 to PROMPT-005 with model tiers - DECISIONS.md: ADR log ADR-001 to ADR-009 - Add project-rules/ with .mdc format (Cursor + Claude compatible): - general.mdc, rust-daemon.mdc, python-agent.mdc, specs.mdc, testing.mdc - Add devenv.nix for Nix development environment (ADR-006) - Update CLAUDE.md to reference new structure - Update README.md with devenv setup option πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .claude/CLAUDE.md | 146 ++++++++------ .envrc | 4 + .gitignore | 10 + README.md | 49 ++++- devenv.nix | 121 +++++++++++ devenv.yaml | 11 + project-rules/general.mdc | 40 ++++ project-rules/python-agent.mdc | 76 +++++++ project-rules/rust-daemon.mdc | 55 +++++ project-rules/specs.mdc | 58 ++++++ project-rules/testing.mdc | 66 ++++++ specs/ARCHITECTURE.md | 214 ++++++++++++++++++++ specs/CONSTITUTION.md | 77 +++++++ specs/DECISIONS.md | 243 ++++++++++++++++++++++ specs/INTERFACES.md | 358 +++++++++++++++++++++++++++++++++ specs/KEYS.md | 109 ++++++++++ specs/PROMPTS.md | 308 ++++++++++++++++++++++++++++ specs/SCHEMAS.md | 205 +++++++++++++++++++ 18 files changed, 2075 insertions(+), 75 deletions(-) create mode 100644 .envrc create mode 100644 devenv.nix create mode 100644 devenv.yaml create mode 100644 project-rules/general.mdc create mode 100644 project-rules/python-agent.mdc create mode 100644 project-rules/rust-daemon.mdc create mode 100644 project-rules/specs.mdc create mode 100644 project-rules/testing.mdc create mode 100644 specs/ARCHITECTURE.md create mode 100644 specs/CONSTITUTION.md create mode 100644 specs/DECISIONS.md create mode 100644 specs/INTERFACES.md create mode 100644 specs/KEYS.md create mode 100644 specs/PROMPTS.md create mode 100644 specs/SCHEMAS.md diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index a3543b7..0e6ac60 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -1,73 +1,85 @@ -# Squirrel 
Project - Team Standards - -## Team Communication Guidelines -1. DONT use unnecessary emojis that will affect our communication efficiency -2. READMEs and comments are for AI, not for humans; they should be written in a manner that facilitates AI comprehension -3. Always remain calm, do not seek quick success and instant benefits, and do not celebrate prematurely -4. Do not pander to ideas. If proposed solutions or concepts are incorrect or difficult to implement, point them out -5. Today is 2025 Nov23, if doing search tasks, search the latest -6. Do not display code when discussing solutions; it is a waste of time -7. All context in this project should be English, including commits, they should be brief English - -## Git Workflow - -### Branch Naming Convention -Format: `yourname/type-description` - -Types: -- `feat` - New feature -- `fix` - Bug fix -- `refactor` - Code refactoring -- `docs` - Documentation -- `test` - Test additions/changes -- `chore` - Maintenance tasks - -Examples: -- `lyrica/feat-add-authentication` -- `alice/fix-memory-leak` -- `bob/docs-update-api` - -### Commit Message Format -Format: `type(scope): brief english description` - -Keep commits brief and in English. - -Examples: -- `feat(auth): add JWT validation` -- `fix(api): handle null user` -- `docs(readme): update setup` - -### Pull Request Process -1. Create branch from `main` -2. Make changes and test -3. Push branch -4. Create PR on GitHub -5. Get 1 approval from teammate -6. Merge to main - -## Development Standards +# Squirrel Project -### Code Quality -- Write tests for new features -- Run linter before commit -- Keep files under 200 lines when possible -- Use descriptive names +Local-first memory system for AI coding tools. -### Security -- Never commit secrets (.env, API keys) -- Always validate user input -- Review AI-generated code for security issues +## Spec-Driven Development + +This project uses spec-driven development. 
**Specs are the source of truth.** + +| Spec File | Purpose | +|-----------|---------| +| specs/CONSTITUTION.md | Project governance, core principles | +| specs/ARCHITECTURE.md | System boundaries, data flow | +| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | +| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | +| specs/KEYS.md | Declarative key registry (KEY-*) | +| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | +| specs/DECISIONS.md | Architecture decision records (ADR-*) | + +**Rules:** +1. Never implement behavior not defined in specs +2. Update specs before or with code, never after +3. Reference spec IDs in commits (e.g., "implements SCHEMA-001") + +## Project Rules + +See `project-rules/*.mdc` for context-specific rules: +- `general.mdc` - Overall development rules +- `rust-daemon.mdc` - Rust daemon boundaries +- `python-agent.mdc` - Python agent boundaries +- `specs.mdc` - Specification maintenance +- `testing.mdc` - Testing requirements + +## Architecture + +``` +Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) +``` + +| Component | Responsibility | +|-----------|----------------| +| Rust Daemon | Log watching, MCP server, CLI, SQLite storage | +| Python Agent | Memory extraction, context composition, conflict detection | + +See specs/ARCHITECTURE.md for details. -## Team Collaboration +## Development Environment -All 3 team members are full-stack and can work on any part of the codebase. +Uses Nix via devenv. 
Single command setup: + +```bash +devenv shell +``` + +Available commands: +- `test-all` - Run all tests +- `dev-daemon` - Start daemon in dev mode +- `fmt` - Format all code +- `lint` - Lint all code + +## Team Standards ### Communication -- Announce what you're working on in issues/PR -- If touching shared files, communicate with team -- Sync frequently: `git pull origin main` daily - -### Conflict Prevention -- Pull latest before starting work -- Create focused branches for specific tasks -- Communicate when working on same areas +- No unnecessary emojis +- Documentation written for AI comprehension +- English only in code, comments, commits +- Brief, direct language + +### Git Workflow + +Branch: `yourname/type-description` +- `feat`, `fix`, `refactor`, `docs`, `test`, `chore` + +Commit: `type(scope): brief description` +- Reference spec IDs when applicable + +### Code Quality +- Write tests for new features (DR4) +- Keep files under 200 lines +- Only change what's necessary (DR5) +- No drive-by refactoring + +### Security +- Never commit secrets +- Validate user input +- Review AI-generated code diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..248d176 --- /dev/null +++ b/.envrc @@ -0,0 +1,4 @@ +# Load devenv environment +# Install direnv and run `direnv allow` to auto-load + +use devenv diff --git a/.gitignore b/.gitignore index e8bc4a3..32ee2df 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,13 @@ temp/ reference/learning/cipher/ reference/learning/mem0/ reference/learning/letta/ + +# Nix/devenv +.devenv/ +.devenv.flake.nix +devenv.lock +.direnv/ + +# Rust +target/ +Cargo.lock diff --git a/README.md b/README.md index 0d0891c..ce64472 100644 --- a/README.md +++ b/README.md @@ -213,8 +213,9 @@ Python Agent analyzes Episode (segment-first approach): - RESEARCH_LEARNING (learning, exploring) - DISCUSSION (brainstorming, chat) 2. For EXECUTION_TASK only: SUCCESS | FAILURE | UNCERTAIN - 3. Extracts memories based on segment kind - 4. 
Checks for fact contradictions β†’ invalidates old facts + 3. Detects user frustration (swear words, anger) β†’ boosts importance + 4. Extracts memories based on segment kind + 5. Checks for fact contradictions β†’ invalidates old facts ↓ Near-duplicate check (0.9 threshold) β†’ store or merge ``` @@ -283,7 +284,7 @@ sqrl import memories.json # Import memories | Type | Key Fields | Description | Example | |------|------------|-------------|---------| -| `lesson` | outcome (success/failure/uncertain) | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | +| `lesson` | outcome, importance, user_frustration | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | | `fact` | key, value, evidence_source | Project/user knowledge | key=project.db.engine, value=PostgreSQL | | `profile` | (structured identity) | User background info | name, role, experience_level | @@ -323,14 +324,28 @@ How a fact was learned: - `neutral` - Observed in planning/research/discussion - `manual` - User explicitly stated via CLI +### Frustration Detection + +User frustration signals boost memory importance: + +| Signal | Importance | Example | +|--------|------------|---------| +| Swear words, strong anger | `critical` | "this f***ing bug again" | +| Repeated complaints | `high` | "still broken", "again" | +| Mild frustration | `medium` | "this is annoying" | + +Stored in `metadata.user_frustration`: none | mild | moderate | severe + +Frustration-flagged memories get priority in retrieval to prevent recurring pain points. 
+ ### Examples by Type **lesson (outcome=success):** patterns that worked - "Repository pattern works well for DB access" - "Batch inserts 10x faster than individual" -**lesson (outcome=failure):** issues to avoid -- "API returns 500 on null user_id" +**lesson (outcome=failure, user_frustration=severe):** critical issues to avoid +- "API returns 500 on null user_id" (user was very angry) - "Never use ORM for bulk inserts" **fact (with key):** declarative project/user knowledge @@ -450,7 +465,24 @@ Squirrel/ ## Development Setup -### Prerequisites +### Option 1: Nix/devenv (Recommended) + +Single command setup with all tools: + +```bash +# Install devenv: https://devenv.sh/getting-started/ +git clone https://github.com/kaminoguo/Squirrel.git +cd Squirrel +devenv shell + +# Available commands: +test-all # Run all tests +dev-daemon # Start daemon in dev mode +fmt # Format all code +lint # Lint all code +``` + +### Option 2: Manual Setup ```bash # Rust 1.83+ @@ -467,10 +499,10 @@ git clone https://github.com/kaminoguo/Squirrel.git cd Squirrel # Rust -cd agent && cargo build && cargo test +cd daemon && cargo build && cargo test # Python -cd ../memory_service +cd ../agent uv venv && uv pip install -e ".[dev]" source .venv/bin/activate && pytest ``` @@ -492,6 +524,7 @@ source .venv/bin/activate && pytest - Lazy daemon (start on demand, stop after 2hr idle) - Retroactive log ingestion on init (token-limited) - Near-duplicate deduplication (0.9 threshold) +- Frustration detection (anger/swearing β†’ boost importance, prioritize in retrieval) - Cross-platform (Mac, Linux, Windows) - Export/import memories (JSON) - Auto-update (`sqrl update`) diff --git a/devenv.nix b/devenv.nix new file mode 100644 index 0000000..9c9bf81 --- /dev/null +++ b/devenv.nix @@ -0,0 +1,121 @@ +{ pkgs, lib, config, inputs, ... 
}: + +{ + # Project metadata + name = "squirrel"; + + # Environment variables + env = { + SQUIRREL_DEV = "1"; + }; + + # Packages available in the development shell + packages = with pkgs; [ + # Build tools + git + gnumake + + # SQLite with extensions + sqlite + + # Documentation + mdbook + + # Utilities + jq + ripgrep + fd + ]; + + # Rust toolchain via fenix + languages.rust = { + enable = true; + channel = "stable"; + components = [ "rustc" "cargo" "clippy" "rustfmt" "rust-analyzer" ]; + }; + + # Python with packages + languages.python = { + enable = true; + version = "3.12"; + + venv = { + enable = true; + requirements = '' + pydantic-ai + httpx + openai + pytest + pytest-asyncio + ruff + ''; + }; + }; + + # Pre-commit hooks + pre-commit.hooks = { + # Rust + rustfmt.enable = true; + clippy.enable = true; + + # Python + ruff.enable = true; + + # General + check-merge-conflict.enable = true; + end-of-file-fixer.enable = true; + trim-trailing-whitespace.enable = true; + }; + + # Shell scripts available in the environment + scripts = { + # Run all tests + test-all.exec = '' + echo "Running Rust tests..." + cargo test + echo "Running Python tests..." + pytest agent/tests/ + ''; + + # Start daemon in development mode + dev-daemon.exec = '' + cargo run --bin sqrl-daemon -- --dev + ''; + + # Format all code + fmt.exec = '' + cargo fmt + ruff format agent/ + ''; + + # Lint all code + lint.exec = '' + cargo clippy -- -D warnings + ruff check agent/ + ''; + }; + + # Processes (long-running services for development) + processes = { + # daemon.exec = "cargo watch -x 'run --bin sqrl-daemon'"; + }; + + # Services (databases, etc.) 
+ # services.sqlite.enable = true; + + # Enter shell message + enterShell = '' + echo "Squirrel development environment" + echo "" + echo "Available commands:" + echo " test-all - Run all tests" + echo " dev-daemon - Start daemon in dev mode" + echo " fmt - Format all code" + echo " lint - Lint all code" + echo "" + echo "See specs/ for project specifications" + ''; + + # Ensure minimum devenv version + devenv.flakesIntegration = true; +} diff --git a/devenv.yaml b/devenv.yaml new file mode 100644 index 0000000..286918c --- /dev/null +++ b/devenv.yaml @@ -0,0 +1,11 @@ +# devenv configuration +# See: https://devenv.sh/reference/yaml-options/ + +inputs: + nixpkgs: + url: github:cachix/devenv-nixpkgs/rolling + fenix: + url: github:nix-community/fenix + inputs: + nixpkgs: + follows: nixpkgs diff --git a/project-rules/general.mdc b/project-rules/general.mdc new file mode 100644 index 0000000..76460dd --- /dev/null +++ b/project-rules/general.mdc @@ -0,0 +1,40 @@ +--- +description: General project rules for all Squirrel development +globs: ["**/*"] +alwaysApply: true +--- + +# Squirrel Development Rules + +## Spec-Driven Development + +This project uses spec-driven development. Specs in `specs/` are the source of truth. + +1. Never implement behavior not defined in specs +2. Update specs before or with code, never after +3. Reference spec IDs in commits and PRs (e.g., "implements SCHEMA-001") + +## Code Changes + +1. Only change what's necessary - no drive-by refactoring +2. Keep files under 200 lines when possible +3. No "while I'm here" improvements +4. Don't add features beyond what was asked + +## Communication Style + +1. English only in code, comments, commits, specs +2. No emojis in documentation +3. Brief, direct language +4. 
Tables over paragraphs + +## Git Workflow + +Branch naming: `yourname/type-description` +- `feat` - New feature +- `fix` - Bug fix +- `refactor` - Code refactoring +- `docs` - Documentation +- `test` - Test additions + +Commit format: `type(scope): brief description` diff --git a/project-rules/python-agent.mdc b/project-rules/python-agent.mdc new file mode 100644 index 0000000..22752c1 --- /dev/null +++ b/project-rules/python-agent.mdc @@ -0,0 +1,76 @@ +--- +description: Rules for Python agent development +globs: ["agent/**/*.py", "pyproject.toml", "requirements*.txt"] +--- + +# Python Agent Rules + +## Architecture Boundary (ARCH-002) + +The Python agent handles ALL LLM operations and NEVER does file watching. + +Agent responsibilities: +- Memory extraction (PROMPT-001) +- Context composition (PROMPT-002) +- Conflict detection (PROMPT-003) +- CLI interpretation (PROMPT-004) +- User preference extraction (PROMPT-005) + +Agent NEVER: +- Watches files +- Writes directly to database (uses IPC) +- Handles MCP protocol + +## Framework + +Use PydanticAI for all agents. See ADR-003. + +```python +from pydantic_ai import Agent + +agent = Agent( + model="claude-3-5-sonnet-20241022", + result_type=MemoryList, + system_prompt=PROMPT_001_SYSTEM, +) +``` + +## Model Tiers (ADR-004) + +| Tier | Use Case | Default Model | +|------|----------|---------------| +| strong_model | Extraction, conflict resolution | Claude Sonnet | +| fast_model | Composition, CLI commands | Claude Haiku | + +Check specs/PROMPTS.md for which tier each prompt uses. + +## Output Schemas + +All LLM outputs must be Pydantic models matching specs/SCHEMAS.md: + +```python +class Memory(BaseModel): + memory_type: Literal["lesson", "fact", "profile"] + text: str + confidence: float + importance: Literal["critical", "high", "medium", "low"] + # ... 
see SCHEMA-001 +``` + +## Error Handling + +- Rate limit: Exponential backoff, max 3 retries +- Invalid JSON: Re-prompt with stricter format +- Timeout: Log, return empty, don't block +- Content filter: Log, skip memory, continue + +## Testing + +Use pytest. Mock LLM calls in tests: + +```python +@pytest.fixture +def mock_llm(): + with patch("pydantic_ai.Agent.run") as mock: + yield mock +``` diff --git a/project-rules/rust-daemon.mdc b/project-rules/rust-daemon.mdc new file mode 100644 index 0000000..2f6cc4a --- /dev/null +++ b/project-rules/rust-daemon.mdc @@ -0,0 +1,55 @@ +--- +description: Rules for Rust daemon development +globs: ["daemon/**/*.rs", "Cargo.toml", "Cargo.lock"] +--- + +# Rust Daemon Rules + +## Architecture Boundary (ARCH-001) + +The Rust daemon handles ALL local I/O and NEVER contains LLM logic. + +Daemon responsibilities: +- File watching (notify crate) +- MCP server (rmcp crate) +- CLI commands (clap crate) +- SQLite storage (rusqlite + sqlite-vec) +- IPC routing (tokio, JSON-RPC 2.0) + +Daemon NEVER: +- Makes LLM API calls +- Extracts memories +- Computes semantic similarity (sqlite-vec does this) + +## Key Crates + +| Purpose | Crate | +|---------|-------| +| Async runtime | tokio | +| File watching | notify | +| MCP server | rmcp | +| CLI parsing | clap | +| SQLite | rusqlite | +| Vector search | sqlite-vec | +| Serialization | serde, serde_json | + +## IPC Protocol + +Unix socket at `/tmp/sqrl_agent.sock` (Windows: named pipe). +JSON-RPC 2.0 format. See specs/INTERFACES.md for all methods. + +## Error Handling + +Use thiserror for error types. Map to JSON-RPC error codes: +- -32001: Episode empty +- -32002: Invalid repo +- -32010: Project not initialized +- -32020: Unknown command + +## Platform Support + +Must compile on macOS, Linux, Windows. No OS-specific hacks. 
+Use conditional compilation sparingly: +```rust +#[cfg(target_os = "windows")] +``` diff --git a/project-rules/specs.mdc b/project-rules/specs.mdc new file mode 100644 index 0000000..8c81bdd --- /dev/null +++ b/project-rules/specs.mdc @@ -0,0 +1,58 @@ +--- +description: Rules for maintaining specification files +globs: ["specs/**/*.md"] +--- + +# Specification Rules + +## Spec Structure + +All specs live in `specs/` folder: + +| File | Purpose | +|------|---------| +| CONSTITUTION.md | Project governance, principles | +| ARCHITECTURE.md | System boundaries, data flow | +| SCHEMAS.md | Database schemas with IDs | +| INTERFACES.md | IPC, MCP, CLI contracts | +| KEYS.md | Declarative key registry | +| PROMPTS.md | LLM prompts with model tiers | +| DECISIONS.md | ADR decision log | + +## ID Requirements (DR1) + +Every element must have a stable ID: + +| Type | Format | Example | +|------|--------|---------| +| Schema | SCHEMA-NNN | SCHEMA-001 | +| Interface | IPC-NNN, MCP-NNN, CLI-NNN | IPC-001 | +| Key | KEY-P-NNN, KEY-U-NNN | KEY-P-001 | +| Prompt | PROMPT-NNN | PROMPT-001 | +| Decision | ADR-NNN | ADR-001 | + +## AI-Friendly Format (P5) + +Write for AI consumption: +- Tables over prose +- Explicit schemas with types +- No ambiguity +- Code examples where helpful + +Bad: +> "The memory system stores various types of information including lessons and facts." + +Good: +| memory_type | Description | +|-------------|-------------| +| lesson | What worked or failed | +| fact | Stable knowledge | + +## Change Process + +1. Propose change in PR +2. Update spec file +3. Reference spec ID in commit +4. Implementation follows spec update + +Never implement behavior not in specs. 
diff --git a/project-rules/testing.mdc b/project-rules/testing.mdc
new file mode 100644
index 0000000..daaf744
--- /dev/null
+++ b/project-rules/testing.mdc
@@ -0,0 +1,66 @@
+---
+description: Testing requirements for the project
+globs: ["**/test_*.py", "**/tests/**", "**/*_test.rs"]
+---
+
+# Testing Rules
+
+## Requirement (DR4)
+
+All code changes require passing tests. No exceptions.
+
+## Rust Tests
+
+Location: `daemon/tests/` or inline `#[cfg(test)]`
+
+```bash
+cargo test
+```
+
+Test categories:
+- Unit: Individual functions
+- Integration: IPC, storage
+- Platform: OS-specific behavior
+
+## Python Tests
+
+Location: `agent/tests/`
+
+```bash
+pytest
+```
+
+Test categories:
+- Unit: Agent logic with mocked LLM
+- Integration: End-to-end with mock daemon
+
+## Mocking LLM Calls
+
+Never make real LLM calls in tests. Use fixtures:
+
+```python
+@pytest.fixture
+def mock_extraction_response():
+    return [
+        {
+            "memory_type": "lesson",
+            "outcome": "success",
+            "text": "Test memory",
+            "confidence": 0.9,
+            "importance": "medium"
+        }
+    ]
+```
+
+## Test Naming
+
+Format: `test_<operation>_<condition>_<expected>`
+
+Examples:
+- `test_ingest_episode_empty_returns_error`
+- `test_get_context_trivial_task_fast_path`
+- `test_conflict_detection_keyed_fact_deterministic`
+
+## Coverage Target
+
+Aim for 80% coverage on business logic. Don't obsess over 100%.
diff --git a/specs/ARCHITECTURE.md b/specs/ARCHITECTURE.md
new file mode 100644
index 0000000..e79a2cd
--- /dev/null
+++ b/specs/ARCHITECTURE.md
@@ -0,0 +1,214 @@
+# Squirrel Architecture
+
+High-level system boundaries and data flow.
+ +## System Overview + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ User's Machine β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Claude Code β”‚ β”‚ Cursor β”‚ β”‚ Windsurf β”‚ β”‚ +β”‚ β”‚ (CLI) β”‚ β”‚ (IDE) β”‚ β”‚ (IDE) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Log Files β”‚ β”‚ MCP Client β”‚ β”‚ +β”‚ β”‚ (watched) β”‚ β”‚ (requests) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β–Ό β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ RUST DAEMON β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Log Watcher β”‚ β”‚ MCP Server β”‚ β”‚ CLI Handler β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (notify) β”‚ β”‚ (rmcp) β”‚ β”‚ (clap) β”‚ β”‚ β”‚ +β”‚ β”‚ 
β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β–Ό β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ IPC Router β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (JSON-RPC 2.0) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β–Ό β–Ό β–Ό β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ SQLite β”‚ β”‚ Events β”‚ β”‚ Config β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Storage β”‚ β”‚ Queue β”‚ β”‚ Manager β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”‚ Unix Socket β”‚ +β”‚ β–Ό β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ PYTHON AGENT β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Ingestion β”‚ β”‚ Retrieval β”‚ β”‚ Conflict β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Agent β”‚ β”‚ Agent β”‚ β”‚ Resolver β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (PydanticAI)β”‚ β”‚ (PydanticAI)β”‚ β”‚ (PydanticAI)β”‚ β”‚ β”‚ +β”‚ β”‚ 
β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β–Ό β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ LLM Router β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ (strong/fast) β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ HTTPS + β–Ό + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ LLM Providers β”‚ + β”‚ (Anthropic/OpenAI) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Component Boundaries + +### ARCH-001: Rust Daemon + +**Responsibility:** All local I/O, no LLM logic + +| Module | Crate | Purpose | +|--------|-------|---------| +| Log Watcher | notify | File system events for CLI logs | +| MCP Server | rmcp | Model Context Protocol server | +| CLI Handler | clap | User-facing commands | +| IPC Router | tokio | JSON-RPC over Unix socket | +| SQLite Storage | rusqlite + sqlite-vec | Memories, events, config | +| Events Queue | crossbeam | Buffering before agent processing | +| Config Manager | serde | User and project settings | + +**Never Contains:** +- LLM API calls +- Memory extraction logic +- Semantic similarity computation (done via sqlite-vec) + +--- + +### ARCH-002: Python Agent + +**Responsibility:** All LLM operations + +| Module | Library | Purpose | 
+|--------|---------|---------| +| Ingestion Agent | PydanticAI | Episode β†’ memories extraction | +| Retrieval Agent | PydanticAI | Task β†’ relevant memories | +| Conflict Resolver | PydanticAI | Semantic conflict detection | +| LLM Router | httpx | Provider switching, rate limiting | +| Embedding Client | openai | Vector generation | + +**Never Contains:** +- File watching +- Direct database writes (goes through daemon IPC) +- MCP protocol handling + +--- + +### ARCH-003: Storage Layer + +**Responsibility:** Persistence and vector search + +| Database | Location | Contents | +|----------|----------|----------| +| Global DB | `~/.sqrl/squirrel.db` | User profile, global memories, access logs | +| Project DB | `/.sqrl/squirrel.db` | Project memories, raw events | + +**Vector Search:** +- sqlite-vec extension for cosine similarity +- 1536-dim OpenAI embeddings (default) +- Indexed for top-k queries + +--- + +## Data Flow + +### FLOW-001: Passive Ingestion + +``` +1. User codes with AI CLI +2. CLI writes to log file +3. Daemon detects file change (notify) +4. Daemon parses new log entries +5. Events queued in memory +6. Batch trigger (time/count/shutdown) +7. Daemon calls agent via IPC (ingest_episode) +8. Agent extracts memories (LLM) +9. Agent returns memories to daemon +10. Daemon writes to SQLite +11. Daemon updates indexes +``` + +**Latency Target:** <100ms for steps 3-6, async for 7-11 + +--- + +### FLOW-002: Context Retrieval + +``` +1. AI CLI starts new task +2. CLI calls MCP tool (squirrel_get_task_context) +3. Daemon receives MCP request +4. Daemon queries keyed facts (fast path) +5. Daemon calls agent via IPC (get_task_context) +6. Agent embeds task description +7. Agent queries sqlite-vec for similar memories +8. Agent composes context (LLM) +9. Agent returns context to daemon +10. Daemon returns MCP response +11. 
CLI injects context into prompt +``` + +**Latency Target:** <500ms total, <20ms for trivial tasks (fast path) + +--- + +### FLOW-003: Memory Search + +``` +1. User runs `sqrl search "query"` +2. CLI sends IPC (search_memories) +3. Daemon embeds query +4. Daemon queries sqlite-vec +5. Daemon returns ranked results +6. CLI displays to user +``` + +**Latency Target:** <200ms + +--- + +## Platform Considerations + +| Platform | Log Locations | Socket Path | Notes | +|----------|---------------|-------------|-------| +| macOS | `~/Library/Application Support/Claude/` | `/tmp/sqrl_agent.sock` | Standard | +| Linux | `~/.config/Claude/` | `/tmp/sqrl_agent.sock` | XDG compliant | +| Windows | `%APPDATA%\Claude\` | `\\.\pipe\sqrl_agent` | Named pipe | + +## Security Boundaries + +| Boundary | Enforcement | +|----------|-------------| +| No network for daemon | Rust compile-time (no reqwest) | +| LLM keys in agent only | Environment variables, not config | +| Project isolation | Separate SQLite per project | +| No cloud sync | Feature flag, default off | + +## Extension Points + +| Point | Mechanism | Example | +|-------|-----------|---------| +| New CLI support | Log parser plugins | Add Aider support | +| New LLM provider | Agent config | Switch to local LLM | +| New embedding model | Agent config | Use Cohere embeddings | +| Team sync | Future daemon module | Cloud storage backend | diff --git a/specs/CONSTITUTION.md b/specs/CONSTITUTION.md new file mode 100644 index 0000000..459fa66 --- /dev/null +++ b/specs/CONSTITUTION.md @@ -0,0 +1,77 @@ +# Squirrel Constitution + +Project governance and principles for AI agents working on this codebase. + +## Project Identity + +- **Name**: Squirrel +- **Purpose**: Local-first memory system for AI coding tools +- **License**: AGPL-3.0 + +## Core Principles + +### P1: Local-First +All user data stays on their machine by default. No cloud dependency for core functionality. Privacy is non-negotiable. 
+ +### P2: Passive Learning +100% invisible during coding sessions. No prompts, no confirmations, no interruptions. Watch logs silently, learn passively. + +### P3: Spec-Driven Development +Specs are source of truth. Code is generated output. Never introduce behavior not defined in specs. Update specs before or with code, never after. + +### P4: Cross-Platform +Support Mac, Linux, Windows from v1. No OS-specific hacks in code. Platform differences documented in specs. + +### P5: AI-Friendly Documentation +All specs written for AI consumption: tables over prose, explicit schemas, stable IDs, no ambiguity. + +## Architecture Boundaries + +| Component | Language | Responsibility | Boundary | +|-----------|----------|----------------|----------| +| Rust Daemon | Rust | Log watching, storage, MCP server, CLI shell | Never contains LLM logic | +| Python Agent | Python | All LLM operations, memory extraction, retrieval | Never does file watching | +| IPC | JSON-RPC 2.0 | Communication between daemon and agent | Unix socket only | + +## Technology Constraints + +| Category | Choice | Locked? | +|----------|--------|---------| +| Storage | SQLite + sqlite-vec | Yes (v1) | +| MCP SDK | rmcp (Rust) | Yes (v1) | +| Agent Framework | PydanticAI | Yes (v1) | +| Embeddings | API-based (OpenAI default) | Provider swappable | +| LLM | 2-tier (strong + fast) | Provider swappable | + +## Development Rules + +### DR1: Spec IDs Required +Every schema, interface, prompt, and key must have a stable ID (e.g., `SCHEMA-001`, `IPC-001`). PRs must reference spec IDs. + +### DR2: No Implicit Behavior +If behavior isn't in specs, it doesn't exist. No "obvious" defaults. Document everything. + +### DR3: Environment via devenv +All tools managed by devenv.nix. No global installs. `devenv shell` is the only setup command. + +### DR4: Test Before Merge +All code changes require passing tests. No exceptions. + +### DR5: Minimal Changes +Only change what's necessary. No drive-by refactoring. 
No "while I'm here" improvements. + +## Decision Authority + +| Decision Type | Authority | +|---------------|-----------| +| Spec changes | Team consensus (PR approval) | +| Architecture changes | Documented in DECISIONS.md first | +| Dependency additions | Must justify in PR | +| Breaking changes | Major version bump required | + +## Communication Style + +- English only in code, comments, commits, specs +- No emojis in documentation +- Brief, direct language +- Tables over paragraphs diff --git a/specs/DECISIONS.md b/specs/DECISIONS.md new file mode 100644 index 0000000..1f89043 --- /dev/null +++ b/specs/DECISIONS.md @@ -0,0 +1,243 @@ +# Squirrel Decisions + +Architecture Decision Records (ADR) for significant choices. + +## ADR Format + +Each decision follows: +- **ID**: ADR-NNN +- **Status**: proposed | accepted | deprecated | superseded +- **Date**: YYYY-MM-DD +- **Context**: Why this decision was needed +- **Decision**: What was decided +- **Consequences**: Trade-offs accepted + +--- + +## ADR-001: Rust + Python Split Architecture + +**Status:** accepted +**Date:** 2024-11-20 + +**Context:** +Need a system that watches files, serves MCP, and runs LLM operations. Single language options: +- Pure Rust: LLM libraries immature, PydanticAI not available +- Pure Python: File watching unreliable, MCP SDK less mature + +**Decision:** +Split into Rust daemon (I/O, storage, MCP) and Python agent (LLM operations). Communicate via JSON-RPC over Unix socket. + +**Consequences:** +- (+) Best libraries for each domain +- (+) Clear separation of concerns +- (+) Daemon can run without Python for basic ops +- (-) IPC overhead (~1ms per call) +- (-) Two deployment artifacts +- (-) More complex build process + +--- + +## ADR-002: SQLite with sqlite-vec + +**Status:** accepted +**Date:** 2024-11-20 + +**Context:** +Need vector storage for embeddings. 
Options: +- PostgreSQL + pgvector: Requires server, overkill for local +- Pinecone/Weaviate: Cloud dependency, violates local-first +- SQLite + sqlite-vec: Local, single file, good enough performance +- ChromaDB: Python-only, can't use from Rust daemon + +**Decision:** +Use SQLite with sqlite-vec extension for all storage including vectors. + +**Consequences:** +- (+) Single file database, easy backup +- (+) No server process needed +- (+) Works from both Rust and Python +- (+) sqlite-vec handles cosine similarity efficiently +- (-) Limited to ~100k vectors before slowdown +- (-) No built-in sharding + +--- + +## ADR-003: PydanticAI for Agent Framework + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Need structured LLM outputs for memory extraction. Options: +- Raw API calls: No validation, manual JSON parsing +- LangChain: Heavy, over-engineered for our needs +- PydanticAI: Lightweight, Pydantic validation, good typing + +**Decision:** +Use PydanticAI for all agent implementations. 
+ +**Consequences:** +- (+) Structured outputs with validation +- (+) Type safety +- (+) Clean agent patterns +- (+) Active development +- (-) Newer library, less ecosystem +- (-) Anthropic-focused (but supports OpenAI) + +--- + +## ADR-004: 2-Tier LLM Strategy + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Different operations have different accuracy/speed requirements: +- Memory extraction: Needs quality, can be slow +- Context composition: Needs speed, simpler task + +**Decision:** +Use two model tiers: +- `strong_model`: Complex extraction (default: Claude Sonnet) +- `fast_model`: Quick operations (default: Claude Haiku) + +**Consequences:** +- (+) Cost optimization +- (+) Latency optimization for simple tasks +- (+) Provider flexibility +- (-) Configuration complexity +- (-) Need to maintain two sets of prompts + +--- + +## ADR-005: Declarative Keys for Facts + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Facts like "project uses PostgreSQL" can change. Need conflict detection: +- Pure semantic: LLM compares all facts, expensive +- Pure rule-based: Miss semantic conflicts +- Hybrid: Declarative keys for common facts, LLM for rest + +**Decision:** +Use declarative keys (e.g., `project.db.engine`) for deterministic conflict resolution. Fall back to LLM for unkeyed facts. + +**Consequences:** +- (+) Fast conflict detection for common facts +- (+) No LLM call for key-value changes +- (+) Predictable behavior +- (-) Need to maintain key registry +- (-) Some facts don't fit key-value model + +--- + +## ADR-006: Nix/devenv for Development Environment + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Team uses Windows/WSL2/Mac. Need reproducible dev environment: +- Docker: Heavy, doesn't integrate well with IDEs +- Manual setup: Drift, "works on my machine" +- Nix/devenv: Declarative, reproducible, integrates with shells + +**Decision:** +Use devenv.nix for development environment. 
All tools (Rust, Python, SQLite) managed by Nix. + +**Consequences:** +- (+) Reproducible across all platforms (via WSL2 on Windows) +- (+) Single `devenv shell` command for setup +- (+) Declarative, matches spec-driven approach +- (+) No global installs needed +- (-) Nix learning curve +- (-) Windows requires WSL2 + +--- + +## ADR-007: Spec-Driven Development + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Project is 98% AI-coded. Need to ensure consistency and quality: +- Ad-hoc development: AI generates inconsistent code +- Heavy process: Slows down iteration +- Spec-driven: Specs are source of truth, AI follows specs + +**Decision:** +Adopt spec-driven development. All behavior defined in specs/ before implementation. Code is "compiled output" of specs. + +**Consequences:** +- (+) AI has clear instructions +- (+) Consistent implementation +- (+) Documentation always current +- (+) Easy to review (review specs, not code) +- (-) More upfront work on specs +- (-) Need discipline to update specs first + +--- + +## ADR-008: Frustration Detection for Memory Importance + +**Status:** accepted +**Date:** 2024-11-23 + +**Context:** +Some memories are more important than others. User frustration signals high-value learning: +- Swearing after bug fix: Important lesson +- "Finally!" after struggle: Key breakthrough +- Neutral tone: Standard importance + +**Decision:** +Detect frustration signals in episodes and boost memory importance accordingly. 
+
+| Frustration Level | Signals | Importance Boost |
+|-------------------|---------|------------------|
+| severe | swearing, rage | critical |
+| moderate | "finally", "ugh", 3+ retries | high |
+| mild | sigh, minor complaint | medium |
+| none | neutral | based on content |
+
+**Consequences:**
+- (+) Better prioritization of valuable memories
+- (+) Learns from user pain points
+- (+) Passive, no user action needed
+- (-) May misinterpret sarcasm/humor
+- (-) Cultural differences in expression
+
+---
+
+## ADR-009: Unix Socket for IPC
+
+**Status:** accepted
+**Date:** 2024-11-20
+
+**Context:**
+Daemon and agent need to communicate. Options:
+- HTTP: Works but overhead for local
+- gRPC: Complex setup for simple RPC
+- Unix socket: Fast, secure, local-only
+
+**Decision:**
+Use Unix socket at `/tmp/sqrl_agent.sock` with JSON-RPC 2.0 protocol. Windows uses named pipes.
+
+**Consequences:**
+- (+) No network exposure
+- (+) Low latency (<1ms)
+- (+) Simple protocol
+- (-) Platform-specific paths
+- (-) Need to handle socket cleanup
+
+---
+
+## Pending Decisions
+
+| Topic | Options | Blocking |
+|-------|---------|----------|
+| Team sync backend | Supabase / Custom / None | v2 |
+| Local LLM support | Ollama / llama.cpp / None | v2 |
+| Web UI | None / Tauri / Electron | v2 |
diff --git a/specs/INTERFACES.md b/specs/INTERFACES.md
new file mode 100644
index 0000000..159934f
--- /dev/null
+++ b/specs/INTERFACES.md
@@ -0,0 +1,358 @@
+# Squirrel Interfaces
+
+All IPC, MCP, and CLI contracts with stable IDs.
+
+## IPC Protocol
+
+Unix socket at `/tmp/sqrl_agent.sock`. JSON-RPC 2.0 format.
+
+### Request Format
+
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "<method_name>",
+  "params": { ... },
+  "id": <request_id>
+}
+```
+
+### Response Format
+
+```json
+{
+  "jsonrpc": "2.0",
+  "result": { ... },
+  "id": <request_id>
+}
+```
+
+### Error Format
+
+```json
+{
+  "jsonrpc": "2.0",
+  "error": {
+    "code": <error_code>,
+    "message": "<error_message>",
+    "data": { ...
} + }, + "id": +} +``` + +--- + +## IPC Methods + +### IPC-001: ingest_episode + +Daemon β†’ Agent. Process episode, extract memories. + +**Input:** +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| episode.id | string | Yes | Episode UUID | +| episode.repo | string | Yes | Absolute project path | +| episode.start_ts | string | Yes | ISO 8601 | +| episode.end_ts | string | Yes | ISO 8601 | +| episode.events | array | Yes | Array of SCHEMA-002 events | + +**Output:** +| Field | Type | Description | +|-------|------|-------------| +| memories_created | integer | Number of new memories | +| memories_updated | integer | Number of merged/updated | +| memories_invalidated | integer | Number of contradicted facts | +| segments | array | Segment summaries | + +**Errors:** +| Code | Message | When | +|------|---------|------| +| -32001 | Episode empty | No events in episode | +| -32002 | Invalid repo | Repo path doesn't exist | +| -32003 | LLM error | LLM call failed | + +**Model Tier:** strong_model + +--- + +### IPC-002: get_task_context + +MCP β†’ Agent (via daemon). Retrieve relevant memories for task. + +**Input:** +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| project_root | string | Yes | - | Absolute project path | +| task | string | Yes | - | Task description | +| context_budget_tokens | integer | No | 400 | Max tokens in response | + +**Output:** +| Field | Type | Description | +|-------|------|-------------| +| context_prompt | string | Ready-to-inject prompt text | +| memory_ids | array | IDs of selected memories | +| tokens_used | integer | Actual token count | + +**Errors:** +| Code | Message | When | +|------|---------|------| +| -32010 | Project not initialized | No .sqrl/squirrel.db | +| -32011 | Empty task | Task string empty | + +**Model Tier:** fast_model + +**Fast Path:** Returns empty in <20ms for trivial tasks (typo fixes, comments). 
+ +--- + +### IPC-003: search_memories + +MCP β†’ Agent (via daemon). Semantic search across memories. + +**Input:** +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| project_root | string | Yes | - | Absolute project path | +| query | string | Yes | - | Search query | +| top_k | integer | No | 10 | Max results | +| filters.memory_type | string | No | - | Filter by type | +| filters.outcome | string | No | - | Filter by outcome | +| filters.key | string | No | - | Filter by declarative key | + +**Output:** +| Field | Type | Description | +|-------|------|-------------| +| results | array | Matched memories with scores | +| results[].id | string | Memory ID | +| results[].text | string | Memory content | +| results[].score | float | Similarity score | +| results[].memory_type | string | Type | + +**Errors:** +| Code | Message | When | +|------|---------|------| +| -32010 | Project not initialized | No .sqrl/squirrel.db | +| -32012 | Empty query | Query string empty | + +--- + +### IPC-004: execute_command + +CLI β†’ Agent. Natural language or direct command. + +**Input:** +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| command | string | Yes | User input (natural language or direct) | +| cwd | string | Yes | Current working directory | + +**Output:** +| Field | Type | Description | +|-------|------|-------------| +| response | string | Agent response text | +| action_taken | string | What was done | +| memories_affected | integer | Memories created/updated/deleted | + +**Errors:** +| Code | Message | When | +|------|---------|------| +| -32020 | Unknown command | Can't interpret input | +| -32021 | Permission denied | Requires confirmation | + +**Model Tier:** fast_model + +--- + +### IPC-005: forget_memory + +CLI β†’ Agent. Soft delete memory. 
+ +**Input:** +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| id | string | No | Memory ID (if known) | +| query | string | No | Search query (if ID unknown) | +| confirm | boolean | No | Skip confirmation (default false) | + +**Output:** +| Field | Type | Description | +|-------|------|-------------| +| forgotten | integer | Number of memories soft-deleted | +| ids | array | IDs of affected memories | + +**Errors:** +| Code | Message | When | +|------|---------|------| +| -32030 | Memory not found | ID doesn't exist | +| -32031 | Confirmation required | Multiple matches, needs confirm | + +--- + +## MCP Tools + +### MCP-001: squirrel_get_task_context + +Wraps IPC-002 for MCP clients. + +**Tool Definition:** +```json +{ + "name": "squirrel_get_task_context", + "description": "Get relevant memories for a coding task. Call BEFORE fixing bugs, refactoring, or adding features.", + "inputSchema": { + "type": "object", + "properties": { + "project_root": { + "type": "string", + "description": "Absolute path to project root" + }, + "task": { + "type": "string", + "description": "Description of the task" + }, + "context_budget_tokens": { + "type": "integer", + "default": 400, + "description": "Max tokens for context" + } + }, + "required": ["project_root", "task"] + } +} +``` + +--- + +### MCP-002: squirrel_search_memory + +Wraps IPC-003 for MCP clients. + +**Tool Definition:** +```json +{ + "name": "squirrel_search_memory", + "description": "Search memories by semantic query", + "inputSchema": { + "type": "object", + "properties": { + "project_root": { + "type": "string", + "description": "Absolute path to project root" + }, + "query": { + "type": "string", + "description": "Search query" + }, + "top_k": { + "type": "integer", + "default": 10, + "description": "Max results" + } + }, + "required": ["project_root", "query"] + } +} +``` + +--- + +## CLI Commands + +### CLI-001: sqrl init + +Initialize project for Squirrel. 
+
+**Usage:** `sqrl init [--skip-history]`
+
+**Flags:**
+| Flag | Description |
+|------|-------------|
+| `--skip-history` | Don't ingest historical logs |
+
+**Actions:**
+1. Create `.sqrl/squirrel.db`
+2. Scan CLI log folders for project mentions
+3. Ingest recent history (unless --skip-history)
+4. Configure MCP for enabled CLIs
+5. Inject instructions into agent files
+6. Register in `~/.sqrl/projects.json`
+
+---
+
+### CLI-002: sqrl config
+
+Configure CLI selection and settings.
+
+**Usage:** `sqrl config [key] [value]`
+
+**Interactive mode:** `sqrl config` (no args)
+**Set value:** `sqrl config llm.model claude-sonnet`
+**Get value:** `sqrl config llm.model`
+
+---
+
+### CLI-003: sqrl sync
+
+Update all projects with new CLI configs.
+
+**Usage:** `sqrl sync`
+
+**Actions:**
+1. Read enabled CLIs from config
+2. For each registered project:
+   - Add missing MCP configs
+   - Add missing agent instructions
+
+---
+
+### CLI-004: sqrl search
+
+Search memories.
+
+**Usage:** `sqrl search "<query>"`
+
+---
+
+### CLI-005: sqrl forget
+
+Soft delete memory.
+
+**Usage:** `sqrl forget <id>` or `sqrl forget "<query>"`
+
+---
+
+### CLI-006: sqrl export
+
+Export memories as JSON.
+
+**Usage:** `sqrl export [type] [--project]`
+
+**Examples:**
+- `sqrl export lesson` - Export all lessons
+- `sqrl export fact --project` - Export project facts
+
+---
+
+### CLI-007: sqrl import
+
+Import memories from JSON.
+
+**Usage:** `sqrl import <file>`
+
+---
+
+### CLI-008: sqrl status
+
+Show daemon and memory stats.
+
+**Usage:** `sqrl status`
+
+---
+
+### CLI-009: sqrl update
+
+Self-update via axoupdater.
+
+**Usage:** `sqrl update`
diff --git a/specs/KEYS.md b/specs/KEYS.md
new file mode 100644
index 0000000..3426bd4
--- /dev/null
+++ b/specs/KEYS.md
@@ -0,0 +1,109 @@
+# Squirrel Declarative Keys
+
+Registry of declarative keys for facts. Same key + different value triggers deterministic invalidation (no LLM needed).
+
+## Key Format
+
+```
+<scope>.<category>.<property>
+``` + +- **scope**: `project` or `user` +- **category**: domain grouping (db, api, language, etc.) +- **property**: specific attribute + +## Project-Scoped Keys (KEY-P-*) + +Stored with `project_id` set. Authoritative for project environment. + +| ID | Key | Description | Example Values | +|----|-----|-------------|----------------| +| KEY-P-001 | `project.db.engine` | Database engine | PostgreSQL, MySQL, SQLite, MongoDB | +| KEY-P-002 | `project.db.version` | Database version | 15, 8.0, 3.x | +| KEY-P-003 | `project.db.orm` | ORM/query builder | Prisma, SQLAlchemy, TypeORM, Diesel | +| KEY-P-004 | `project.api.framework` | API framework | FastAPI, Express, Rails, Actix | +| KEY-P-005 | `project.ui.framework` | UI framework | React, Vue, Svelte, None | +| KEY-P-006 | `project.language.main` | Primary language | Python, TypeScript, Rust, Go | +| KEY-P-007 | `project.language.version` | Language version | 3.12, 5.0, 1.75 | +| KEY-P-008 | `project.test.framework` | Test framework | pytest, jest, cargo test | +| KEY-P-009 | `project.test.command` | Test run command | `pytest`, `npm test`, `cargo test` | +| KEY-P-010 | `project.build.command` | Build command | `npm run build`, `cargo build` | +| KEY-P-011 | `project.auth.method` | Auth mechanism | JWT, session, OAuth, API key | +| KEY-P-012 | `project.package_manager` | Package manager | npm, pnpm, yarn, pip, uv, cargo | +| KEY-P-013 | `project.deploy.platform` | Deploy target | Vercel, AWS, Railway, self-hosted | +| KEY-P-014 | `project.ci.platform` | CI system | GitHub Actions, GitLab CI, CircleCI | + +## User-Scoped Keys (KEY-U-*) + +Stored with `project_id = NULL` in global db. Stable user preferences. 
+ +| ID | Key | Description | Example Values | +|----|-----|-------------|----------------| +| KEY-U-001 | `user.preferred_style` | Coding style | async_await, callbacks, sync | +| KEY-U-002 | `user.preferred_language` | Favorite language | Python, TypeScript, Rust | +| KEY-U-003 | `user.strict_null_checks` | Null handling | true, false | +| KEY-U-004 | `user.comment_style` | Comment preference | minimal, detailed, jsdoc | +| KEY-U-005 | `user.error_handling` | Error pattern | exceptions, result_types, errors | +| KEY-U-006 | `user.test_style` | Testing approach | tdd, after_impl, minimal | +| KEY-U-007 | `user.naming_convention` | Naming style | snake_case, camelCase | + +## Promotion Rules + +User preferences (`user.*` keys) require evidence before becoming keyed facts: + +| Rule | Threshold | +|------|-----------| +| Minimum success signals | 3 | +| Recency window | 7 days | +| Confidence threshold | 0.8 | + +Until promotion threshold is met, observations are stored as unkeyed facts with `evidence_source` tracking. + +## Conflict Resolution + +### Keyed Facts (Deterministic) + +``` +IF new_fact.key == existing_fact.key + AND new_fact.value != existing_fact.value: + + existing_fact.status = 'invalidated' + existing_fact.valid_to = now() + existing_fact.superseded_by = new_fact.id +``` + +No LLM judgment needed. Pure key-value match. + +### Unkeyed Facts (LLM-Assisted) + +For facts without declarative keys, LLM judges semantic conflict: + +1. Vector search for similar existing facts (top 5) +2. If similarity > 0.85, LLM evaluates conflict +3. High confidence conflict β†’ invalidate old +4. Low confidence β†’ keep both, recency weighted in retrieval + +## Fast Path Retrieval + +Keyed facts support direct lookup without embeddings: + +```sql +SELECT * FROM memories +WHERE key = 'project.db.engine' + AND project_id = ? + AND status = 'active' +``` + +Use this for environment queries before falling back to vector search. 
+ +## Access Logging + +All keyed fact lookups logged with `access_type = 'key_lookup'`: + +```json +{ + "key": "project.db.engine", + "value": "PostgreSQL", + "hit": true +} +``` diff --git a/specs/PROMPTS.md b/specs/PROMPTS.md new file mode 100644 index 0000000..27a73f2 --- /dev/null +++ b/specs/PROMPTS.md @@ -0,0 +1,308 @@ +# Squirrel Prompts + +All LLM prompts with stable IDs and model tier assignments. + +## Model Tiers + +| Tier | Purpose | Default Provider | Fallback | +|------|---------|------------------|----------| +| strong_model | Complex extraction, conflict resolution | Claude Sonnet | GPT-4o | +| fast_model | Context composition, CLI commands | Claude Haiku | GPT-4o-mini | + +## PROMPT-001: Episode Ingestion + +**Model Tier:** strong_model + +**ID:** PROMPT-001-INGEST + +**Input Variables:** +| Variable | Type | Description | +|----------|------|-------------| +| episode_events | string | Formatted event log | +| existing_memories | string | Relevant existing memories | +| project_context | string | Project facts (db, framework, etc.) | + +**System Prompt:** +``` +You are a memory extraction agent for Squirrel, a coding memory system. + +Your task: Extract lasting memories from a coding session episode. + +MEMORY TYPES: +- lesson: What worked or failed (has outcome: success|failure|uncertain) +- fact: Stable knowledge about project or user (may have declarative key) + +EXTRACTION RULES: +1. Only extract information that will be useful in future sessions +2. Skip transient details (typos fixed, temporary debug code) +3. Prefer specific over generic ("use pytest-asyncio for async tests" > "testing is important") +4. Detect user frustration signals (swear words, "finally", "ugh", repeated failures) +5. 
For facts, check if a declarative key applies (see KEYS.md) + +FRUSTRATION SIGNALS β†’ IMPORTANCE BOOST: +- severe (swearing, rage): importance = critical +- moderate ("finally", "ugh", 3+ retries): importance = high +- mild (sigh, minor complaint): importance = medium +- none: importance based on content + +OUTPUT FORMAT (JSON array): +[ + { + "memory_type": "lesson", + "outcome": "success|failure|uncertain", + "text": "human-readable memory content", + "confidence": 0.0-1.0, + "importance": "critical|high|medium|low", + "metadata": { + "anchors": {"files": [], "components": [], "endpoints": []}, + "user_frustration": "none|mild|moderate|severe" + } + }, + { + "memory_type": "fact", + "key": "project.db.engine or null", + "value": "PostgreSQL or null", + "text": "human-readable fact", + "evidence_source": "success|failure|neutral", + "confidence": 0.0-1.0, + "importance": "critical|high|medium|low" + } +] +``` + +**User Prompt Template:** +``` +PROJECT CONTEXT: +{project_context} + +EXISTING MEMORIES (avoid duplicates): +{existing_memories} + +EPISODE EVENTS: +{episode_events} + +Extract memories from this episode. Return JSON array only. +``` + +--- + +## PROMPT-002: Context Composition + +**Model Tier:** fast_model + +**ID:** PROMPT-002-COMPOSE + +**Input Variables:** +| Variable | Type | Description | +|----------|------|-------------| +| task | string | User's task description | +| candidate_memories | string | Top-k retrieved memories | +| token_budget | integer | Max tokens for output | + +**System Prompt:** +``` +You are a context composer for Squirrel memory system. + +Your task: Select and format the most relevant memories for a coding task. + +SELECTION CRITERIA: +1. Direct relevance to the task +2. Recency (prefer recent over old) +3. Importance level (critical > high > medium > low) +4. Outcome for lessons (success patterns > failure warnings) + +OUTPUT FORMAT: +Return a concise prompt injection, max {token_budget} tokens. 
+Format as bullet points grouped by relevance. + +FAST PATH: +If task is trivial (typo fix, comment change, formatting), return empty string immediately. +``` + +**User Prompt Template:** +``` +TASK: {task} + +CANDIDATE MEMORIES: +{candidate_memories} + +TOKEN BUDGET: {token_budget} + +Select and format relevant memories. Return prompt text only. +``` + +--- + +## PROMPT-003: Conflict Detection + +**Model Tier:** strong_model + +**ID:** PROMPT-003-CONFLICT + +**Input Variables:** +| Variable | Type | Description | +|----------|------|-------------| +| new_fact | string | Newly extracted fact | +| similar_facts | string | Existing facts with similarity > 0.85 | + +**System Prompt:** +``` +You are a conflict detector for Squirrel memory system. + +Your task: Determine if a new fact contradicts existing facts. + +CONFLICT TYPES: +1. DIRECT: Same subject, different value ("uses PostgreSQL" vs "uses MySQL") +2. PARTIAL: Overlapping scope, incompatible details +3. NONE: Different subjects or compatible information + +OUTPUT FORMAT (JSON): +{ + "conflict_type": "direct|partial|none", + "conflicting_fact_id": "id or null", + "confidence": 0.0-1.0, + "reasoning": "brief explanation" +} + +RULES: +- Keyed facts with same key are handled deterministically (not your job) +- Only evaluate unkeyed facts here +- When uncertain, prefer "none" (keep both facts) +``` + +**User Prompt Template:** +``` +NEW FACT: +{new_fact} + +EXISTING SIMILAR FACTS: +{similar_facts} + +Evaluate conflict. Return JSON only. +``` + +--- + +## PROMPT-004: CLI Command Interpretation + +**Model Tier:** fast_model + +**ID:** PROMPT-004-CLI + +**Input Variables:** +| Variable | Type | Description | +|----------|------|-------------| +| user_input | string | Raw user command | +| available_commands | string | List of valid commands | + +**System Prompt:** +``` +You are a command interpreter for Squirrel CLI. + +Your task: Map natural language to structured commands. 
+
+AVAILABLE COMMANDS:
+- search <query>: Search memories
+- forget <id>: Delete memory
+- export [type]: Export memories
+- status: Show stats
+
+OUTPUT FORMAT (JSON):
+{
+  "command": "search|forget|export|status|unknown",
+  "args": {},
+  "confirmation_needed": true|false
+}
+
+RULES:
+- "forget" always needs confirmation unless explicit ID given
+- Ambiguous input β†’ command: "unknown"
+```
+
+**User Prompt Template:**
+```
+USER INPUT: {user_input}
+
+AVAILABLE COMMANDS:
+{available_commands}
+
+Interpret command. Return JSON only.
+```
+
+---
+
+## PROMPT-005: User Preference Extraction
+
+**Model Tier:** strong_model
+
+**ID:** PROMPT-005-PREFERENCE
+
+**Input Variables:**
+| Variable | Type | Description |
+|----------|------|-------------|
+| episode_events | string | Episode with user behavior signals |
+| existing_preferences | string | Current user.* facts |
+
+**System Prompt:**
+```
+You are a preference extractor for Squirrel memory system.
+
+Your task: Identify stable user preferences from coding behavior.
+
+OBSERVABLE PREFERENCES:
+- Coding style (async/sync, error handling pattern)
+- Naming conventions (snake_case, camelCase)
+- Comment style (minimal, detailed, jsdoc)
+- Testing approach (TDD, after implementation)
+
+EXTRACTION RULES:
+1. Only extract from repeated behavior (not one-off)
+2. Success signals weigh more than neutral
+3. User corrections are strongest signal
+4. Map to user.* keys when applicable
+
+OUTPUT FORMAT (JSON array):
+[
+  {
+    "key": "user.preferred_style",
+    "value": "async_await",
+    "evidence_type": "success|correction|repeated",
+    "confidence": 0.0-1.0
+  }
+]
+
+Return empty array if no clear preferences detected.
+```
+
+**User Prompt Template:**
+```
+EXISTING PREFERENCES:
+{existing_preferences}
+
+EPISODE EVENTS:
+{episode_events}
+
+Extract user preferences. Return JSON array only.
+``` + +--- + +## Token Budgets + +| Prompt ID | Max Input | Max Output | +|-----------|-----------|------------| +| PROMPT-001 | 8000 | 2000 | +| PROMPT-002 | 4000 | 500 | +| PROMPT-003 | 2000 | 200 | +| PROMPT-004 | 500 | 100 | +| PROMPT-005 | 6000 | 500 | + +## Error Handling + +All prompts must handle: +| Error | Action | +|-------|--------| +| Rate limit | Exponential backoff, max 3 retries | +| Invalid JSON | Re-prompt with stricter format instruction | +| Timeout | Log, return empty result, don't block | +| Content filter | Log, skip memory, continue | diff --git a/specs/SCHEMAS.md b/specs/SCHEMAS.md new file mode 100644 index 0000000..2e79e09 --- /dev/null +++ b/specs/SCHEMAS.md @@ -0,0 +1,205 @@ +# Squirrel Schemas + +All data schemas with stable IDs. AI agents must reference these IDs when implementing storage logic. + +## SCHEMA-001: memories + +Primary storage for all memory types. + +```sql +CREATE TABLE memories ( + id TEXT PRIMARY KEY, -- UUID + project_id TEXT, -- NULL for global scope + memory_type TEXT NOT NULL, -- lesson | fact | profile + + -- Lesson fields (SCHEMA-001-L) + outcome TEXT, -- success | failure | uncertain + + -- Fact fields (SCHEMA-001-F) + fact_type TEXT, -- knowledge | process (optional) + key TEXT, -- declarative key (see KEYS.md) + value TEXT, -- declarative value + evidence_source TEXT, -- success | failure | neutral | manual + support_count INTEGER DEFAULT 1, -- episodes supporting this fact + last_seen_at TEXT, -- last episode timestamp + + -- Content (SCHEMA-001-C) + text TEXT NOT NULL, -- human-readable content + embedding BLOB, -- 1536-dim float32 vector + metadata TEXT, -- JSON: anchors, user_frustration, etc. 
+ + -- Scoring (SCHEMA-001-S) + confidence REAL NOT NULL, -- 0.0 - 1.0 + importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low + + -- Lifecycle (SCHEMA-001-LC) + status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated + valid_from TEXT NOT NULL, -- ISO 8601 timestamp + valid_to TEXT, -- NULL = still valid + superseded_by TEXT, -- memory_id that replaced this + + -- Audit (SCHEMA-001-A) + user_id TEXT NOT NULL DEFAULT 'local', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE INDEX idx_memories_project ON memories(project_id); +CREATE INDEX idx_memories_type ON memories(memory_type); +CREATE INDEX idx_memories_key ON memories(key); +CREATE INDEX idx_memories_status ON memories(status); +``` + +### Field Constraints + +| Field | Type | Nullable | Allowed Values | +|-------|------|----------|----------------| +| memory_type | TEXT | No | `lesson`, `fact`, `profile` | +| outcome | TEXT | Yes | `success`, `failure`, `uncertain` (lesson only) | +| evidence_source | TEXT | Yes | `success`, `failure`, `neutral`, `manual` (fact only) | +| importance | TEXT | No | `critical`, `high`, `medium`, `low` | +| status | TEXT | No | `active`, `inactive`, `invalidated` | + +### Metadata JSON Schema (SCHEMA-001-M) + +```json +{ + "anchors": { + "files": ["string"], + "components": ["string"], + "endpoints": ["string"] + }, + "user_frustration": "none | mild | moderate | severe", + "source_segments": ["string"], + "source_episode_id": "string" +} +``` + +--- + +## SCHEMA-002: events + +Raw events from CLI logs (internal, not exposed to users). 
+ +```sql +CREATE TABLE events ( + id TEXT PRIMARY KEY, + repo TEXT NOT NULL, -- absolute path + kind TEXT NOT NULL, -- user | assistant | tool | system + content TEXT NOT NULL, + file_paths TEXT, -- JSON array + ts TEXT NOT NULL, -- ISO 8601 + processed INTEGER DEFAULT 0 +); + +CREATE INDEX idx_events_repo ON events(repo); +CREATE INDEX idx_events_processed ON events(processed); +``` + +### Field Constraints + +| Field | Type | Nullable | Allowed Values | +|-------|------|----------|----------------| +| kind | TEXT | No | `user`, `assistant`, `tool`, `system` | +| processed | INTEGER | No | `0` (pending), `1` (processed) | + +--- + +## SCHEMA-003: user_profile + +Structured user identity (separate from memory-based profile). + +```sql +CREATE TABLE user_profile ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, + source TEXT NOT NULL, -- explicit | inferred + confidence REAL, + updated_at TEXT NOT NULL +); +``` + +### Standard Keys + +| Key | Description | Example | +|-----|-------------|---------| +| `name` | User's name | "Alice" | +| `role` | Job role | "Backend Developer" | +| `experience_level` | Skill level | "Senior" | +| `company` | Organization | "Acme Inc" | +| `primary_use_case` | Main work type | "API development" | + +--- + +## SCHEMA-004: memory_history + +Audit trail for memory changes. + +```sql +CREATE TABLE memory_history ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + old_content TEXT, -- NULL for ADD + new_content TEXT NOT NULL, + event TEXT NOT NULL, -- ADD | UPDATE | DELETE + created_at TEXT NOT NULL, + FOREIGN KEY (memory_id) REFERENCES memories(id) +); + +CREATE INDEX idx_history_memory ON memory_history(memory_id); +``` + +--- + +## SCHEMA-005: memory_access_log + +Debugging and analytics for retrieval. 
+ +```sql +CREATE TABLE memory_access_log ( + id TEXT PRIMARY KEY, + memory_id TEXT NOT NULL, + access_type TEXT NOT NULL, -- search | get_context | list | key_lookup + query TEXT, + score REAL, + metadata TEXT, -- JSON + accessed_at TEXT NOT NULL, + FOREIGN KEY (memory_id) REFERENCES memories(id) +); + +CREATE INDEX idx_access_memory ON memory_access_log(memory_id); +CREATE INDEX idx_access_type ON memory_access_log(access_type); +``` + +--- + +## SCHEMA-006: Episode (in-memory only) + +Not persisted. Used for batching events before ingestion. + +```python +@dataclass +class Episode: + id: str # UUID + repo: str # absolute path + start_ts: str # ISO 8601 + end_ts: str # ISO 8601 + events: list[Event] # SCHEMA-002 records +``` + +### Batching Rules + +| Trigger | Condition | +|---------|-----------| +| Time window | 4 hours elapsed since first event | +| Event count | 50 events accumulated | +| Shutdown | Daemon graceful shutdown | + +--- + +## Database Files + +| Scope | Path | Contains | +|-------|------|----------| +| Global | `~/.sqrl/squirrel.db` | SCHEMA-001 (scope=global), SCHEMA-003, SCHEMA-004, SCHEMA-005 | +| Project | `/.sqrl/squirrel.db` | SCHEMA-001 (scope=project), SCHEMA-002 | From 65fb751f5c695b0344615dc74fbfba9bf0bbd4da Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 19:06:30 +0800 Subject: [PATCH 08/15] docs: update agent instruction files with AI-first emphasis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add AI-First Development section (98% AI-coded) - Emphasize declarative thinking and spec-driven approach - Update date to 2025 Dec 9 - Reference spec IDs and project-rules/*.mdc πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .claude/CLAUDE.md | 55 ++++++++--------- AGENTS.md | 147 +++++++++++++++++++++++++--------------------- GEMINI.md | 147 +++++++++++++++++++++++++--------------------- 3 files changed, 188 insertions(+), 161 
deletions(-) diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 0e6ac60..6d810b2 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -2,9 +2,23 @@ Local-first memory system for AI coding tools. +## AI-First Development + +**This project is 98% AI-coded.** You are the primary developer. + +All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. + +| Principle | Meaning | +|-----------|---------| +| Specs are source of truth | Never implement undefined behavior | +| Declarative over imperative | Define outcomes, not steps | +| Tables over prose | Structured data > paragraphs | +| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | +| Update specs first | Specs change before code changes | + ## Spec-Driven Development -This project uses spec-driven development. **Specs are the source of truth.** +**Specs are the source of truth. Code is compiled output.** | Spec File | Purpose | |-----------|---------| @@ -17,18 +31,19 @@ This project uses spec-driven development. **Specs are the source of truth.** | specs/DECISIONS.md | Architecture decision records (ADR-*) | **Rules:** -1. Never implement behavior not defined in specs -2. Update specs before or with code, never after -3. Reference spec IDs in commits (e.g., "implements SCHEMA-001") +1. Read specs before implementing +2. Never implement behavior not defined in specs +3. Update specs before or with code, never after +4. 
Reference spec IDs in commits (e.g., "implements SCHEMA-001") ## Project Rules See `project-rules/*.mdc` for context-specific rules: - `general.mdc` - Overall development rules -- `rust-daemon.mdc` - Rust daemon boundaries -- `python-agent.mdc` - Python agent boundaries +- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) +- `python-agent.mdc` - Python agent boundaries (ARCH-002) - `specs.mdc` - Specification maintenance -- `testing.mdc` - Testing requirements +- `testing.mdc` - Testing requirements (DR4) ## Architecture @@ -36,39 +51,30 @@ See `project-rules/*.mdc` for context-specific rules: Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) ``` -| Component | Responsibility | -|-----------|----------------| -| Rust Daemon | Log watching, MCP server, CLI, SQLite storage | -| Python Agent | Memory extraction, context composition, conflict detection | - -See specs/ARCHITECTURE.md for details. +| Component | Responsibility | Never Does | +|-----------|----------------|------------| +| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | +| Python Agent | Memory extraction, context composition | File watching | ## Development Environment -Uses Nix via devenv. Single command setup: +Uses Nix via devenv (ADR-006). 
Single command: ```bash devenv shell ``` -Available commands: -- `test-all` - Run all tests -- `dev-daemon` - Start daemon in dev mode -- `fmt` - Format all code -- `lint` - Lint all code - ## Team Standards ### Communication - No unnecessary emojis -- Documentation written for AI comprehension - English only in code, comments, commits - Brief, direct language +- Today's date: 2025 Dec 9 ### Git Workflow Branch: `yourname/type-description` -- `feat`, `fix`, `refactor`, `docs`, `test`, `chore` Commit: `type(scope): brief description` - Reference spec IDs when applicable @@ -78,8 +84,3 @@ Commit: `type(scope): brief description` - Keep files under 200 lines - Only change what's necessary (DR5) - No drive-by refactoring - -### Security -- Never commit secrets -- Validate user input -- Review AI-generated code diff --git a/AGENTS.md b/AGENTS.md index 193498b..6d810b2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,73 +1,86 @@ -# Squirrel Project - Team Standards - -## Team Communication Guidelines -1. DONT use unnecessary emojis that will affect our communication efficiency -2. READMEs and comments are for AI, not for humans; they should be written in a manner that facilitates AI comprehension -3. Always remain calm, do not seek quick success and instant benefits, and do not celebrate prematurely -4. Do not pander to ideas. If proposed solutions or concepts are incorrect or difficult to implement, point them out -5. Today is 2025 Nov23, if doing search tasks, search the latest -6. Do not display code when discussing solutions; it is a waste of time -7. 
All context in this project should be English, including commits, they should be brief English - -## Git Workflow - -### Branch Naming Convention -Format: `yourname/type-description` - -Types: -- `feat` - New feature -- `fix` - Bug fix -- `refactor` - Code refactoring -- `docs` - Documentation -- `test` - Test additions/changes -- `chore` - Maintenance tasks - -Examples: -- `lyrica/feat-add-authentication` -- `alice/fix-memory-leak` -- `bob/docs-update-api` - -### Commit Message Format -Format: `type(scope): brief english description` - -Keep commits brief and in English. - -Examples: -- `feat(auth): add JWT validation` -- `fix(api): handle null user` -- `docs(readme): update setup` - -### Pull Request Process -1. Create branch from `main` -2. Make changes and test -3. Push branch -4. Create PR on GitHub -5. Get 1 approval from teammate -6. Merge to main - -## Development Standards +# Squirrel Project -### Code Quality -- Write tests for new features -- Run linter before commit -- Keep files under 200 lines when possible -- Use descriptive names +Local-first memory system for AI coding tools. + +## AI-First Development + +**This project is 98% AI-coded.** You are the primary developer. + +All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. + +| Principle | Meaning | +|-----------|---------| +| Specs are source of truth | Never implement undefined behavior | +| Declarative over imperative | Define outcomes, not steps | +| Tables over prose | Structured data > paragraphs | +| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | +| Update specs first | Specs change before code changes | + +## Spec-Driven Development + +**Specs are the source of truth. 
Code is compiled output.** + +| Spec File | Purpose | +|-----------|---------| +| specs/CONSTITUTION.md | Project governance, core principles | +| specs/ARCHITECTURE.md | System boundaries, data flow | +| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | +| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | +| specs/KEYS.md | Declarative key registry (KEY-*) | +| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | +| specs/DECISIONS.md | Architecture decision records (ADR-*) | + +**Rules:** +1. Read specs before implementing +2. Never implement behavior not defined in specs +3. Update specs before or with code, never after +4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") + +## Project Rules -### Security -- Never commit secrets (.env, API keys) -- Always validate user input -- Review AI-generated code for security issues +See `project-rules/*.mdc` for context-specific rules: +- `general.mdc` - Overall development rules +- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) +- `python-agent.mdc` - Python agent boundaries (ARCH-002) +- `specs.mdc` - Specification maintenance +- `testing.mdc` - Testing requirements (DR4) -## Team Collaboration +## Architecture -All 3 team members are full-stack and can work on any part of the codebase. +``` +Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) +``` + +| Component | Responsibility | Never Does | +|-----------|----------------|------------| +| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | +| Python Agent | Memory extraction, context composition | File watching | + +## Development Environment + +Uses Nix via devenv (ADR-006). 
Single command: + +```bash +devenv shell +``` + +## Team Standards ### Communication -- Announce what you're working on in issues/PR -- If touching shared files, communicate with team -- Sync frequently: `git pull origin main` daily - -### Conflict Prevention -- Pull latest before starting work -- Create focused branches for specific tasks -- Communicate when working on same areas +- No unnecessary emojis +- English only in code, comments, commits +- Brief, direct language +- Today's date: 2025 Dec 9 + +### Git Workflow + +Branch: `yourname/type-description` + +Commit: `type(scope): brief description` +- Reference spec IDs when applicable + +### Code Quality +- Write tests for new features (DR4) +- Keep files under 200 lines +- Only change what's necessary (DR5) +- No drive-by refactoring diff --git a/GEMINI.md b/GEMINI.md index 193498b..6d810b2 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,73 +1,86 @@ -# Squirrel Project - Team Standards - -## Team Communication Guidelines -1. DONT use unnecessary emojis that will affect our communication efficiency -2. READMEs and comments are for AI, not for humans; they should be written in a manner that facilitates AI comprehension -3. Always remain calm, do not seek quick success and instant benefits, and do not celebrate prematurely -4. Do not pander to ideas. If proposed solutions or concepts are incorrect or difficult to implement, point them out -5. Today is 2025 Nov23, if doing search tasks, search the latest -6. Do not display code when discussing solutions; it is a waste of time -7. 
All context in this project should be English, including commits, they should be brief English - -## Git Workflow - -### Branch Naming Convention -Format: `yourname/type-description` - -Types: -- `feat` - New feature -- `fix` - Bug fix -- `refactor` - Code refactoring -- `docs` - Documentation -- `test` - Test additions/changes -- `chore` - Maintenance tasks - -Examples: -- `lyrica/feat-add-authentication` -- `alice/fix-memory-leak` -- `bob/docs-update-api` - -### Commit Message Format -Format: `type(scope): brief english description` - -Keep commits brief and in English. - -Examples: -- `feat(auth): add JWT validation` -- `fix(api): handle null user` -- `docs(readme): update setup` - -### Pull Request Process -1. Create branch from `main` -2. Make changes and test -3. Push branch -4. Create PR on GitHub -5. Get 1 approval from teammate -6. Merge to main - -## Development Standards +# Squirrel Project -### Code Quality -- Write tests for new features -- Run linter before commit -- Keep files under 200 lines when possible -- Use descriptive names +Local-first memory system for AI coding tools. + +## AI-First Development + +**This project is 98% AI-coded.** You are the primary developer. + +All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. + +| Principle | Meaning | +|-----------|---------| +| Specs are source of truth | Never implement undefined behavior | +| Declarative over imperative | Define outcomes, not steps | +| Tables over prose | Structured data > paragraphs | +| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | +| Update specs first | Specs change before code changes | + +## Spec-Driven Development + +**Specs are the source of truth. 
Code is compiled output.** + +| Spec File | Purpose | +|-----------|---------| +| specs/CONSTITUTION.md | Project governance, core principles | +| specs/ARCHITECTURE.md | System boundaries, data flow | +| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | +| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | +| specs/KEYS.md | Declarative key registry (KEY-*) | +| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | +| specs/DECISIONS.md | Architecture decision records (ADR-*) | + +**Rules:** +1. Read specs before implementing +2. Never implement behavior not defined in specs +3. Update specs before or with code, never after +4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") + +## Project Rules -### Security -- Never commit secrets (.env, API keys) -- Always validate user input -- Review AI-generated code for security issues +See `project-rules/*.mdc` for context-specific rules: +- `general.mdc` - Overall development rules +- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) +- `python-agent.mdc` - Python agent boundaries (ARCH-002) +- `specs.mdc` - Specification maintenance +- `testing.mdc` - Testing requirements (DR4) -## Team Collaboration +## Architecture -All 3 team members are full-stack and can work on any part of the codebase. +``` +Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) +``` + +| Component | Responsibility | Never Does | +|-----------|----------------|------------| +| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | +| Python Agent | Memory extraction, context composition | File watching | + +## Development Environment + +Uses Nix via devenv (ADR-006). 
Single command: + +```bash +devenv shell +``` + +## Team Standards ### Communication -- Announce what you're working on in issues/PR -- If touching shared files, communicate with team -- Sync frequently: `git pull origin main` daily - -### Conflict Prevention -- Pull latest before starting work -- Create focused branches for specific tasks -- Communicate when working on same areas +- No unnecessary emojis +- English only in code, comments, commits +- Brief, direct language +- Today's date: 2025 Dec 9 + +### Git Workflow + +Branch: `yourname/type-description` + +Commit: `type(scope): brief description` +- Reference spec IDs when applicable + +### Code Quality +- Write tests for new features (DR4) +- Keep files under 200 lines +- Only change what's necessary (DR5) +- No drive-by refactoring From a026be6e567653db5fc56beb96ec4c1db587def2 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 19:11:14 +0800 Subject: [PATCH 09/15] fix: remove .mcp.json (contains secrets), update .cursorrules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove .mcp.json from git (contains GitHub tokens) - Remove Zone.Identifier Windows metadata files - Add .mcp.json and *:Zone.Identifier to .gitignore - Update .cursorrules with AI-first development emphasis πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .claude/CLAUDE.md:Zone.Identifier | Bin 25 -> 0 bytes .cursorrules | 147 ++++++++++++++++-------------- .gitignore | 6 ++ .mcp.json | 80 ---------------- .mcp.json:Zone.Identifier | Bin 25 -> 0 bytes 5 files changed, 86 insertions(+), 147 deletions(-) delete mode 100644 .claude/CLAUDE.md:Zone.Identifier delete mode 100644 .mcp.json delete mode 100644 .mcp.json:Zone.Identifier diff --git a/.claude/CLAUDE.md:Zone.Identifier b/.claude/CLAUDE.md:Zone.Identifier deleted file mode 100644 index d6c1ec682968c796b9f5e9e080cc6f674b57c766..0000000000000000000000000000000000000000 GIT binary 
patch literal 0 HcmV?d00001 literal 25 dcma!!%Fjy;DN4*MPD?F{<>dl#JyUFr831@K2x paragraphs | +| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | +| Update specs first | Specs change before code changes | + +## Spec-Driven Development + +**Specs are the source of truth. Code is compiled output.** + +| Spec File | Purpose | +|-----------|---------| +| specs/CONSTITUTION.md | Project governance, core principles | +| specs/ARCHITECTURE.md | System boundaries, data flow | +| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | +| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | +| specs/KEYS.md | Declarative key registry (KEY-*) | +| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | +| specs/DECISIONS.md | Architecture decision records (ADR-*) | + +**Rules:** +1. Read specs before implementing +2. Never implement behavior not defined in specs +3. Update specs before or with code, never after +4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") + +## Project Rules -### Security -- Never commit secrets (.env, API keys) -- Always validate user input -- Review AI-generated code for security issues +See `project-rules/*.mdc` for context-specific rules: +- `general.mdc` - Overall development rules +- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) +- `python-agent.mdc` - Python agent boundaries (ARCH-002) +- `specs.mdc` - Specification maintenance +- `testing.mdc` - Testing requirements (DR4) -## Team Collaboration +## Architecture -All 3 team members are full-stack and can work on any part of the codebase. +``` +Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) +``` + +| Component | Responsibility | Never Does | +|-----------|----------------|------------| +| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | +| Python Agent | Memory extraction, context composition | File watching | + +## Development Environment + +Uses Nix via devenv (ADR-006). 
Single command: + +```bash +devenv shell +``` + +## Team Standards ### Communication -- Announce what you're working on in issues/PR -- If touching shared files, communicate with team -- Sync frequently: `git pull origin main` daily - -### Conflict Prevention -- Pull latest before starting work -- Create focused branches for specific tasks -- Communicate when working on same areas +- No unnecessary emojis +- English only in code, comments, commits +- Brief, direct language +- Today's date: 2025 Dec 9 + +### Git Workflow + +Branch: `yourname/type-description` + +Commit: `type(scope): brief description` +- Reference spec IDs when applicable + +### Code Quality +- Write tests for new features (DR4) +- Keep files under 200 lines +- Only change what's necessary (DR5) +- No drive-by refactoring diff --git a/.gitignore b/.gitignore index 32ee2df..3040cbb 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,12 @@ agent.md docs/internal/ +# MCP config (contains API keys) +.mcp.json + +# Windows Zone.Identifier metadata +*:Zone.Identifier + # Environment variables .env .env.local diff --git a/.mcp.json b/.mcp.json deleted file mode 100644 index 8488c2f..0000000 --- a/.mcp.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "mcpServers": { - "context7": { - "type": "http", - "url": "https://mcp.context7.com/mcp" - }, - "filesystem": { - "type": "stdio", - "command": "node", - "args": [ - "/home/lyrica/.npm/_npx/a3241bba59c344f5/node_modules/@modelcontextprotocol/server-filesystem/dist/index.js", - "/home/lyrica/Offer_I" - ], - "env": {} - }, - "github": { - "type": "stdio", - "command": "node", - "args": [ - "/home/lyrica/.npm/_npx/3dfbf5a9eea4a1b3/node_modules/@modelcontextprotocol/server-github/dist/index.js" - ], - "env": { - "GITHUB_TOKEN": "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" - } - }, - "firecrawl": { - "type": "stdio", - "command": "/home/lyrica/Offer_I/.firecrawl-mcp-wrapper.sh", - "args": [], - "env": { - 
"FIRECRAWL_API_URL": "http://127.0.0.1:3002", - "FIRECRAWL_API_KEY": "fc-local-test-key" - } - }, - "deepwiki": { - "type": "sse", - "url": "https://mcp.deepwiki.com/sse" - }, - "github-hosted": { - "type": "http", - "url": "https://api.githubcopilot.com/mcp/", - "headers": { - "Authorization": "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" - } - }, - "playwright": { - "type": "stdio", - "command": "npx", - "args": [ - "@playwright/mcp@latest", - "--user-data-dir=./playwright-profile", - "--headless", - "--browser=chromium" - ], - "env": {} - }, - "bright-data": { - "type": "stdio", - "command": "npx", - "args": [ - "@brightdata/mcp" - ], - "env": { - "API_TOKEN": "d5822896f2b414ae02df8c5cc1991ecca7b8b97542bc6086cfaa76304788497d", - "WEB_UNLOCKER_ZONE": "mcp_unlocker", - "BROWSER_ZONE": "mcp_browser" - } - }, - "exa": { - "type": "stdio", - "command": "node", - "args": [ - "/home/lyrica/Offer_I/exa-mcp-server-official/.smithery/stdio/index.cjs" - ], - "env": { - "EXA_API_KEY": "8c942d3a-744c-4666-a1d4-ca9b2842e6e5" - } - } - } -} diff --git a/.mcp.json:Zone.Identifier b/.mcp.json:Zone.Identifier deleted file mode 100644 index d6c1ec682968c796b9f5e9e080cc6f674b57c766..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25 dcma!!%Fjy;DN4*MPD?F{<>dl#JyUFr831@K2x Date: Tue, 9 Dec 2025 19:16:22 +0800 Subject: [PATCH 10/15] docs: add frustration detection to development plan and example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 17 ++++++++++++++--- EXAMPLE.md | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md index bdcfcbe..0fbb374 100644 --- a/DEVELOPMENT_PLAN.md +++ b/DEVELOPMENT_PLAN.md @@ -602,12 +602,19 @@ LLM 
analyzes entire Episode in ONE call (segment-first approach): - RESEARCH_LEARNING β†’ fact (knowledge), lesson (key learnings) - DISCUSSION β†’ profile (user preferences), lesson (insights) -4. **Contradiction check for facts:** +4. **Detect user frustration signals** and boost importance: + - Swear words, angry language β†’ `importance: critical` + - Repeated complaints ("again", "still broken") β†’ `importance: high` + - Mild frustration β†’ `importance: medium` (default) + - Add `metadata.user_frustration`: none | mild | moderate | severe + - Frustration signals make associated failure lessons higher priority in retrieval + +5. **Contradiction check for facts:** - Extract semantic_key if possible (db.engine, api.framework, etc.) - Check existing facts with same key β†’ invalidate old if conflict - Free-text facts β†’ LLM judges semantic conflict -5. **Near-duplicate check** before ADD (0.9 similarity threshold) +6. **Near-duplicate check** before ADD (0.9 similarity threshold) UUIDβ†’integer mapping when showing existing memories to LLM (prevents hallucination). @@ -793,8 +800,11 @@ Supports multiple providers via config. Batch embedding with retry logic. Similarity search via sqlite-vec. -Scoring: `w_sim * similarity + w_imp * importance_weight + w_rec * recency` +Scoring: `w_sim * similarity + w_imp * importance_weight + w_rec * recency + w_frust * frustration_boost` - importance_weight: critical=1.0, high=0.75, medium=0.5, low=0.25 +- frustration_boost: severe=0.3, moderate=0.2, mild=0.1, none=0.0 + +Frustration-flagged memories surface earlier to prevent recurring pain points. Access logging to memory_access_log table. @@ -938,6 +948,7 @@ Windows note: MSI recommended over raw .exe to reduce SmartScreen/AV friction. 
- Fact contradiction detection (declarative key match + LLM for free-text) - Soft delete (`sqrl forget`) - no hard purge - Near-duplicate deduplication (0.9 threshold) +- Frustration detection (swear words, anger β†’ boost importance, prioritize in retrieval) - Cross-platform (Mac, Linux, Windows) - Export/import memories (JSON) - Auto-update (`sqrl update`) diff --git a/EXAMPLE.md b/EXAMPLE.md index d99e34e..17ef83d 100644 --- a/EXAMPLE.md +++ b/EXAMPLE.md @@ -12,7 +12,8 @@ Squirrel watches AI tool logs, groups events into **Episodes** (4-hour time wind - `RESEARCH_LEARNING` - learning, exploring docs - `DISCUSSION` - brainstorming, chat 2. **Classify Outcomes** - Only for EXECUTION_TASK: SUCCESS | FAILURE | UNCERTAIN (with evidence) -3. **Extract Memories** - Based on segment kind, not just success/failure +3. **Detect Frustration** - Swear words, anger β†’ boost memory importance +4. **Extract Memories** - Based on segment kind, not just success/failure Episode = batch of events from same repo within 4-hour window (internal batching, not a product concept). @@ -166,14 +167,14 @@ Alice: "Use async def, add type hints, and write a pytest fixture" Claude Code: [Revises code with async, types, fixture] Alice: "Perfect, tests pass!" -# Task 2: Fix auth bug (FAILURE then SUCCESS) +# Task 2: Fix auth bug (FAILURE then SUCCESS, with frustration) Alice: "There's an auth loop bug when tokens expire" Claude Code: "Let me check localStorage..." [Error persists] Alice: "Still broken" Claude Code: "Let me try checking cookies..." [Error persists] -Alice: "That didn't work either" +Alice: "This is so frustrating, we've been going in circles!" Claude Code: "I think the issue is in useEffect cleanup..." [Implements fix] Alice: "That fixed it, thanks!" 
@@ -273,7 +274,8 @@ async def ingest_episode(episode: dict) -> dict: "outcome": { "status": "SUCCESS", "evidence": ["User said 'That fixed it, thanks!'"] - } + }, + "user_frustration": "moderate" } ], "memories": [ @@ -292,6 +294,8 @@ async def ingest_episode(episode: dict) -> dict: "outcome": "failure", "scope": "project", "text": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", + "importance": "high", + "metadata": {"user_frustration": "moderate"}, "source_segments": ["seg_2"], "confidence": 0.9 }, @@ -328,7 +332,7 @@ Analyze this coding session (~4 hours of activity): [assistant] Let me check localStorage... [user] Still broken [assistant] Let me try checking cookies... -[user] That didn't work either +[user] This is so frustrating, we've been going in circles! [assistant] I think the issue is in useEffect cleanup... [user] That fixed it, thanks! @@ -347,7 +351,13 @@ Analyze this session using SEGMENT-FIRST approach: IMPORTANT: Other segment kinds NEVER have SUCCESS/FAILURE. -3. Extract memories based on segment kind: +3. Detect user frustration signals: + - Swear words, strong anger β†’ importance: critical, user_frustration: severe + - Repeated complaints ("again", "still broken") β†’ importance: high, user_frustration: moderate + - Mild frustration β†’ importance: medium, user_frustration: mild + - No frustration β†’ importance: medium, user_frustration: none + +4. 
Extract memories based on segment kind: - EXECUTION_TASK: lesson (with outcome), fact (knowledge discovered) - PLANNING_DECISION: fact (decisions), lesson (rationale), profile - RESEARCH_LEARNING: fact (knowledge), lesson (learnings) @@ -598,6 +608,7 @@ CREATE TABLE user_profile ( | Batching | Groups events into Episodes (4hr OR 50 events) | | **Segmentation** | Agent segments by kind: EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION | | **Outcome** | For EXECUTION_TASK only: SUCCESS/FAILURE/UNCERTAIN (with evidence) | +| **Frustration** | Detects anger/swearing β†’ boosts importance (critical/high), stores user_frustration in metadata | | Extraction | Based on segment kind: lesson (with outcome), fact (with key/evidence_source), profile | | **Declarative Keys** | Facts with project.* or user.* keys enable deterministic conflict detection | | **Contradiction** | Same key + different value β†’ old fact invalidated (no LLM); free-text β†’ LLM judges | @@ -636,6 +647,7 @@ Contradiction detection auto-invalidates old facts when new conflicting facts ar | **Segment-first** | Segment by kind before outcome classification | Not all sessions are tasks with outcomes | | **Segment kinds** | EXECUTION_TASK / PLANNING / RESEARCH / DISCUSSION | Different session types produce different memories | | **Outcome only for EXECUTION_TASK** | SUCCESS/FAILURE/UNCERTAIN with evidence | Avoid classifying discussions as "failures" | +| **Frustration detection** | Anger/swearing β†’ importance boost + metadata flag | High-pain failures get prioritized in retrieval | | Memory extraction | Based on segment kind | Architecture produces facts, coding produces lessons | | **Declarative keys** | project.* and user.* keys for facts | Deterministic conflict detection (no LLM) | | **Evidence source** | success/failure/neutral/manual on facts | Track how a fact was learned | @@ -663,7 +675,7 @@ Contradiction detection auto-invalidates old facts when new conflicting facts ar | Type 
| Key Fields | Description | Example | |------|------------|-------------|---------| -| `lesson` | outcome (success/failure/uncertain) | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | +| `lesson` | outcome, importance, user_frustration | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | | `fact` | key, value, evidence_source | Project/user knowledge | key=project.db.engine, value=PostgreSQL | | `profile` | (structured identity) | User background info | name, role, experience_level | @@ -690,6 +702,19 @@ How a fact was learned: - `neutral` - Observed in planning/research/discussion - `manual` - User explicitly stated via CLI +### Frustration Detection (Lessons) + +User frustration signals boost memory importance: + +| Signal | Importance | user_frustration | +|--------|------------|------------------| +| Swear words, strong anger | `critical` | `severe` | +| Repeated complaints ("again", "still") | `high` | `moderate` | +| Mild frustration | `medium` | `mild` | +| No frustration | `medium` | `none` | + +Stored in `metadata.user_frustration`. Frustration-flagged memories get priority in retrieval to prevent recurring pain points. + ### Scope Matrix | Scope | DB File | Description | From 26060cd07d4f00f0099bf2c030dfe6ff1a4330c7 Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Tue, 9 Dec 2025 20:27:05 +0800 Subject: [PATCH 11/15] refactor: consolidate agent instruction files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AGENTS.md is now the single canonical source - .claude/CLAUDE.md β†’ symlink to AGENTS.md - Delete GEMINI.md (configure Gemini to read AGENTS.md) - Delete .cursorrules (deprecated by Cursor) - Move project-rules/*.mdc β†’ .cursor/rules/*.mdc - Update specs/CONSTITUTION.md with new structure This eliminates 4-file sync problem. Only AGENTS.md needs updating. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .claude/CLAUDE.md | 100 +----------------- {project-rules => .cursor/rules}/general.mdc | 0 .../rules}/python-agent.mdc | 0 .../rules}/rust-daemon.mdc | 0 {project-rules => .cursor/rules}/specs.mdc | 0 {project-rules => .cursor/rules}/testing.mdc | 0 .cursorrules | 86 --------------- AGENTS.md | 15 ++- GEMINI.md | 86 --------------- specs/CONSTITUTION.md | 12 +++ 10 files changed, 27 insertions(+), 272 deletions(-) mode change 100644 => 120000 .claude/CLAUDE.md rename {project-rules => .cursor/rules}/general.mdc (100%) rename {project-rules => .cursor/rules}/python-agent.mdc (100%) rename {project-rules => .cursor/rules}/rust-daemon.mdc (100%) rename {project-rules => .cursor/rules}/specs.mdc (100%) rename {project-rules => .cursor/rules}/testing.mdc (100%) delete mode 100644 .cursorrules delete mode 100644 GEMINI.md diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md deleted file mode 100644 index 02ba138..0000000 --- a/.claude/CLAUDE.md +++ /dev/null @@ -1,99 +0,0 @@ -# Squirrel Project - -Local-first memory system for AI coding tools. - -## AI-First Development - -**This project is 98% AI-coded.** You are the primary developer. - -All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. - -| Principle | Meaning | -|-----------|---------| -| Specs are source of truth | Never implement undefined behavior | -| Declarative over imperative | Define outcomes, not steps | -| Tables over prose | Structured data > paragraphs | -| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | -| Update specs first | Specs change before code changes | - -## Spec-Driven Development - -**Specs are the source of truth. 
Code is compiled output.** - -| Spec File | Purpose | -|-----------|---------| -| specs/CONSTITUTION.md | Project governance, core principles | -| specs/ARCHITECTURE.md | System boundaries, data flow | -| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | -| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | -| specs/KEYS.md | Declarative key registry (KEY-*) | -| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | -| specs/DECISIONS.md | Architecture decision records (ADR-*) | - -**Rules:** -1. Read specs before implementing -2. Never implement behavior not defined in specs -3. Update specs before or with code, never after -4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") - -## Project Rules - -See `project-rules/*.mdc` for context-specific rules: -- `general.mdc` - Overall development rules -- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) -- `python-agent.mdc` - Python agent boundaries (ARCH-002) -- `specs.mdc` - Specification maintenance -- `testing.mdc` - Testing requirements (DR4) - -## Architecture - -``` -Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) -``` - -| Component | Responsibility | Never Does | -|-----------|----------------|------------| -| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | -| Python Agent | Memory extraction, context composition | File watching | - -## Development Environment - -Uses Nix via devenv (ADR-006). 
Single command: - -```bash -devenv shell -``` - -Available commands: -- `test-all` - Run all tests -- `dev-daemon` - Start daemon in dev mode -- `fmt` - Format all code -- `lint` - Lint all code - -## Team Standards - -### Communication -- No unnecessary emojis -- Documentation written for AI comprehension -- English only in code, comments, commits -- Brief, direct language -- Today's date: 2025 Dec 9 - -### Git Workflow - -Branch: `yourname/type-description` -- `feat`, `fix`, `refactor`, `docs`, `test`, `chore` - -Commit: `type(scope): brief description` -- Reference spec IDs when applicable - -### Code Quality -- Write tests for new features (DR4) -- Keep files under 200 lines -- Only change what's necessary (DR5) -- No drive-by refactoring - -### Security -- Never commit secrets -- Validate user input -- Review AI-generated code diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 120000 index 0000000..be77ac8 --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1 @@ +../AGENTS.md \ No newline at end of file diff --git a/project-rules/general.mdc b/.cursor/rules/general.mdc similarity index 100% rename from project-rules/general.mdc rename to .cursor/rules/general.mdc diff --git a/project-rules/python-agent.mdc b/.cursor/rules/python-agent.mdc similarity index 100% rename from project-rules/python-agent.mdc rename to .cursor/rules/python-agent.mdc diff --git a/project-rules/rust-daemon.mdc b/.cursor/rules/rust-daemon.mdc similarity index 100% rename from project-rules/rust-daemon.mdc rename to .cursor/rules/rust-daemon.mdc diff --git a/project-rules/specs.mdc b/.cursor/rules/specs.mdc similarity index 100% rename from project-rules/specs.mdc rename to .cursor/rules/specs.mdc diff --git a/project-rules/testing.mdc b/.cursor/rules/testing.mdc similarity index 100% rename from project-rules/testing.mdc rename to .cursor/rules/testing.mdc diff --git a/.cursorrules b/.cursorrules deleted file mode 100644 index 6d810b2..0000000 --- a/.cursorrules +++ /dev/null 
@@ -1,86 +0,0 @@ -# Squirrel Project - -Local-first memory system for AI coding tools. - -## AI-First Development - -**This project is 98% AI-coded.** You are the primary developer. - -All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. - -| Principle | Meaning | -|-----------|---------| -| Specs are source of truth | Never implement undefined behavior | -| Declarative over imperative | Define outcomes, not steps | -| Tables over prose | Structured data > paragraphs | -| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | -| Update specs first | Specs change before code changes | - -## Spec-Driven Development - -**Specs are the source of truth. Code is compiled output.** - -| Spec File | Purpose | -|-----------|---------| -| specs/CONSTITUTION.md | Project governance, core principles | -| specs/ARCHITECTURE.md | System boundaries, data flow | -| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | -| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | -| specs/KEYS.md | Declarative key registry (KEY-*) | -| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | -| specs/DECISIONS.md | Architecture decision records (ADR-*) | - -**Rules:** -1. Read specs before implementing -2. Never implement behavior not defined in specs -3. Update specs before or with code, never after -4. 
Reference spec IDs in commits (e.g., "implements SCHEMA-001") - -## Project Rules - -See `project-rules/*.mdc` for context-specific rules: -- `general.mdc` - Overall development rules -- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) -- `python-agent.mdc` - Python agent boundaries (ARCH-002) -- `specs.mdc` - Specification maintenance -- `testing.mdc` - Testing requirements (DR4) - -## Architecture - -``` -Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) -``` - -| Component | Responsibility | Never Does | -|-----------|----------------|------------| -| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | -| Python Agent | Memory extraction, context composition | File watching | - -## Development Environment - -Uses Nix via devenv (ADR-006). Single command: - -```bash -devenv shell -``` - -## Team Standards - -### Communication -- No unnecessary emojis -- English only in code, comments, commits -- Brief, direct language -- Today's date: 2025 Dec 9 - -### Git Workflow - -Branch: `yourname/type-description` - -Commit: `type(scope): brief description` -- Reference spec IDs when applicable - -### Code Quality -- Write tests for new features (DR4) -- Keep files under 200 lines -- Only change what's necessary (DR5) -- No drive-by refactoring diff --git a/AGENTS.md b/AGENTS.md index 6d810b2..96b7d4d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,7 +38,7 @@ All documentation, specs, and structures are designed for AI comprehension. Use ## Project Rules -See `project-rules/*.mdc` for context-specific rules: +See `.cursor/rules/*.mdc` for context-specific rules: - `general.mdc` - Overall development rules - `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) - `python-agent.mdc` - Python agent boundaries (ARCH-002) @@ -64,10 +64,17 @@ Uses Nix via devenv (ADR-006). 
Single command: devenv shell ``` +Available commands: +- `test-all` - Run all tests +- `dev-daemon` - Start daemon in dev mode +- `fmt` - Format all code +- `lint` - Lint all code + ## Team Standards ### Communication - No unnecessary emojis +- Documentation written for AI comprehension - English only in code, comments, commits - Brief, direct language - Today's date: 2025 Dec 9 @@ -75,6 +82,7 @@ devenv shell ### Git Workflow Branch: `yourname/type-description` +- `feat`, `fix`, `refactor`, `docs`, `test`, `chore` Commit: `type(scope): brief description` - Reference spec IDs when applicable @@ -84,3 +92,8 @@ Commit: `type(scope): brief description` - Keep files under 200 lines - Only change what's necessary (DR5) - No drive-by refactoring + +### Security +- Never commit secrets +- Validate user input +- Review AI-generated code diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index 6d810b2..0000000 --- a/GEMINI.md +++ /dev/null @@ -1,86 +0,0 @@ -# Squirrel Project - -Local-first memory system for AI coding tools. - -## AI-First Development - -**This project is 98% AI-coded.** You are the primary developer. - -All documentation, specs, and structures are designed for AI comprehension. Use declarative thinking: specs define WHAT, you implement HOW. - -| Principle | Meaning | -|-----------|---------| -| Specs are source of truth | Never implement undefined behavior | -| Declarative over imperative | Define outcomes, not steps | -| Tables over prose | Structured data > paragraphs | -| Stable IDs everywhere | SCHEMA-001, IPC-001, ADR-001 | -| Update specs first | Specs change before code changes | - -## Spec-Driven Development - -**Specs are the source of truth. 
Code is compiled output.** - -| Spec File | Purpose | -|-----------|---------| -| specs/CONSTITUTION.md | Project governance, core principles | -| specs/ARCHITECTURE.md | System boundaries, data flow | -| specs/SCHEMAS.md | Database schemas (SCHEMA-*) | -| specs/INTERFACES.md | IPC, MCP, CLI contracts (IPC-*, MCP-*, CLI-*) | -| specs/KEYS.md | Declarative key registry (KEY-*) | -| specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | -| specs/DECISIONS.md | Architecture decision records (ADR-*) | - -**Rules:** -1. Read specs before implementing -2. Never implement behavior not defined in specs -3. Update specs before or with code, never after -4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") - -## Project Rules - -See `project-rules/*.mdc` for context-specific rules: -- `general.mdc` - Overall development rules -- `rust-daemon.mdc` - Rust daemon boundaries (ARCH-001) -- `python-agent.mdc` - Python agent boundaries (ARCH-002) -- `specs.mdc` - Specification maintenance -- `testing.mdc` - Testing requirements (DR4) - -## Architecture - -``` -Rust Daemon (I/O, storage, MCP) <--IPC--> Python Agent (LLM operations) -``` - -| Component | Responsibility | Never Does | -|-----------|----------------|------------| -| Rust Daemon | Log watching, MCP server, CLI, SQLite | LLM calls | -| Python Agent | Memory extraction, context composition | File watching | - -## Development Environment - -Uses Nix via devenv (ADR-006). 
Single command: - -```bash -devenv shell -``` - -## Team Standards - -### Communication -- No unnecessary emojis -- English only in code, comments, commits -- Brief, direct language -- Today's date: 2025 Dec 9 - -### Git Workflow - -Branch: `yourname/type-description` - -Commit: `type(scope): brief description` -- Reference spec IDs when applicable - -### Code Quality -- Write tests for new features (DR4) -- Keep files under 200 lines -- Only change what's necessary (DR5) -- No drive-by refactoring diff --git a/specs/CONSTITUTION.md b/specs/CONSTITUTION.md index 459fa66..0b5f46c 100644 --- a/specs/CONSTITUTION.md +++ b/specs/CONSTITUTION.md @@ -75,3 +75,15 @@ Only change what's necessary. No drive-by refactoring. No "while I'm here" impro - No emojis in documentation - Brief, direct language - Tables over paragraphs + +## Agent Instruction Files + +Single source of truth for AI tool instructions: + +| File | Purpose | +|------|---------| +| `AGENTS.md` | Canonical source (Codex native) | +| `.claude/CLAUDE.md` | Symlink β†’ AGENTS.md | +| `.cursor/rules/*.mdc` | Cursor project rules | + +GEMINI.md and .cursorrules are deprecated. Configure Gemini CLI to read AGENTS.md via `contextFileName` setting. 
From a4fd225e55508eb685e00e2b5aa3d034381ea04f Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Wed, 10 Dec 2025 20:50:23 +0800 Subject: [PATCH 12/15] docs: add AI workflow phases and pre-commit checklist to AGENTS.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adopted from GitHub Spec Kit: - 5-phase workflow: Specify β†’ Clarify β†’ Plan β†’ Tasks β†’ Implement - NEEDS CLARIFICATION markers for unclear requirements - Mandatory pre-commit checklist for spec-code consistency --- AGENTS.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 96b7d4d..ae58482 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -36,6 +36,29 @@ All documentation, specs, and structures are designed for AI comprehension. Use 3. Update specs before or with code, never after 4. Reference spec IDs in commits (e.g., "implements SCHEMA-001") +## AI Workflow + +Follow these phases in order. Never skip phases. Never jump to code without a spec. + +| Phase | Action | Output | +|-------|--------|--------| +| 1. **Specify** | Define WHAT and WHY. No tech stack yet. | `specs/*.md` updated | +| 2. **Clarify** | Ask questions. Mark unclear areas as `[NEEDS CLARIFICATION: reason]` | Ambiguities resolved | +| 3. **Plan** | Define HOW (tech stack, architecture, file structure) | Implementation plan | +| 4. **Tasks** | Break into ordered, independently testable steps | Task list | +| 5. **Implement** | Execute one task at a time. Test after each. | Working code | + +### Before Every Commit (MANDATORY) + +Verify these before committing: + +| Check | Action if Yes | +|-------|---------------| +| Code changed? | Update related spec | +| Spec changed? | Update related code | +| New key/schema/interface? | Add to registry with ID | +| Unclear requirement? 
| Mark `[NEEDS CLARIFICATION]`, ask user | + ## Project Rules See `.cursor/rules/*.mdc` for context-specific rules: @@ -77,7 +100,7 @@ Available commands: - Documentation written for AI comprehension - English only in code, comments, commits - Brief, direct language -- Today's date: 2025 Dec 9 +- Today's date: 2025 Dec 10 ### Git Workflow From 000e78d263843f8a604ff3092af60141383ef78f Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Wed, 10 Dec 2025 21:51:47 +0800 Subject: [PATCH 13/15] feat(hooks): add pre-commit hook for doc sync reminder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add .githooks/pre-commit that shows changed files before commit - AI decides which docs to check based on file types - Update AGENTS.md with guidance on which docs to check - Hook does not block commits, only reminds πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .githooks/pre-commit | 62 ++++++++++++++++++++++++++++++++++++++++++++ AGENTS.md | 13 +++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100755 .githooks/pre-commit diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..cba8ef6 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,62 @@ +#!/bin/bash +# Pre-commit hook: Reminds AI to check if related docs need updates +# +# This hook does NOT block commits. It only outputs a reminder. +# The AI must decide which docs are related and whether to update them. 
+ +# Get staged files (files in this commit) +STAGED=$(git diff --cached --name-only) + +if [ -z "$STAGED" ]; then + exit 0 +fi + +# Check if code files changed +CODE_CHANGED=$(echo "$STAGED" | grep -E '\.(rs|py|toml|nix)$' || true) + +# Check if spec files changed +SPECS_CHANGED=$(echo "$STAGED" | grep -E '^specs/' || true) + +# Check if doc files changed +DOCS_CHANGED=$(echo "$STAGED" | grep -E '\.(md|mdc)$' || true) + +# Output reminder +echo "" +echo "==========================================" +echo "PRE-COMMIT: Doc Sync Check" +echo "==========================================" +echo "" +echo "Files in this commit:" +echo "$STAGED" | sed 's/^/ - /' +echo "" + +if [ -n "$CODE_CHANGED" ]; then + echo "Code files changed:" + echo "$CODE_CHANGED" | sed 's/^/ - /' + echo "" + echo ">> Check if any specs/*.md need updates" + echo "" +fi + +if [ -n "$SPECS_CHANGED" ]; then + echo "Spec files changed:" + echo "$SPECS_CHANGED" | sed 's/^/ - /' + echo "" + echo ">> Check if related code needs updates" + echo "" +fi + +if [ -n "$DOCS_CHANGED" ] && [ -z "$SPECS_CHANGED" ]; then + echo "Doc files changed:" + echo "$DOCS_CHANGED" | sed 's/^/ - /' + echo "" +fi + +echo "==========================================" +echo "If docs are in sync, proceed with commit." +echo "==========================================" +echo "" + +# Always allow commit (exit 0) +# AI decides whether to abort and fix docs +exit 0 diff --git a/AGENTS.md b/AGENTS.md index ae58482..676e750 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -50,7 +50,18 @@ Follow these phases in order. Never skip phases. Never jump to code without a sp ### Before Every Commit (MANDATORY) -Verify these before committing: +A pre-commit hook (`.githooks/pre-commit`) will show you which files changed. + +**Your job:** Review the list and decide which docs need updates. 
+ +| Files Changed | Check These Docs | +|---------------|------------------| +| `*.rs` (Rust) | `specs/ARCHITECTURE.md`, `specs/INTERFACES.md`, `specs/SCHEMAS.md` | +| `*.py` (Python) | `specs/ARCHITECTURE.md`, `specs/PROMPTS.md` | +| `specs/*.md` | Related code that implements the spec | +| `*.toml`, `*.nix` | `specs/DECISIONS.md` (if config change is significant) | + +**Checklist:** | Check | Action if Yes | |-------|---------------| From 8b10858dfa1f39d40c5619dca77e532b4ab3a32f Mon Sep 17 00:00:00 2001 From: kaminoguo Date: Wed, 10 Dec 2025 23:25:29 +0800 Subject: [PATCH 14/15] chore: migrate useful content from DEVELOPMENT_PLAN.md, delete redundant files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Technology Stack table to specs/ARCHITECTURE.md - Add v2 Team/Cloud architecture to specs/DECISIONS.md - Delete DEVELOPMENT_PLAN.md (superseded by specs/) - Delete EXAMPLE.md (not needed) - Delete .mcp.json:Zone.Identifier (Windows artifact) πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- DEVELOPMENT_PLAN.md | 1097 ----------------------------------------- EXAMPLE.md | 723 --------------------------- specs/ARCHITECTURE.md | 22 + specs/DECISIONS.md | 41 ++ 4 files changed, 63 insertions(+), 1820 deletions(-) delete mode 100644 DEVELOPMENT_PLAN.md delete mode 100644 EXAMPLE.md diff --git a/DEVELOPMENT_PLAN.md b/DEVELOPMENT_PLAN.md deleted file mode 100644 index 0fbb374..0000000 --- a/DEVELOPMENT_PLAN.md +++ /dev/null @@ -1,1097 +0,0 @@ -# Squirrel Development Plan (v1) - -Modular development plan with Rust daemon + Python Agent communicating via Unix socket IPC. 
- -## Technology Stack - -| Category | Technology | Notes | -|----------|------------|-------| -| **Storage** | SQLite + sqlite-vec | Local-first, vector search | -| **IPC Protocol** | JSON-RPC 2.0 | MCP-compatible, over Unix socket | -| **MCP SDK** | rmcp | Official Rust SDK (modelcontextprotocol/rust-sdk) | -| **CLI Framework** | clap | Rust CLI parsing | -| **Agent Framework** | PydanticAI | Python agent with tools | -| **LLM Client** | LiteLLM | Multi-provider support | -| **Embeddings** | OpenAI text-embedding-3-small | 1536-dim, API-based | -| **Build/Release** | dist (cargo-dist) | Generates Homebrew, MSI, installers | -| **Auto-update** | axoupdater | dist's official updater | -| **Python Packaging** | PyInstaller | Bundled, zero user deps | -| **Logging** | tracing (Rust), structlog (Python) | Structured logging | - -## Architecture Overview - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ RUST DAEMON β”‚ -β”‚ Log Watcher β†’ Events β†’ Episodes β†’ IPC β†’ Python Agent β”‚ -β”‚ SQLite/sqlite-vec storage β”‚ -β”‚ MCP Server (2 tools) β”‚ -β”‚ Thin CLI (passes to agent) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - ↕ Unix socket IPC -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ PYTHON AGENT β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ Squirrel Agent (single LLM brain) β”‚ β”‚ -β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ Tools: β”‚ β”‚ 
-β”‚ β”‚ β”œβ”€β”€ Memory Tools β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ ingest_episode(events) β†’ memories β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ search_memories(query) β†’ results β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ get_task_context(task) β†’ relevant memories β”‚ β”‚ -β”‚ β”‚ β”‚ └── forget_memory(id) β”‚ β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ β”œβ”€β”€ Filesystem Tools β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ find_cli_configs() β†’ [claude, codex, ...] β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ read_file(path) β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ write_file(path, content) β”‚ β”‚ -β”‚ β”‚ β”‚ └── scan_project_logs(project_root) β†’ logs β”‚ β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ β”œβ”€β”€ Config Tools β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ get_mcp_config(cli) β†’ config β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ set_mcp_config(cli, server, config) β”‚ β”‚ -β”‚ β”‚ β”‚ β”œβ”€β”€ init_project(path, options) β”‚ β”‚ -β”‚ β”‚ β”‚ └── get/set_user_profile() β”‚ β”‚ -β”‚ β”‚ β”‚ β”‚ β”‚ -β”‚ β”‚ └── DB Tools β”‚ β”‚ -β”‚ β”‚ β”œβ”€β”€ query_memories(filters) β”‚ β”‚ -β”‚ β”‚ β”œβ”€β”€ add_memory(memory) β”‚ β”‚ -β”‚ β”‚ β”œβ”€β”€ update_memory(id, changes) β”‚ β”‚ -β”‚ β”‚ └── get_stats() β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ -β”‚ Entry points (all go through same agent): β”‚ -β”‚ β”œβ”€β”€ IPC: daemon sends Episode β†’ agent ingests β”‚ -β”‚ β”œβ”€β”€ IPC: MCP tool call β†’ agent retrieves β”‚ -β”‚ └── IPC: CLI command β†’ agent executes β”‚ -β”‚ β”‚ -β”‚ API Embeddings (text-embedding-3-small, 1536-dim) β”‚ -β”‚ 2-tier LLM: strong (ingest) + fast (compose, CLI, dedup) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -## Core Insight: Episode Segmentation - -Not all sessions are coding tasks with success/failure 
outcomes. Sessions include architecture discussions, research, brainstorming - these produce valuable memories but don't fit the success/failure model. - -**Episode β†’ Segments β†’ Memories (single LLM call):** - -1. **Segment the episode** by kind (not by task success/failure) -2. **Extract memories** based on segment kind -3. **Only EXECUTION_TASK segments** get SUCCESS/FAILURE classification - -### Segment Kinds - -| Kind | Description | Outcome Field | Memory Output | -|------|-------------|---------------|---------------| -| `EXECUTION_TASK` | Coding, fixing bugs, running commands | `outcome`: SUCCESS / FAILURE / UNCERTAIN | lesson (with outcome), fact | -| `PLANNING_DECISION` | Architecture, design, tech choices | `resolution`: DECIDED / OPEN | fact, lesson (rationale), profile | -| `RESEARCH_LEARNING` | Learning, exploring docs, asking questions | `resolution`: ANSWERED / PARTIAL | fact, lesson | -| `DISCUSSION` | Brainstorming, market research, chat | (none) | profile, lesson (insights) | - -**Key rule:** SUCCESS/FAILURE only allowed on EXECUTION_TASK. Other kinds never output FAILURE. 
- -### Success/Failure Detection (EXECUTION_TASK only) - -**Success signals (require evidence):** -- AI says "done" / "complete" + User moves to next task β†’ SUCCESS -- Tests pass, build succeeds β†’ SUCCESS -- User says "thanks", "perfect", "works" β†’ SUCCESS - -**Failure signals (require evidence):** -- Same error reappears after attempted fix β†’ FAILURE -- User says "still broken", "that didn't work" β†’ FAILURE - -**No evidence β†’ UNCERTAIN** (conservative default) - -### Episode Ingestion Output Schema - -```json -{ - "episode_summary": "...", - "segments": [ - { - "id": "seg_1", - "kind": "EXECUTION_TASK", - "title": "Fix auth 500 on null user_id", - "event_range": [12, 33], - "outcome": { - "status": "SUCCESS", - "evidence": ["event#31 tests passed", "event#33 user confirmed"] - } - }, - { - "id": "seg_2", - "kind": "PLANNING_DECISION", - "title": "Choose sync conflict strategy", - "event_range": [34, 44], - "resolution": "DECIDED", - "decision": { - "choice": "server-wins", - "rationale": ["shared team DB", "simplicity"] - } - } - ], - "memories": [ - { - "memory_type": "fact", - "scope": "project", - "key": "project.db.engine", - "value": "PostgreSQL", - "text": "Project uses PostgreSQL 15 via Prisma.", - "evidence_source": "neutral", - "source_segments": ["seg_1"], - "confidence": 0.86 - }, - { - "memory_type": "fact", - "scope": "global", - "key": "user.preferred_style", - "value": "async_await", - "text": "User prefers async/await over callbacks.", - "evidence_source": "success", - "source_segments": ["seg_1"], - "confidence": 0.85 - }, - { - "memory_type": "lesson", - "scope": "project", - "outcome": "failure", - "text": "Validate user_id before DB insert to avoid 500s.", - "source_segments": ["seg_1"], - "confidence": 0.9 - }, - { - "memory_type": "fact", - "scope": "project", - "text": "Auth module handles JWT validation in middleware.", - "evidence_source": "neutral", - "source_segments": ["seg_1"], - "confidence": 0.75 - } - ] -} -``` - -**The 
LLM-decides-everything approach:** -- One LLM call per Episode (4-hour window) -- LLM segments by kind first, then extracts memories per segment -- No rules engine, no heuristics for task detection -- 100% passive - no user prompts or confirmations - -## Development Tracks - -``` -Phase 0: Scaffolding - | - v -+-------+-------+-------+-------+ -| | | | | -v v v v v -Track A Track B Track C Track D Track E -(Rust (Rust (Python (Python (MCP + -Storage) Daemon) Agent) Tools) CLI) - | | | | | - +---+---+ +---+---+ | - | | | - v v v - Week 1-2 Week 2-3 Week 3-4 - | | | - +-------+-------+-----------+ - | - v - Phase X - Hardening -``` - ---- - -## Phase 0 – Scaffolding - -### Rust Module (`agent/`) - -Dependencies: -- `tokio` (async runtime) -- `rusqlite` + `sqlite-vec` (storage) -- `serde`, `serde_json` (serialization, JSON-RPC 2.0) -- `notify` (file watching) -- `clap` (CLI framework) -- `rmcp` (official MCP SDK - modelcontextprotocol/rust-sdk) -- `tracing` (structured logging) -- `uuid`, `chrono` (ID, timestamps) - -Directory structure: -``` -agent/src/ -β”œβ”€β”€ main.rs -β”œβ”€β”€ lib.rs -β”œβ”€β”€ daemon.rs # Lazy start, idle shutdown -β”œβ”€β”€ watcher.rs # Log file watching -β”œβ”€β”€ storage.rs # SQLite + sqlite-vec -β”œβ”€β”€ events.rs # Event/Episode structs -β”œβ”€β”€ config.rs # Config management -β”œβ”€β”€ mcp.rs # MCP server -└── ipc.rs # Unix socket client -``` - -### Python Module (`memory_service/`) - -Dependencies: -- `pydantic-ai` (agent framework) -- `litellm` (multi-provider LLM support) -- `openai` (embeddings API client) -- `pydantic` (schemas) -- `structlog` (structured logging) - -Directory structure: -``` -memory_service/ -β”œβ”€β”€ squirrel_memory/ -β”‚ β”œβ”€β”€ __init__.py -β”‚ β”œβ”€β”€ server.py # Unix socket server -β”‚ β”œβ”€β”€ agent.py # Unified agent -β”‚ β”œβ”€β”€ tools/ -β”‚ β”‚ β”œβ”€β”€ __init__.py -β”‚ β”‚ β”œβ”€β”€ memory.py # ingest, search, get_context, forget -β”‚ β”‚ β”œβ”€β”€ filesystem.py # find_cli_configs, 
read/write_file -β”‚ β”‚ β”œβ”€β”€ config.py # init_project, mcp_config, user_profile -β”‚ β”‚ └── db.py # query, add, update memories -β”‚ β”œβ”€β”€ embeddings.py # API embeddings (OpenAI, etc.) -β”‚ β”œβ”€β”€ retrieval.py # Similarity search -β”‚ └── schemas/ -└── tests/ -``` - ---- - -## Track A – Rust: Storage + Config + Events - -### A1. Storage layer (`storage.rs`) - -SQLite + sqlite-vec initialization: -```sql --- memories table (squirrel.db) -CREATE TABLE memories ( - id TEXT PRIMARY KEY, - project_id TEXT, -- NULL for global/user-scope memories - memory_type TEXT NOT NULL, -- lesson | fact | profile - - -- For lessons (task-level patterns/pitfalls) - outcome TEXT, -- success | failure | uncertain (lesson only) - - -- For facts - fact_type TEXT, -- knowledge | process (fact only, optional) - key TEXT, -- declarative key: project.db.engine, user.preferred_style - value TEXT, -- declarative value: PostgreSQL, async_await - evidence_source TEXT, -- success | failure | neutral | manual (fact only) - support_count INTEGER, -- approx episodes that support this fact - last_seen_at TEXT, -- last episode where this was seen - - -- Content - text TEXT NOT NULL, -- human-readable content - embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) - metadata TEXT, -- JSON: anchors (files, components, endpoints) - - -- Confidence - confidence REAL NOT NULL, - importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low - - -- Lifecycle - status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated - valid_from TEXT NOT NULL, - valid_to TEXT, - superseded_by TEXT, - - -- Audit - user_id TEXT NOT NULL DEFAULT 'local', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); - --- events table -CREATE TABLE events ( - id TEXT PRIMARY KEY, - repo TEXT NOT NULL, - kind TEXT NOT NULL, -- user | assistant | tool | system - content TEXT NOT NULL, - file_paths TEXT, -- JSON array - ts TEXT NOT NULL, - processed INTEGER DEFAULT 0 -); - --- 
user_profile table (structured, not memories) -CREATE TABLE user_profile ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL, - source TEXT NOT NULL, -- explicit | inferred - confidence REAL, - updated_at TEXT NOT NULL -); - --- memory_history table (audit trail) -CREATE TABLE memory_history ( - id TEXT PRIMARY KEY, - memory_id TEXT NOT NULL, - old_content TEXT, - new_content TEXT NOT NULL, - event TEXT NOT NULL, -- ADD | UPDATE | DELETE - created_at TEXT NOT NULL, - FOREIGN KEY (memory_id) REFERENCES memories(id) -); - --- memory_access_log table (debugging) -CREATE TABLE memory_access_log ( - id TEXT PRIMARY KEY, - memory_id TEXT NOT NULL, - access_type TEXT NOT NULL, -- search | get_context | list - query TEXT, - score REAL, - metadata TEXT, -- JSON - accessed_at TEXT NOT NULL, - FOREIGN KEY (memory_id) REFERENCES memories(id) -); -``` - -### A1.1 Declarative Key Registry - -Declarative keys are the "rigid backbone" for critical facts. Same key + different value triggers deterministic invalidation (no LLM needed). 
- -**Project-scoped keys** (project_id set): -``` -project.db.engine # PostgreSQL, MySQL, SQLite -project.db.version # 15, 8.0, 3.x -project.api.framework # FastAPI, Express, Rails -project.ui.framework # React, Vue, Svelte -project.language.main # Python, TypeScript, Go -project.test.command # pytest, npm test, go test -project.build.command # npm run build, cargo build -project.auth.method # JWT, session, OAuth -project.package_manager # npm, pnpm, yarn, pip, uv -project.orm # Prisma, SQLAlchemy, TypeORM -``` - -**User-scoped keys** (project_id = NULL, stored in global db): -``` -user.preferred_style # async_await, callbacks, sync -user.preferred_language # Python, TypeScript, Go -user.strict_null_checks # true, false -user.comment_style # minimal, detailed, jsdoc -user.error_handling # exceptions, result_types, errors -``` - -**Key behaviors:** -- Keys are optional - most facts remain free-text -- LLM extracts key during ingestion when pattern matches registry -- Same key + different value β†’ deterministic invalidation of old fact -- Keys enable fast lookup without vector search - -### A1.2 Memory Lifecycle (Forget Mechanism) - -**Status values:** -- `active` - Normal, appears in retrieval -- `inactive` - Soft deleted by user (`sqrl forget`), recoverable, hidden from retrieval -- `invalidated` - Superseded by newer fact, keeps history, hidden from retrieval - -**Validity fields (for facts):** -- `valid_from` - When this became true (default: created_at) -- `valid_to` - When it stopped being true (null = still valid) -- `superseded_by` - ID of memory that replaced this - -**Retrieval filter:** -```sql -WHERE status = 'active' - AND (valid_to IS NULL OR valid_to > datetime('now')) -``` - -**Contradiction detection (during ingestion):** - -For facts with declarative `key`: -- Same key + different value β†’ invalidate old (status='invalidated', valid_to=now, superseded_by=new_id) -- Deterministic, no LLM needed -- Example: key=project.db.engine, old value=MySQL, 
new value=PostgreSQL β†’ invalidate old - -For free-text facts without key: -- LLM judges semantic conflict between new fact and similar existing facts -- High confidence conflict β†’ invalidate old -- Low confidence β†’ keep both, let retrieval handle via recency weighting - -**No cascade delete:** -- Invalidating a fact does NOT delete related lessons -- Related lessons get flagged in retrieval output: `dependency_changed: true` - -**CLI behavior:** -- `sqrl forget <id>` β†’ status='inactive' (soft delete, recoverable) -- `sqrl forget "deprecated API"` β†’ search + confirm + soft delete matching memories - -**v1 limitations (documented for users):** -- No TTL/auto-expiration (manual forget only) -- No hard delete/purge (data remains in SQLite file) -- Free-text contradiction detection depends on LLM, may have false positives - -### A2. Event model (`events.rs`) - -Event struct (normalized, CLI-agnostic): -- id, repo, kind (User|Assistant|Tool|System), content, file_paths, ts, processed - -Episode struct (in-memory only): -- id, repo, start_ts, end_ts, events - -Episode batching: 4-hour time window OR 50 events max (whichever first) - -### A3. Config (`config.rs`) - -Paths: -- `~/.sqrl/` (global) -- `<project>/.sqrl/` (project) - -Files: -- `squirrel.db` - memories -- `config.toml` - settings - -Config fields: -- agents.claude_code, agents.codex_cli, agents.gemini_cli, agents.cursor (CLI selection) -- llm.provider, llm.api_key, llm.base_url -- llm.strong_model, llm.fast_model (2-tier design) -- embedding.provider, embedding.model (default: openai/text-embedding-3-small) -- daemon.idle_timeout_hours (default: 2) -- daemon.socket_path - -Projects registry (`~/.sqrl/projects.json`): -- List of initialized project paths for `sqrl sync` - ---- - -## Track B – Rust: Daemon + Watcher + IPC - -### B1. Daemon (`daemon.rs`) - -**Lazy start:** Daemon starts on first `sqrl` command, not on boot. - -**Idle shutdown:** Stop after N hours of no log activity. 
Flush pending Episodes on shutdown. - -Startup sequence: -1. Check if already running (socket exists and responds) -2. If not: spawn daemon process -3. Load projects registry -4. For each project, spawn watcher -5. Spawn Python Agent as child process -6. Connect via Unix socket - -### B2. Log Watchers (`watcher.rs`) - -Multi-CLI log discovery: -- `~/.claude/projects/**/*.jsonl` (Claude Code) -- `~/.codex-cli/logs/**/*.jsonl` (Codex CLI) -- `~/.gemini/logs/**/*.jsonl` (Gemini CLI) -- `~/.cursor-tutor/logs/**/*.jsonl` (Cursor) - -Line parsers for each CLI format β†’ normalized Event - -### B3. Episode Batching - -Flush triggers: -- 50 events reached OR -- 4 hours elapsed OR -- Daemon shutdown (graceful flush) - -On flush: create Episode, send to Python via IPC, mark events processed - -### B4. IPC Client (`ipc.rs`) - -Unix socket client with JSON-RPC 2.0 protocol (MCP-compatible): -```json -{"jsonrpc": "2.0", "method": "ingest_episode", "params": {"episode": {...}}, "id": 1} -{"jsonrpc": "2.0", "result": {"memories_created": 3}, "id": 1} -``` - -Methods: -- `ingest_episode` - Process episode, extract memories -- `get_task_context` - MCP tool call -- `search_memories` - MCP tool call -- `execute_command` - CLI natural language/direct command - ---- - -## Track C – Python: Unified Agent - -### C1. Unix Socket Server (`server.py`) - -Socket at `/tmp/sqrl_agent.sock` - -Single entry point - all requests go to agent: -```python -async def handle_connection(reader, writer): - request = await read_json(reader) - result = await agent.execute(request["params"]) - await write_json(writer, {"result": result, "id": request["id"]}) -``` - -### C2. Squirrel Agent (`agent.py`) - -Single LLM-powered agent with tools using PydanticAI framework. 
Uses 2-tier LLM design: - -| Task | Model Tier | Default | -|------|------------|---------| -| Episode Ingestion | strong_model | gemini-2.5-pro | -| Context Compose | fast_model | gemini-3-flash | -| CLI Interpretation | fast_model | gemini-3-flash | -| Near-duplicate Check | fast_model | gemini-3-flash | - -```python -class SquirrelAgent: - def __init__(self): - self.tools = [ - # Memory tools - ingest_episode, - search_memories, - get_task_context, - forget_memory, - # Filesystem tools - find_cli_configs, - read_file, - write_file, - scan_project_logs, - # Config tools - init_project, - get_mcp_config, - set_mcp_config, - get_user_profile, - set_user_profile, - # DB tools - query_memories, - add_memory, - update_memory, - get_stats, - ] - - async def execute(self, input: dict) -> dict: - """ - Single entry point for all operations. - Agent decides which tools to use based on input. - """ - # For Episode ingestion (from daemon) - if input.get("type") == "episode": - return await self.ingest(input["episode"]) - - # For MCP calls - if input.get("type") == "mcp": - return await self.handle_mcp(input["tool"], input["args"]) - - # For CLI commands (natural language or direct) - return await self.handle_command(input.get("command", "")) -``` - -### C3. Episode Ingestion (via ingest_episode tool) - -LLM analyzes entire Episode in ONE call (segment-first approach): - -1. **Segment by kind** (not by success/failure): - - EXECUTION_TASK - coding, fixing, running commands - - PLANNING_DECISION - architecture, design choices - - RESEARCH_LEARNING - learning, exploring docs - - DISCUSSION - brainstorming, chat - -2. **For EXECUTION_TASK segments only**, classify outcome: - - SUCCESS (with evidence: tests passed, user confirmed, etc.) - - FAILURE (with evidence: error persists, user says "didn't work") - - UNCERTAIN (no clear evidence - conservative default) - -3. 
**Extract memories based on segment kind:** - - EXECUTION_TASK β†’ lesson (with outcome), fact (knowledge discovered) - - PLANNING_DECISION β†’ fact (decisions), lesson (rationale for rejected options), profile - - RESEARCH_LEARNING β†’ fact (knowledge), lesson (key learnings) - - DISCUSSION β†’ profile (user preferences), lesson (insights) - -4. **Detect user frustration signals** and boost importance: - - Swear words, angry language β†’ `importance: critical` - - Repeated complaints ("again", "still broken") β†’ `importance: high` - - Mild frustration β†’ `importance: medium` (default) - - Add `metadata.user_frustration`: none | mild | moderate | severe - - Frustration signals make associated failure lessons higher priority in retrieval - -5. **Contradiction check for facts:** - - Extract semantic_key if possible (db.engine, api.framework, etc.) - - Check existing facts with same key β†’ invalidate old if conflict - - Free-text facts β†’ LLM judges semantic conflict - -6. **Near-duplicate check** before ADD (0.9 similarity threshold) - -UUIDβ†’integer mapping when showing existing memories to LLM (prevents hallucination). - -### C4. Schemas (`schemas/`) - -**Memory schema:** -- id, project_id (NULL for global), memory_type (lesson | fact | profile) -- text (human-readable content), embedding (1536-dim) -- metadata (JSON: anchors - files, components, endpoints) -- confidence, importance (critical | high | medium | low) -- user_id, created_at, updated_at - -**Lesson-specific fields:** -- outcome: success | failure | uncertain - -**Fact-specific fields:** -- fact_type: knowledge | process (optional) -- key: declarative key (project.db.engine, user.preferred_style, etc.) -- value: declarative value (PostgreSQL, async_await, etc.) 
-- evidence_source: success | failure | neutral | manual -- support_count: number of episodes that support this fact -- last_seen_at: timestamp of last episode where seen - -**Lifecycle fields (all types):** -- status: active | inactive | invalidated -- valid_from: timestamp (when this became true) -- valid_to: timestamp | null (when it stopped being true) -- superseded_by: memory_id | null (for invalidated facts) - -**UserProfile schema (structured identity, not memories):** -- key, value, source (explicit|inferred), confidence, updated_at -- Examples: name, role, experience_level, company, primary_use_case - ---- - -## Track D – Python: Tools Implementation - -### D1. Memory Tools (`tools/memory.py`) - -**ingest_episode(events):** LLM analysis, task segmentation, outcome classification, memory extraction - -**search_memories(query, filters):** Embed query, sqlite-vec search, return ranked results - -**get_task_context(task, budget):** -1. Vector search retrieves top 20 candidates -2. LLM (fast_model) reranks + composes context prompt: - - Selects relevant memories - - Resolves conflicts between memories - - Merges related memories - - Generates structured prompt with memory IDs -3. 
Returns ready-to-inject context prompt within token budget - -**Context output structure:** -```json -{ - "project_facts": [ - {"key": "project.db.engine", "value": "PostgreSQL", "text": "..."}, - {"key": "project.api.framework", "value": "FastAPI", "text": "..."} - ], - "user_prefs": [ - {"key": "user.preferred_style", "value": "async_await", "text": "..."} - ], - "lessons": [ - {"outcome": "failure", "text": "Validate user_id before DB insert...", "id": "mem_123"}, - {"outcome": "success", "text": "Use repository pattern for DB access...", "id": "mem_456"} - ], - "process_facts": [ - {"text": "Auth module handles JWT validation in middleware.", "id": "mem_789"} - ], - "profile": { - "name": "Alice", - "role": "Backend Developer", - "experience_level": "Senior" - } -} -``` - -**forget_memory(id_or_query):** -- If ID: set status='inactive' (soft delete, recoverable) -- If natural language query: search β†’ confirm with user β†’ soft delete matches - -**export_memories(filters, format):** Export memories as JSON for sharing/backup - -**import_memories(data):** Import memories from JSON - -### D2. Filesystem Tools (`tools/filesystem.py`) - -**find_cli_configs():** Scan for ~/.claude, ~/.codex-cli, ~/.gemini, etc. - -**scan_project_logs(project_root, token_limit):** Find logs mentioning project files, return within token limit - -**read_file(path), write_file(path, content):** Basic file operations for config management - -### D3. Config Tools (`tools/config.py`) - -**init_project(path, skip_history):** -1. Create `/.sqrl/squirrel.db` -2. If not skip_history: scan_project_logs β†’ ingest -3. For each enabled CLI in `config.agents`: - - Configure MCP (add Squirrel server to CLI's MCP config) - - Inject instruction text to agent file (CLAUDE.md, AGENTS.md, GEMINI.md, .cursor/rules/) -4. Register project in `~/.sqrl/projects.json` - -**sync_projects():** -1. Read enabled CLIs from `config.agents` -2. 
For each project in `~/.sqrl/projects.json`: - - For each enabled CLI not yet configured in that project: - - Configure MCP - - Inject instruction text - -**get_mcp_config(cli), set_mcp_config(cli, server, config):** Read/write MCP config files - -**get_agent_instructions(cli), set_agent_instructions(cli, content):** Read/write agent instruction files - -**get_user_profile(), set_user_profile(key, value):** Manage user_profile table - -### D3.1 MCP Config Locations - -| CLI | MCP Config Location | -|-----|---------------------| -| Claude Code | `~/.claude.json` or `/.mcp.json` | -| Codex CLI | `codex mcp add-server` command | -| Gemini CLI | `/.gemini/settings.json` | -| Cursor | `~/.cursor/mcp.json` or `/.cursor/mcp.json` | - -MCP server definition: -```json -{ - "mcpServers": { - "squirrel": { - "command": "sqrl-daemon", - "args": ["--mcp"], - "disabled": false - } - } -} -``` - -### D3.2 Agent Instruction Files - -| CLI | Instruction File | -|-----|------------------| -| Claude Code | `/CLAUDE.md` | -| Codex CLI | `/AGENTS.md` | -| Gemini CLI | `/GEMINI.md` | -| Cursor | `/.cursor/rules/squirrel.mdc` | - -Instruction text to inject: -```markdown -## Squirrel Memory System - -This project uses Squirrel for persistent memory across sessions. - -ALWAYS call `squirrel_get_task_context` BEFORE: -- Fixing bugs (to check if this bug was seen before) -- Refactoring code (to get patterns that worked/failed) -- Adding features touching existing modules -- Debugging errors that seem familiar - -DO NOT call for: -- Simple typo fixes -- Adding comments -- Formatting changes -``` - -### D4. DB Tools (`tools/db.py`) - -**query_memories(filters):** Direct DB query with filtering - -**add_memory(memory):** Insert + log to history - -**update_memory(id, changes):** Update + log old/new to history - -**get_stats():** Memory counts, access stats, etc. - -### D5. Embeddings (`embeddings.py`) - -API-based embeddings via OpenAI (text-embedding-3-small, 1536-dim). 
- -Supports multiple providers via config. Batch embedding with retry logic. - -### D6. Retrieval (`retrieval.py`) - -Similarity search via sqlite-vec. - -Scoring: `w_sim * similarity + w_imp * importance_weight + w_rec * recency + w_frust * frustration_boost` -- importance_weight: critical=1.0, high=0.75, medium=0.5, low=0.25 -- frustration_boost: severe=0.3, moderate=0.2, mild=0.1, none=0.0 - -Frustration-flagged memories surface earlier to prevent recurring pain points. - -Access logging to memory_access_log table. - ---- - -## Track E – MCP + CLI - -### E1. MCP Server (`mcp.rs`) - -Uses `rmcp` (official MCP SDK from modelcontextprotocol/rust-sdk). - -2 tools: -``` -squirrel_get_task_context - - project_root: string (required) - - task: string (required) - - context_budget_tokens: integer (default: 400) - -squirrel_search_memory - - project_root: string (required) - - query: string (required) - - top_k: integer (default: 10) -``` - -For trivial queries, return empty fast (<20ms). - -### E2. 
CLI (`cli.rs`) - -Thin shell that passes to Python agent: - -```rust -fn main() { - ensure_daemon_running()?; - - let args: Vec<String> = std::env::args().skip(1).collect(); - let input = args.join(" "); - - let response = ipc_client.send("agent_execute", { - "type": "command", - "command": input - }); - - println!("{}", response); -} -``` - -Supports both natural language and direct commands: -- `sqrl "setup this project"` β†’ agent interprets -- `sqrl init --skip-history` β†’ agent interprets -- `sqrl config` β†’ interactive CLI selection -- `sqrl sync` β†’ update all projects with new CLI configs -- `sqrl update` β†’ auto-update via axoupdater -- `sqrl export <file>` β†’ export memories as JSON -- `sqrl import <file>` β†’ import memories - ---- - -## Phase X – Hardening - -### Logging & Observability -- Structured logging (Rust: `tracing`, Python: `structlog`) -- Metrics: events/episodes/memories processed, latency - -### Testing -- Unit tests: storage, events, agent tools -- Integration tests: full flow from log to memory to retrieval - -### Build & Release (dist) - -Uses `dist` (cargo-dist) as single release orchestrator: -- Builds Rust daemon for Mac/Linux/Windows -- Builds Python agent via PyInstaller (as dist workspace member) -- Generates installers: Homebrew, MSI, shell/powershell scripts - -### Cross-Platform Installation - -| Platform | Primary | Fallback | -|----------|---------|----------| -| Mac | `brew install sqrl` | install script | -| Linux | `brew install sqrl` | install script, AUR, nixpkg | -| Windows | MSI installer | winget, install script | - -Windows note: MSI recommended over raw .exe to reduce SmartScreen/AV friction. 
- -### Auto-Update (axoupdater) - -- `sqrl update` uses axoupdater (dist's official updater) -- Updates both Rust daemon and Python agent together -- Reads dist install receipt to determine installed version/source - ---- - -## Timeline Summary - -| Week | Track A | Track B | Track C | Track D | Track E | -|------|---------|---------|---------|---------|---------| -| 0 | Scaffold | Scaffold | Scaffold | - | - | -| 1 | Storage, Events, Config | Daemon start | Agent skeleton | - | - | -| 2 | - | Watchers, IPC, Batching | - | Memory tools | - | -| 3 | - | Integration | - | Filesystem, Config tools | MCP server | -| 4 | - | - | - | DB tools, Retrieval | CLI | -| 5+ | - | Hardening | - | Hardening | Cross-platform | - ---- - -## Team Assignment (3 developers) - -**Developer 1 (Rust focus):** -- Phase 0: Rust scaffold -- Track A: All -- Track B: All -- Track E: MCP server, CLI - -**Developer 2 (Python focus):** -- Phase 0: Python scaffold -- Track C: All -- Track D: All - -**Developer 3 (Full-stack / Integration):** -- Phase 0: CI, docs -- Agent prompts -- Cross-platform packaging -- Phase X: Testing, documentation - ---- - -## v1 Scope - -- Passive log watching (4 CLIs) -- Episode segmentation (EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION) -- Success detection for EXECUTION_TASK only (SUCCESS/FAILURE/UNCERTAIN with evidence) -- Unified Python agent with tools -- Natural language CLI -- MCP integration (2 tools) -- Lazy daemon (start on demand, stop after 2hr idle) -- Retroactive log ingestion on init (token-limited) -- 3 memory types (lesson, fact, profile) with scope flag -- Declarative keys for facts (project.* and user.*) with deterministic conflict detection -- Evidence source tracking for facts (success/failure/neutral/manual) -- Memory lifecycle: status (active/inactive/invalidated) + validity (valid_from/valid_to) -- Fact contradiction detection (declarative key match + LLM for free-text) -- Soft delete (`sqrl forget`) - no hard purge -- 
Near-duplicate deduplication (0.9 threshold) -- Frustration detection (swear words, anger β†’ boost importance, prioritize in retrieval) -- Cross-platform (Mac, Linux, Windows) -- Export/import memories (JSON) -- Auto-update (`sqrl update`) -- Memory consolidation -- Retrieval debugging tools -- CLI selection (`sqrl config`) + MCP wiring + agent instruction injection -- `sqrl sync` for updating existing projects with new CLIs - -**v1 limitations:** -- No TTL/auto-expiration (manual forget only) -- No hard delete (soft delete only, data remains in SQLite) -- Free-text contradiction detection may have false positives - -## v2 Scope (Future) - -- Team/cloud sync (group.db, share command, team management) -- Deep CLI integrations (Claude Code hooks, Cursor extension) -- Team analytics dashboard -- Memory marketplace -- TTL / temporary memory (auto-expiration) -- Hard purge for privacy/compliance -- Memory linking + evolution (A-MEM style) -- Richer conflict detection with schema/key registry -- `get_memory_history` API for debugging invalidation chains - ---- - -## v2 Architecture: Team/Cloud - -### Overview - -v2 adds team memory sharing via `group.db` - a separate database that syncs with cloud. Individual memories stay in `squirrel.db` (local-only), team memories go to `group.db` (synced). - -### 3-Layer Database Architecture (v2) - -| Layer | DB File | Contents | Sync | -|-------|---------|----------|------| -| **Global** | `~/.sqrl/squirrel.db` | lesson, fact, profile (scope=global) | Local only | -| **Project** | `/.sqrl/squirrel.db` | lesson, fact (scope=project) | Local only | -| **Team** | `~/.sqrl/group.db` + `/.sqrl/group.db` | Shared memories (owner=team) | Cloud | - -### Memory Schema (v2) - -Additional fields for team support: - -```sql -CREATE TABLE memories ( - -- ... all v1 fields ... 
- owner TEXT NOT NULL DEFAULT 'individual', -- individual | team - team_id TEXT, -- team identifier (for owner=team) - contributed_by TEXT, -- user who shared (for owner=team) - source_memory_id TEXT -- original memory ID (if promoted to team) -); -``` - -### Team Tools (v2) - -**share_memory(memory_id):** Promote individual memory to team -1. Read from `squirrel.db` -2. Copy to `group.db` with `owner: team` -3. Set `contributed_by`, `source_memory_id` -4. Sync triggers cloud upload - -**team_join(team_id), team_leave():** Team membership management - -**team_export(filters):** Export team memories for offline/backup - -### Sync Architecture - -**Local-first with background sync:** -- `group.db` is local copy, always available -- Background process syncs with cloud -- Users never wait for network -- Conflict resolution: last-write-wins with vector clocks - -**Scaling considerations (from research):** -- Individual user (6 months): ~6MB (900 memories) -- Team (10,000 users): ~6GB if full sync - NOT viable - -**Hybrid approach for large teams:** -| Team Size | Strategy | -|-----------|----------| -| Small (<100) | Full sync - all team memories in local group.db | -| Medium (100-1000) | Partial sync - recent + relevant memories locally | -| Large (1000+) | Cloud-primary - query cloud, cache locally | - -**Reference:** Figma, Notion, Linear all use server-first or partial sync. Nobody syncs everything locally at scale. 
 - -### Team Commands (v2) - -```bash -sqrl team join <team-id> # Join team, start syncing group.db -sqrl team leave # Leave team, remove group.db -sqrl share <memory-id> # Promote individual memory to team -sqrl share --all # Share all individual memories to team -sqrl team export # Export team memories to local -``` - -### Migration Paths - -**Local β†’ Cloud (user subscribes):** -```bash -sqrl share --all # Promotes all individual memories to team -``` - -**Cloud β†’ Local (team exports):** -```bash -sqrl team export --project # Downloads team memories to local squirrel.db -``` - -### Config (v2) - -```toml -# ~/.sqrl/config.toml -[team] -id = "abc-team-id" -sync_interval_seconds = 300 # 5 min background sync -sync_strategy = "full" # full | partial | cloud-primary -``` - -### Retrieval (v2) - -Context retrieval queries BOTH databases: -1. `squirrel.db` (individual memories) -2. `group.db` (team memories) -3. LLM reranks combined results - -Team memories get attribution: "From team member Alice" - ---- - -## Patterns from Competitor Analysis - -| Pattern | Source | Location | -|---------|--------|----------| -| UUIDβ†’integer mapping for LLM | mem0 | Agent ingest | -| History tracking (old/new content) | mem0 | memory_history table | -| Structured exceptions | mem0 | All tools | -| Soft-delete (state column) | mem0 | memories table | -| Access logging | mem0 | memory_access_log table | -| Success detection | claude-cache | Agent ingest | -| Pitfall learning | claude-cache | Memory types | -| Unified agent with tools | letta | Agent architecture | -| Session Q&A tracking | cognee | memory_access_log | - -**Key insight:** Passive learning requires success detection. We let the LLM decide task outcomes instead of building a rules engine. 
diff --git a/EXAMPLE.md b/EXAMPLE.md deleted file mode 100644 index 17ef83d..0000000 --- a/EXAMPLE.md +++ /dev/null @@ -1,723 +0,0 @@ -# Squirrel: Complete Process Walkthrough - -Detailed example demonstrating the entire Squirrel data flow from installation to personalized AI context. - -## Core Concept - -Squirrel watches AI tool logs, groups events into **Episodes** (4-hour time windows), and sends them to a unified Python Agent for analysis: - -1. **Segment by Kind** - Not all sessions are coding tasks. Identify segment type first: - - `EXECUTION_TASK` - coding, fixing bugs, running commands - - `PLANNING_DECISION` - architecture, design, tech choices - - `RESEARCH_LEARNING` - learning, exploring docs - - `DISCUSSION` - brainstorming, chat -2. **Classify Outcomes** - Only for EXECUTION_TASK: SUCCESS | FAILURE | UNCERTAIN (with evidence) -3. **Detect Frustration** - Swear words, anger β†’ boost memory importance -4. **Extract Memories** - Based on segment kind, not just success/failure - -Episode = batch of events from same repo within 4-hour window (internal batching, not a product concept). - -**The key insight:** Not every session is a "task" with success/failure. Architecture discussions, research, and chat produce valuable memories without outcomes. We segment first, then extract appropriately. - ---- - -## Scenario - -Developer "Alice" working on `inventory-api` (FastAPI project). 
- -Alice's coding preferences: -- Type hints everywhere -- pytest with fixtures -- Async/await patterns - ---- - -## Phase 1: Installation & Setup - -### Step 1.1: Install Squirrel - -```bash -# Universal install (Mac/Linux/Windows) -curl -sSL https://sqrl.dev/install.sh | sh - -# Or platform-specific -brew install sqrl # Mac -winget install sqrl # Windows -``` - -### Step 1.2: CLI Selection (First Run) - -```bash -sqrl config -# Interactive prompt: select which CLIs you use -# β†’ Claude Code: yes -# β†’ Codex CLI: yes -# β†’ Gemini CLI: no -# β†’ Cursor: yes -``` - -This stores CLI selection in `~/.sqrl/config.toml`: -```toml -[agents] -claude_code = true -codex_cli = true -gemini_cli = false -cursor = true -``` - -### Step 1.3: Project Initialization - -```bash -cd ~/projects/inventory-api -sqrl init -``` - -What happens: -1. First `sqrl` command auto-starts daemon (lazy start) -2. `sqrl init` triggers agent via IPC -3. Creates `.sqrl/squirrel.db` for project memories -4. Agent scans for CLI log folders containing this project -5. Agent asks: ingest historical logs? (token-limited, not time-limited) -6. For each enabled CLI (from `config.agents`): - - Configures MCP (adds Squirrel server to CLI's MCP config) - - Injects instruction text to agent file (CLAUDE.md, AGENTS.md, .cursor/rules/) -7. 
Registers project in `~/.sqrl/projects.json` - -File structure after init: -``` -~/.sqrl/ -β”œβ”€β”€ config.toml # API keys, settings, CLI selection -β”œβ”€β”€ squirrel.db # Global memories (lesson, fact, profile with scope=global) -β”œβ”€β”€ projects.json # List of initialized projects (for sqrl sync) -└── logs/ # Daemon logs - -~/projects/inventory-api/ -β”œβ”€β”€ .sqrl/ -β”‚ └── squirrel.db # Project memories (lesson, fact with scope=project) -β”œβ”€β”€ CLAUDE.md # ← Squirrel instructions injected -└── AGENTS.md # ← Squirrel instructions injected -``` - -### Step 1.4: Agent Instruction Injection - -For each enabled CLI, Squirrel adds this block to the agent instruction file: - -```markdown -## Squirrel Memory System - -This project uses Squirrel for persistent memory across sessions. - -ALWAYS call `squirrel_get_task_context` BEFORE: -- Fixing bugs (to check if this bug was seen before) -- Refactoring code (to get patterns that worked/failed) -- Adding features touching existing modules -- Debugging errors that seem familiar - -DO NOT call for: -- Simple typo fixes -- Adding comments -- Formatting changes -``` - -This increases the probability that AI tools will call Squirrel MCP tools at the right moments. 
- -### Step 1.5: Syncing New CLIs - -Weeks later, Alice enables Cursor globally: - -```bash -sqrl config # select Cursor - -# Update all existing projects -sqrl sync -# β†’ Adds MCP config + instructions for Cursor to all registered projects -``` - -### Step 1.6: Natural Language CLI - -The agent handles all CLI commands: - -```bash -sqrl "what do you know about auth here" -sqrl "show my coding style" -sqrl "I prefer functional programming" -sqrl "configure gemini cli to use squirrel" -sqrl "forget the memory about deprecated API" -``` - -Or direct commands: -```bash -sqrl search "database patterns" -sqrl status -sqrl config set llm.model claude-sonnet -sqrl sync # Update all projects with new CLI configs -``` - ---- - -## Phase 2: Learning (Passive Collection) - -### Step 2.1: Alice Codes (Two Tasks in One Session) - -``` -# Task 1: Add endpoint (SUCCESS) -Alice: "Add a new endpoint to get inventory items by category" -Claude Code: "I'll create a GET endpoint..." -Alice: "Use async def, add type hints, and write a pytest fixture" -Claude Code: [Revises code with async, types, fixture] -Alice: "Perfect, tests pass!" - -# Task 2: Fix auth bug (FAILURE then SUCCESS, with frustration) -Alice: "There's an auth loop bug when tokens expire" -Claude Code: "Let me check localStorage..." -[Error persists] -Alice: "Still broken" -Claude Code: "Let me try checking cookies..." -[Error persists] -Alice: "This is so frustrating, we've been going in circles!" -Claude Code: "I think the issue is in useEffect cleanup..." -[Implements fix] -Alice: "That fixed it, thanks!" 
-``` - -### Step 2.2: Rust Daemon Watches Logs - -Daemon watches log files from all supported CLIs: -``` -~/.claude/projects/**/*.jsonl # Claude Code -~/.codex-cli/logs/**/*.jsonl # Codex CLI -~/.gemini/logs/**/*.jsonl # Gemini CLI -``` - -Parses into normalized Events: - -```rust -let event = Event { - id: "evt_001", - repo: "/Users/alice/projects/inventory-api", - kind: "user", // user | assistant | tool | system - content: "Use async def, add type hints...", - file_paths: vec![], - ts: "2025-11-25T10:01:00Z", - processed: false, -}; -storage.save_event(event)?; -``` - -### Step 2.3: Episode Batching - -Episodes flush on **4-hour time window** OR **50 events max** (whichever comes first): - -```rust -fn should_flush_episode(buffer: &EventBuffer) -> bool { - let window_hours = 4; - let max_events = 50; - - buffer.events.len() >= max_events || - buffer.oldest_event_age() >= Duration::hours(window_hours) -} - -// Flush triggers IPC call to Python Agent -fn flush_episode(repo: &str, events: Vec) { - let episode = Episode { - id: generate_uuid(), - repo: repo.to_string(), - start_ts: events.first().ts, - end_ts: events.last().ts, - events: events, - }; - - ipc_client.send(json!({ - "method": "ingest_episode", - "params": { "episode": episode }, - "id": 1 - })); -} -``` - ---- - -## Phase 3: Memory Extraction (Python Agent) - -### Step 3.1: Agent Analyzes Episode (Segment-First Approach) - -The unified agent receives the Episode and uses segment-first analysis: - -```python -async def ingest_episode(episode: dict) -> dict: - # Build context from events - context = "\n".join([ - f"[{e['kind']}] {e['content']}" - for e in episode["events"] - ]) - - # LLM analyzes: segments first, then memories in ONE call - response = await llm.call(INGEST_PROMPT.format(context=context)) - - return { - "segments": [ - { - "id": "seg_1", - "kind": "EXECUTION_TASK", - "title": "Add category endpoint", - "event_range": [0, 4], - "outcome": { - "status": "SUCCESS", - "evidence": ["User said 
'Perfect, tests pass!'"] - } - }, - { - "id": "seg_2", - "kind": "EXECUTION_TASK", - "title": "Fix auth loop bug", - "event_range": [5, 10], - "outcome": { - "status": "SUCCESS", - "evidence": ["User said 'That fixed it, thanks!'"] - }, - "user_frustration": "moderate" - } - ], - "memories": [ - { - "memory_type": "fact", - "scope": "global", - "key": "user.preferred_style", - "value": "async_await", - "text": "Prefers async/await with type hints for all handlers", - "evidence_source": "success", - "source_segments": ["seg_1"], - "confidence": 0.9 - }, - { - "memory_type": "lesson", - "outcome": "failure", - "scope": "project", - "text": "Auth token refresh loops are NOT caused by localStorage or cookies - check useEffect cleanup first", - "importance": "high", - "metadata": {"user_frustration": "moderate"}, - "source_segments": ["seg_2"], - "confidence": 0.9 - }, - { - "memory_type": "lesson", - "outcome": "success", - "scope": "project", - "text": "For auth redirect loops, fix useEffect cleanup to prevent re-triggering on token refresh", - "source_segments": ["seg_2"], - "confidence": 0.9 - }, - { - "memory_type": "fact", - "scope": "project", - "text": "Tried localStorage fix (failed), tried cookies fix (failed), useEffect cleanup fix worked", - "evidence_source": "neutral", - "source_segments": ["seg_2"], - "confidence": 0.85 - } - ] - } -``` - -INGEST Prompt: -``` -Analyze this coding session (~4 hours of activity): - -[user] Add a new endpoint to get inventory items by category -[assistant] I'll create a GET endpoint... -[user] Use async def, add type hints, and write a pytest fixture -[assistant] [Revises code with async, types, fixture] -[user] Perfect, tests pass! -[user] There's an auth loop bug when tokens expire -[assistant] Let me check localStorage... -[user] Still broken -[assistant] Let me try checking cookies... -[user] This is so frustrating, we've been going in circles! -[assistant] I think the issue is in useEffect cleanup... 
-[user] That fixed it, thanks! - -Analyze this session using SEGMENT-FIRST approach: - -1. SEGMENT the episode by kind (each segment = 5-20 events): - - EXECUTION_TASK: coding, fixing, running commands (CAN have outcome) - - PLANNING_DECISION: architecture, design (has resolution: DECIDED/OPEN) - - RESEARCH_LEARNING: learning, exploring (has resolution: ANSWERED/PARTIAL) - - DISCUSSION: brainstorming, chat (no outcome) - -2. For EXECUTION_TASK segments ONLY, classify outcome: - - SUCCESS: with evidence (tests passed, user confirmed, etc.) - - FAILURE: with evidence (error persists, user says "didn't work") - - UNCERTAIN: no clear evidence (conservative default) - - IMPORTANT: Other segment kinds NEVER have SUCCESS/FAILURE. - -3. Detect user frustration signals: - - Swear words, strong anger β†’ importance: critical, user_frustration: severe - - Repeated complaints ("again", "still broken") β†’ importance: high, user_frustration: moderate - - Mild frustration β†’ importance: medium, user_frustration: mild - - No frustration β†’ importance: medium, user_frustration: none - -4. Extract memories based on segment kind: - - EXECUTION_TASK: lesson (with outcome), fact (knowledge discovered) - - PLANNING_DECISION: fact (decisions), lesson (rationale), profile - - RESEARCH_LEARNING: fact (knowledge), lesson (learnings) - - DISCUSSION: profile (preferences), lesson (insights) - -Return segments[] and memories[] with source_segment references. 
-``` - -### Step 3.2: Near-Duplicate Check + Save Memory - -Agent uses its DB tools to check for duplicates: - -```python -# Before saving, check for near-duplicates -candidates = await db_tools.search_similar( - repo=memory.repo, - content=memory.content, - memory_types=[memory.memory_type], - top_k=5 -) -for candidate in candidates: - if candidate.similarity >= 0.9: - # Near-duplicate found - merge or skip - return await merge_or_skip(memory, candidate) - -# No duplicate, save new memory -await db_tools.add_memory(memory) -``` - ---- - -## Phase 4: Context Retrieval (MCP) - -### Step 4.1: MCP Tool Call - -Claude Code (or other AI tool) calls Squirrel via MCP: - -```json -{ - "tool": "squirrel_get_task_context", - "arguments": { - "project_root": "/Users/alice/projects/inventory-api", - "task": "Add a delete endpoint for inventory items", - "context_budget_tokens": 400 - } -} -``` - -### Step 4.2: Vector Search (Candidate Retrieval) - -The agent receives the MCP request via IPC and retrieves candidates: - -```python -async def get_task_context(project_root: str, task: str, budget: int) -> dict: - # For trivial queries, return empty fast (<20ms) - if is_trivial_task(task): # "fix typo", "add comment" - return {"context_prompt": "", "memory_ids": [], "tokens_used": 0} - - # Vector search retrieves top 20 candidates from both DBs - candidates = await retrieval.search( - task=task, - project_root=project_root, - include_global=True, # user_style from global DB - top_k=20 - ) - # candidates now contains ~20 memories ranked by embedding similarity -``` - -### Step 4.3: LLM Rerank + Compose (fast_model) - -LLM reranks candidates and composes a context prompt in ONE call: - -```python - # LLM reranks + composes context prompt (uses fast_model) - response = await llm.call( - model=config.fast_model, # gemini-3-flash - prompt=COMPOSE_PROMPT.format( - task=task, - candidates=format_candidates(candidates), - budget=budget - ) - ) - - return { - "context_prompt": 
response.prompt, # Ready-to-inject text - "memory_ids": response.selected_ids, # For tracing - "tokens_used": count_tokens(response.prompt) - } -``` - -COMPOSE_PROMPT: -``` -Task: {task} - -Candidate memories (ranked by similarity): -{candidates} - -Select the most relevant memories for this task. Then compose a context prompt that: -1. Prioritizes lessons with outcome=failure (what NOT to do) first -2. Includes relevant lessons (outcome=success) and facts -3. Resolves conflicts between memories (newer wins) -4. Merges related memories to save tokens -5. Stays within {budget} tokens - -Return: -- selected_ids: list of memory IDs you selected -- prompt: the composed context prompt for the AI tool -``` - -### Step 4.4: Response - -```json -{ - "context_prompt": "## Context from Squirrel\n\n**Style Preferences:**\n- Use async/await with type hints for all handlers [mem_abc123]\n\n**Project Facts:**\n- This project uses FastAPI with Pydantic models [mem_def456]\n\n**Relevant for this task:** You're adding an HTTP endpoint, so follow the async pattern and define a Pydantic response model.", - "memory_ids": ["mem_abc123", "mem_def456"], - "tokens_used": 89 -} -``` - -The AI tool injects this `context_prompt` directly into its system prompt for better responses. - ---- - -## Phase 5: Daemon Lifecycle - -### Lazy Start -``` -User runs: sqrl search "auth" - ↓ -CLI checks if daemon running (Unix socket) - ↓ -Not running β†’ CLI starts daemon in background - ↓ -Daemon starts β†’ CLI connects β†’ command executed -``` - -### Idle Shutdown -``` -Daemon tracks last activity timestamp - ↓ -Every minute: check if idle > 2 hours - ↓ -If idle: flush any pending Episodes β†’ graceful shutdown - ↓ -Next sqrl command β†’ daemon starts again -``` - -No manual daemon management. No system services. Just works. 
- ---- - -## Data Schema - -### Event (normalized from all CLIs) - -```sql -CREATE TABLE events ( - id TEXT PRIMARY KEY, - repo TEXT NOT NULL, - kind TEXT NOT NULL, -- user | assistant | tool | system - content TEXT NOT NULL, - file_paths TEXT, -- JSON array - ts TEXT NOT NULL, - processed INTEGER DEFAULT 0 -); -``` - -### Memory (squirrel.db) - -```sql -CREATE TABLE memories ( - id TEXT PRIMARY KEY, - project_id TEXT, -- NULL for global/user-scope memories - memory_type TEXT NOT NULL, -- lesson | fact | profile - - -- For lessons (task-level patterns/pitfalls) - outcome TEXT, -- success | failure | uncertain (lesson only) - - -- For facts - fact_type TEXT, -- knowledge | process (optional) - key TEXT, -- declarative key: project.db.engine, user.preferred_style - value TEXT, -- declarative value: PostgreSQL, async_await - evidence_source TEXT, -- success | failure | neutral | manual (fact only) - support_count INTEGER, -- approx episodes that support this fact - last_seen_at TEXT, -- last episode where this was seen - - -- Content - text TEXT NOT NULL, -- human-readable content - embedding BLOB, -- 1536-dim float32 (text-embedding-3-small) - metadata TEXT, -- JSON: anchors (files, components, endpoints) - - -- Confidence - confidence REAL NOT NULL, - importance TEXT NOT NULL DEFAULT 'medium', -- critical | high | medium | low - - -- Lifecycle - status TEXT NOT NULL DEFAULT 'active', -- active | inactive | invalidated - valid_from TEXT NOT NULL, - valid_to TEXT, - superseded_by TEXT, - - -- Audit - user_id TEXT NOT NULL DEFAULT 'local', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); -``` - -**Status values:** -- `active` - Normal, appears in retrieval -- `inactive` - Soft deleted via `sqrl forget`, hidden but recoverable -- `invalidated` - Superseded by newer fact, hidden but keeps history - -**Declarative key examples:** -- `project.db.engine` β†’ PostgreSQL, MySQL, SQLite -- `project.api.framework` β†’ FastAPI, Express, Rails -- `user.preferred_style` 
β†’ async_await, callbacks, sync -- `user.comment_style` β†’ minimal, detailed, jsdoc - -Same key + different value β†’ deterministic invalidation of old fact. - -### User Profile (structured identity) - -```sql -CREATE TABLE user_profile ( - user_id TEXT PRIMARY KEY, - name TEXT, - role TEXT, - experience_level TEXT, - company TEXT, - primary_use_case TEXT, - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); -``` - ---- - -## Summary - -| Phase | What Happens | -|-------|--------------| -| Install | Universal script or package manager | -| CLI Selection | `sqrl config` - select which CLIs you use (Claude Code, Codex, etc.) | -| Init | Creates DB, ingests history, configures MCP + injects agent instructions for enabled CLIs | -| Sync | `sqrl sync` - updates all projects when new CLIs enabled | -| Learning | Daemon watches CLI logs, parses to Events | -| Batching | Groups events into Episodes (4hr OR 50 events) | -| **Segmentation** | Agent segments by kind: EXECUTION_TASK / PLANNING_DECISION / RESEARCH_LEARNING / DISCUSSION | -| **Outcome** | For EXECUTION_TASK only: SUCCESS/FAILURE/UNCERTAIN (with evidence) | -| **Frustration** | Detects anger/swearing β†’ boosts importance (critical/high), stores user_frustration in metadata | -| Extraction | Based on segment kind: lesson (with outcome), fact (with key/evidence_source), profile | -| **Declarative Keys** | Facts with project.* or user.* keys enable deterministic conflict detection | -| **Contradiction** | Same key + different value β†’ old fact invalidated (no LLM); free-text β†’ LLM judges | -| Dedup | Near-duplicate check (0.9 similarity) before ADD | -| Retrieval | MCP β†’ Vector search (top 20) β†’ LLM reranks + composes context prompt | -| Forget | `sqrl forget` β†’ soft delete (status=inactive), recoverable | -| Idle | 2hr no activity β†’ daemon stops, next command restarts | - -### Why Segment-First Matters - -Not all sessions are coding tasks with success/failure: -- Architecture discussions β†’ 
produce decisions (fact), not outcomes -- Research sessions β†’ produce knowledge (fact), not outcomes -- Brainstorming β†’ produces insights (lesson) and preferences (profile) - -Segment-first ensures we extract appropriate memories from each session type, and only apply SUCCESS/FAILURE to actual execution tasks. - -### Why Contradiction Detection Matters - -Facts change over time: -- Day 1: "Project uses PostgreSQL" (fact) -- Day 30: "Migrated to MySQL" (new fact) - -Contradiction detection auto-invalidates old facts when new conflicting facts arrive, keeping retrieval clean and accurate. - ---- - -## Key Design Decisions - -| Decision | Choice | Why | -|----------|--------|-----| -| **Unified Agent** | Single Python agent with tools | One LLM brain for all operations | -| **2-tier LLM** | strong_model + fast_model | Pro for complex reasoning, Flash for quick tasks | -| **Lazy Daemon** | Start on command, stop after 2hr idle | No system service complexity | -| Episode trigger | 4-hour window OR 50 events | Balance context vs LLM cost | -| **Segment-first** | Segment by kind before outcome classification | Not all sessions are tasks with outcomes | -| **Segment kinds** | EXECUTION_TASK / PLANNING / RESEARCH / DISCUSSION | Different session types produce different memories | -| **Outcome only for EXECUTION_TASK** | SUCCESS/FAILURE/UNCERTAIN with evidence | Avoid classifying discussions as "failures" | -| **Frustration detection** | Anger/swearing β†’ importance boost + metadata flag | High-pain failures get prioritized in retrieval | -| Memory extraction | Based on segment kind | Architecture produces facts, coding produces lessons | -| **Declarative keys** | project.* and user.* keys for facts | Deterministic conflict detection (no LLM) | -| **Evidence source** | success/failure/neutral/manual on facts | Track how a fact was learned | -| **Memory lifecycle** | status (active/inactive/invalidated) + validity | Soft delete + contradiction handling | -| **Fact 
contradiction** | Declarative key match + LLM for free-text | Auto-invalidate old when new conflicts | -| **Soft delete only (v1)** | `sqrl forget` β†’ status=inactive | Recoverable, no hard purge until v2 | -| **Context compose** | LLM reranks + generates prompt (fast_model) | Better than math scoring, one call | -| **Natural language CLI** | Thin shell passes to agent | "By the way" - agent handles all | -| **Retroactive ingestion** | Token-limited, not time-limited | Fair for all project sizes | -| User profile | Separate table (structured identity) | name, role, experience_level - not learned | -| **2-layer DB** | Global (squirrel.db) + Project (squirrel.db) | Scope-based separation | -| **CLI selection** | User picks CLIs in `sqrl config` | Only configure what user actually uses | -| **Agent instruction injection** | Add Squirrel block to CLAUDE.md, AGENTS.md, etc. | Increase MCP call success rate | -| **sqrl sync** | Update all projects when new CLI enabled | User stays in control, no magic patching | -| Near-duplicate threshold | 0.9 similarity | Avoid redundant memories | -| Trivial query fast-path | Return empty <20ms | No wasted LLM calls | -| **Cross-platform** | Mac, Linux, Windows from v1 | All platforms supported | -| 100% passive | No user prompts during coding | Invisible during use | - ---- - -## Memory Type Reference - -3 memory types with scope flag: - -| Type | Key Fields | Description | Example | -|------|------------|-------------|---------| -| `lesson` | outcome, importance, user_frustration | What worked or failed | "API 500 on null user_id", "Repository pattern works well" | -| `fact` | key, value, evidence_source | Project/user knowledge | key=project.db.engine, value=PostgreSQL | -| `profile` | (structured identity) | User background info | name, role, experience_level | - -### Declarative Keys - -Critical facts use declarative keys for deterministic conflict detection: - -**Project-scoped keys:** -- `project.db.engine` - PostgreSQL, 
MySQL, SQLite -- `project.api.framework` - FastAPI, Express, Rails -- `project.language.main` - Python, TypeScript, Go -- `project.auth.method` - JWT, session, OAuth - -**User-scoped keys (global):** -- `user.preferred_style` - async_await, callbacks, sync -- `user.preferred_language` - Python, TypeScript, Go -- `user.comment_style` - minimal, detailed, jsdoc - -### Evidence Source (Facts Only) - -How a fact was learned: -- `success` - Learned from successful task (high confidence) -- `failure` - Learned from failed task (valuable pitfall) -- `neutral` - Observed in planning/research/discussion -- `manual` - User explicitly stated via CLI - -### Frustration Detection (Lessons) - -User frustration signals boost memory importance: - -| Signal | Importance | user_frustration | -|--------|------------|------------------| -| Swear words, strong anger | `critical` | `severe` | -| Repeated complaints ("again", "still") | `high` | `moderate` | -| Mild frustration | `medium` | `mild` | -| No frustration | `medium` | `none` | - -Stored in `metadata.user_frustration`. Frustration-flagged memories get priority in retrieval to prevent recurring pain points. - -### Scope Matrix - -| Scope | DB File | Description | -|-------|---------|-------------| -| Global | `~/.sqrl/squirrel.db` | User preferences, profile (applies to all projects) | -| Project | `/.sqrl/squirrel.db` | Project-specific lessons and facts | diff --git a/specs/ARCHITECTURE.md b/specs/ARCHITECTURE.md index e79a2cd..9b4f340 100644 --- a/specs/ARCHITECTURE.md +++ b/specs/ARCHITECTURE.md @@ -2,6 +2,28 @@ High-level system boundaries and data flow. 
+## Technology Stack + +| Category | Technology | Notes | +|----------|------------|-------| +| **Rust Daemon** | | | +| Storage | SQLite + sqlite-vec | Local-first, vector search | +| IPC Protocol | JSON-RPC 2.0 | MCP-compatible, over Unix socket | +| MCP SDK | rmcp | Official Rust SDK (modelcontextprotocol/rust-sdk) | +| CLI Framework | clap | Rust CLI parsing | +| Async Runtime | tokio | Async I/O | +| File Watching | notify | Cross-platform fs events | +| Logging | tracing | Structured logging | +| **Python Agent** | | | +| Agent Framework | PydanticAI | Python agent with tools | +| LLM Client | LiteLLM | Multi-provider support | +| Embeddings | OpenAI text-embedding-3-small | 1536-dim, API-based | +| Logging | structlog | Structured logging | +| **Build & Release** | | | +| Rust Build | cargo-dist | Generates Homebrew, MSI, installers | +| Auto-update | axoupdater | dist's official updater | +| Python Packaging | PyInstaller | Bundled, zero user deps | + ## System Overview ``` diff --git a/specs/DECISIONS.md b/specs/DECISIONS.md index 1f89043..4591c22 100644 --- a/specs/DECISIONS.md +++ b/specs/DECISIONS.md @@ -241,3 +241,44 @@ Use Unix socket at `/tmp/sqrl_agent.sock` with JSON-RPC 2.0 protocol. Windows us | Team sync backend | Supabase / Custom / None | v2 | | Local LLM support | Ollama / llama.cpp / None | v2 | | Web UI | None / Tauri / Electron | v2 | + +--- + +## Future: v2 Team/Cloud Architecture + +Reference architecture for team memory sharing (not in v1 scope). 
+ +### 3-Layer Database Architecture + +| Layer | DB File | Contents | Sync | +|-------|---------|----------|------| +| Global | `~/.sqrl/squirrel.db` | lesson, fact, profile (scope=global) | Local only | +| Project | `/.sqrl/squirrel.db` | lesson, fact (scope=project) | Local only | +| Team | `~/.sqrl/group.db` | Shared memories (owner=team) | Cloud | + +### Memory Schema Extensions (v2) + +```sql +-- Additional fields for team support +ALTER TABLE memories ADD COLUMN owner TEXT NOT NULL DEFAULT 'individual'; -- individual | team +ALTER TABLE memories ADD COLUMN team_id TEXT; -- team identifier +ALTER TABLE memories ADD COLUMN contributed_by TEXT; -- user who shared +ALTER TABLE memories ADD COLUMN source_memory_id TEXT; -- original memory ID +``` + +### Scaling Strategy + +| Team Size | Strategy | +|-----------|----------| +| Small (<100) | Full sync - all team memories in local group.db | +| Medium (100-1000) | Partial sync - recent + relevant memories locally | +| Large (1000+) | Cloud-primary - query cloud, cache locally | + +### Team Commands (v2) + +```bash +sqrl team join # Join team, start syncing group.db +sqrl team leave # Leave team, remove group.db +sqrl share # Promote individual memory to team +sqrl team export # Export team memories to local +``` From 0a1f909b504b11aefb4c4e33bb6f28387aa93fa9 Mon Sep 17 00:00:00 2001 From: Ziming Wang Date: Thu, 11 Dec 2025 09:40:22 +0800 Subject: [PATCH 15/15] feat(ci): add CI/CD pipeline and collaboration standards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive CI/CD and collaboration infrastructure: ## CI Pipeline (.github/workflows/ci.yml) - Cross-platform testing (Linux, macOS, Windows) - Rust: fmt, clippy, test, build - Python: ruff, pytest with coverage - Security scanning (cargo-audit, pip-audit, trufflehog) - Spec validation (ID uniqueness check) ## Release Pipeline (.github/workflows/release.yml) - Multi-platform binary builds (x86_64 + aarch64) - 
Python wheel packaging - GitHub Release automation - PyPI publishing ## Collaboration Standards (specs/CONTRIBUTING.md) - PR workflow (PR-001, PR-002, PR-003) - Commit standards (COMMIT-001, COMMIT-002) - Code review guidelines (REVIEW-001, REVIEW-002) - Issue management (ISSUE-001, ISSUE-002) - Security policies (SEC-001, SEC-002) - Release process (RELEASE-001, RELEASE-002) ## Templates - Pull request template - Bug report issue template - Feature request issue template ## Automation - Dependabot for dependency updates - Markdown link checking Implements: DR4 (Test Before Merge) πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .github/ISSUE_TEMPLATE/bug_report.md | 35 +++ .github/ISSUE_TEMPLATE/feature_request.md | 24 ++ .github/dependabot.yml | 43 ++++ .github/markdown-link-check.json | 12 + .github/pull_request_template.md | 32 +++ .github/workflows/ci.yml | 204 +++++++++++++++++ .github/workflows/release.yml | 183 +++++++++++++++ AGENTS.md | 12 + specs/CONTRIBUTING.md | 267 ++++++++++++++++++++++ 9 files changed, 812 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/dependabot.yml create mode 100644 .github/markdown-link-check.json create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml create mode 100644 specs/CONTRIBUTING.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..ab1c740 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,35 @@ +--- +name: Bug Report +about: Report a bug in Squirrel +title: '[BUG] ' +labels: bug +assignees: '' +--- + +## Description + + +## Current Behavior + + +## Expected Behavior + + +## Reproduction Steps +1. +2. +3. 
+ +## Environment +- **OS**: +- **Squirrel version**: +- **CLI being used**: + +## Logs + +``` +``` + +## Spec Reference + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..97b1315 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,24 @@ +--- +name: Feature Request +about: Suggest a new feature +title: '[FEAT] ' +labels: feat +assignees: '' +--- + +## Problem Statement + + +## Proposed Solution + + +## Alternatives Considered + + +## Spec Impact + +- [ ] New spec needed: +- [ ] Updates to: + +## Additional Context + diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..7522c12 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,43 @@ +# Dependabot configuration +# Implements: SEC-002 (Dependency Security) + +version: 2 +updates: + # Rust dependencies + - package-ecosystem: "cargo" + directory: "/daemon" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 5 + labels: + - "dependencies" + - "rust" + commit-message: + prefix: "chore(deps)" + + # Python dependencies + - package-ecosystem: "pip" + directory: "/agent" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 5 + labels: + - "dependencies" + - "python" + commit-message: + prefix: "chore(deps)" + + # GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + open-pull-requests-limit: 3 + labels: + - "dependencies" + - "ci" + commit-message: + prefix: "chore(ci)" diff --git a/.github/markdown-link-check.json b/.github/markdown-link-check.json new file mode 100644 index 0000000..8cb12e9 --- /dev/null +++ b/.github/markdown-link-check.json @@ -0,0 +1,12 @@ +{ + "ignorePatterns": [ + { + "pattern": "^https://sqrl.dev" + }, + { + "pattern": "^#" + } + ], + "replacementPatterns": [], + "aliveStatusCodes": [200, 206, 301, 302, 307, 308] +} diff --git 
a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..a84b754 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,32 @@ +## Summary + + +## Spec References + +- Implements: +- Updates: + +## Type +- [ ] `feat`: New feature +- [ ] `fix`: Bug fix +- [ ] `docs`: Documentation only +- [ ] `refactor`: Code refactoring +- [ ] `test`: Test additions +- [ ] `chore`: Build/tooling changes + +## Checklist +- [ ] Specs updated (if behavior changed) +- [ ] Tests added/updated +- [ ] `fmt` passes locally +- [ ] `lint` passes locally +- [ ] `test-all` passes locally + +## Test Plan + + +--- + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..027fd64 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,204 @@ +# Squirrel CI Pipeline +# Runs on all PRs and pushes to main +# Implements: DR4 (Test Before Merge) + +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + # ==================== + # Rust Daemon Checks + # ==================== + rust-check: + name: Rust Check + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Cache cargo registry + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Check formatting + run: cargo fmt --all -- --check + working-directory: daemon + + - name: Clippy lints + run: cargo clippy --all-targets --all-features -- -D warnings + working-directory: daemon + + - name: Run tests + run: cargo test --all-features + working-directory: daemon + + - name: Build release + run: 
cargo build --release + working-directory: daemon + + # ==================== + # Python Agent Checks + # ==================== + python-check: + name: Python Check + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache pip packages + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip-${{ matrix.python-version }}- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff pytest pytest-asyncio pytest-cov + pip install -e ".[dev]" + working-directory: agent + + - name: Ruff format check + run: ruff format --check . + working-directory: agent + + - name: Ruff lint + run: ruff check . 
+ working-directory: agent + + - name: Run tests with coverage + run: pytest --cov=squirrel_agent --cov-report=xml --cov-report=term + working-directory: agent + + - name: Upload coverage + uses: codecov/codecov-action@v4 + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12' + with: + files: agent/coverage.xml + flags: python + fail_ci_if_error: false + + # ==================== + # Security Checks + # ==================== + security: + name: Security Scan + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Install cargo-audit + run: cargo install cargo-audit + + - name: Rust security audit + run: cargo audit + working-directory: daemon + continue-on-error: true # Advisory only for now + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install pip-audit + run: pip install pip-audit + + - name: Python security audit + run: pip-audit + working-directory: agent + continue-on-error: true # Advisory only for now + + - name: Secret scanning + uses: trufflesecurity/trufflehog@main + with: + extra_args: --only-verified + + # ==================== + # Spec Validation + # ==================== + specs: + name: Spec Validation + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Check spec ID uniqueness + run: | + echo "Checking for duplicate spec IDs..." 
+ # Extract all spec IDs and check for duplicates + grep -rhoE '(SCHEMA|IPC|MCP|CLI|KEY-[PU]|PROMPT|ADR|ARCH|FLOW|DR|PR|COMMIT|REVIEW|ISSUE|SEC|RELEASE|COMM)-[0-9]+' specs/ | sort | uniq -d > duplicates.txt + if [ -s duplicates.txt ]; then + echo "Duplicate spec IDs found:" + cat duplicates.txt + exit 1 + fi + echo "No duplicate spec IDs found" + + - name: Validate markdown links + uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + folder-path: 'specs/' + config-file: '.github/markdown-link-check.json' + continue-on-error: true + + # ==================== + # All Checks Gate + # ==================== + all-checks: + name: All Checks Passed + runs-on: ubuntu-latest + needs: [rust-check, python-check, security, specs] + if: always() + + steps: + - name: Check all jobs passed + run: | + if [ "${{ needs.rust-check.result }}" != "success" ] || \ + [ "${{ needs.python-check.result }}" != "success" ]; then + echo "Required checks failed" + exit 1 + fi + echo "All required checks passed" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..ffa726b --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,183 @@ +# Squirrel Release Pipeline +# Triggered by version tags (v*) +# Implements: RELEASE-001, RELEASE-002 + +name: Release + +on: + push: + tags: + - 'v*' + +permissions: + contents: write + +env: + CARGO_TERM_COLOR: always + +jobs: + # ==================== + # Build Binaries + # ==================== + build: + name: Build ${{ matrix.target }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + # Linux + - target: x86_64-unknown-linux-gnu + os: ubuntu-latest + archive: tar.gz + - target: aarch64-unknown-linux-gnu + os: ubuntu-latest + archive: tar.gz + + # macOS + - target: x86_64-apple-darwin + os: macos-latest + archive: tar.gz + - target: aarch64-apple-darwin + os: macos-latest + archive: tar.gz + + # Windows + - target: x86_64-pc-windows-msvc + os: 
windows-latest + archive: zip + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Install cross-compilation tools (Linux ARM) + if: matrix.target == 'aarch64-unknown-linux-gnu' + run: | + sudo apt-get update + sudo apt-get install -y gcc-aarch64-linux-gnu + + - name: Build daemon + run: cargo build --release --target ${{ matrix.target }} + working-directory: daemon + env: + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER: aarch64-linux-gnu-gcc + + - name: Package (Unix) + if: matrix.os != 'windows-latest' + run: | + mkdir -p dist + cp daemon/target/${{ matrix.target }}/release/sqrl dist/ + cp daemon/target/${{ matrix.target }}/release/sqrl-daemon dist/ + cp README.md LICENSE dist/ + cd dist && tar -czvf ../squirrel-${{ matrix.target }}.${{ matrix.archive }} * + + - name: Package (Windows) + if: matrix.os == 'windows-latest' + run: | + mkdir dist + copy daemon\target\${{ matrix.target }}\release\sqrl.exe dist\ + copy daemon\target\${{ matrix.target }}\release\sqrl-daemon.exe dist\ + copy README.md dist\ + copy LICENSE dist\ + Compress-Archive -Path dist\* -DestinationPath squirrel-${{ matrix.target }}.${{ matrix.archive }} + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: squirrel-${{ matrix.target }} + path: squirrel-${{ matrix.target }}.${{ matrix.archive }} + + # ==================== + # Build Python Wheel + # ==================== + python-wheel: + name: Build Python Wheel + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install build tools + run: pip install build twine + + - name: Build wheel + run: python -m build + working-directory: agent + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: python-wheel + path: agent/dist/* + + # ==================== + # Create Release + # 
==================== + release: + name: Create Release + runs-on: ubuntu-latest + needs: [build, python-wheel] + + steps: + - uses: actions/checkout@v4 + + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Generate checksums + run: | + cd artifacts + find . -type f \( -name "*.tar.gz" -o -name "*.zip" -o -name "*.whl" \) -exec sha256sum {} \; > SHA256SUMS.txt + cat SHA256SUMS.txt + + - name: Extract version from tag + id: version + run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT + + - name: Create GitHub Release + uses: softprops/action-gh-release@v1 + with: + name: Squirrel v${{ steps.version.outputs.VERSION }} + draft: true + generate_release_notes: true + files: | + artifacts/**/*.tar.gz + artifacts/**/*.zip + artifacts/**/*.whl + artifacts/SHA256SUMS.txt + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # ==================== + # Publish to PyPI + # ==================== + pypi: + name: Publish to PyPI + runs-on: ubuntu-latest + needs: [release] + environment: pypi + + steps: + - name: Download Python wheel + uses: actions/download-artifact@v4 + with: + name: python-wheel + path: dist + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + skip-existing: true diff --git a/AGENTS.md b/AGENTS.md index 676e750..26228df 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -29,6 +29,7 @@ All documentation, specs, and structures are designed for AI comprehension. Use | specs/KEYS.md | Declarative key registry (KEY-*) | | specs/PROMPTS.md | LLM prompts with model tiers (PROMPT-*) | | specs/DECISIONS.md | Architecture decision records (ADR-*) | +| specs/CONTRIBUTING.md | Collaboration standards (PR-*, COMMIT-*, etc.) | **Rules:** 1. Read specs before implementing @@ -115,11 +116,22 @@ Available commands: ### Git Workflow +See `specs/CONTRIBUTING.md` for full details. 
+ Branch: `yourname/type-description` - `feat`, `fix`, `refactor`, `docs`, `test`, `chore` Commit: `type(scope): brief description` - Reference spec IDs when applicable +- Scopes: daemon, agent, cli, mcp, ipc, specs, ci + +PR Process: +1. Create branch from main +2. Make changes, run `fmt` + `lint` + `test-all` +3. Push and create PR (use template) +4. CI runs automatically +5. Get review, address feedback +6. Squash merge to main ### Code Quality - Write tests for new features (DR4) diff --git a/specs/CONTRIBUTING.md b/specs/CONTRIBUTING.md new file mode 100644 index 0000000..085cc62 --- /dev/null +++ b/specs/CONTRIBUTING.md @@ -0,0 +1,267 @@ +# Squirrel Contributing Guide + +Collaboration standards for contributors. This document complements CONSTITUTION.md with practical workflow details. + +## Contributor Workflow + +### Branch Strategy + +| Branch | Purpose | Protection | +|--------|---------|------------| +| `main` | Stable release-ready code | Protected, requires PR | +| `/-` | Feature/fix branches | None | + +**Branch naming examples:** +- `adrian/feat-log-watcher` +- `lyrica/fix-ipc-timeout` +- `adrian/docs-contributing` + +### Pull Request Process + +#### PR-001: Standard PR Flow + +``` +1. Create branch from main +2. Make changes (follow spec-driven development) +3. Run local checks: `fmt` + `lint` + `test-all` +4. Push branch +5. Create PR with template +6. CI runs automatically +7. Request review +8. Address feedback +9. 
Squash merge to main +``` + +#### PR-002: PR Template + +All PRs must include: + +```markdown +## Summary +Brief description of changes (1-2 sentences) + +## Spec References +- Implements: SCHEMA-001, IPC-002 (if applicable) +- Updates: specs/ARCHITECTURE.md (if spec changed) + +## Type +- [ ] feat: New feature +- [ ] fix: Bug fix +- [ ] docs: Documentation only +- [ ] refactor: Code refactoring +- [ ] test: Test additions +- [ ] chore: Build/tooling changes + +## Checklist +- [ ] Specs updated (if behavior changed) +- [ ] Tests added/updated +- [ ] `fmt` passes +- [ ] `lint` passes +- [ ] `test-all` passes + +## Test Plan +How to verify this change works. +``` + +#### PR-003: Review Requirements + +| Change Type | Required Reviewers | Auto-merge | +|-------------|-------------------|------------| +| docs only | 1 | Yes (after CI) | +| code (non-breaking) | 1 | No | +| spec changes | 2 | No | +| breaking changes | 2 + explicit approval | No | + +### Commit Standards + +#### COMMIT-001: Message Format + +``` +(): + +[optional body] + +[optional footer: refs #issue, implements SPEC-ID] +``` + +**Types:** +| Type | Description | +|------|-------------| +| feat | New feature | +| fix | Bug fix | +| docs | Documentation | +| refactor | Code refactoring (no behavior change) | +| test | Test additions/fixes | +| chore | Build, CI, tooling | +| perf | Performance improvement | + +**Scopes:** +| Scope | Description | +|-------|-------------| +| daemon | Rust daemon code | +| agent | Python agent code | +| cli | CLI interface | +| mcp | MCP server | +| ipc | IPC protocol | +| specs | Specification files | +| ci | CI/CD configuration | + +**Examples:** +``` +feat(daemon): implement Claude Code log watcher + +Implements FLOW-001 passive ingestion for Claude Code JSONL files. +Uses notify crate for cross-platform file watching. 
+ +Implements: ARCH-001 +Refs: #12 +``` + +``` +fix(agent): handle empty episode gracefully + +Return early with empty result instead of raising exception +when episode has no events. + +Fixes: #34 +``` + +#### COMMIT-002: Commit Hygiene + +| Rule | Description | +|------|-------------| +| Atomic commits | One logical change per commit | +| Passing state | Each commit should pass CI | +| No WIP commits | Squash before PR, no "WIP", "fixup" | +| Reference specs | Include spec IDs when implementing specs | + +### Code Review Guidelines + +#### REVIEW-001: Reviewer Checklist + +| Check | Question | +|-------|----------| +| Spec alignment | Does code match spec? If no spec, should there be one? | +| Boundary respect | Rust doing I/O only? Python doing LLM only? | +| Error handling | Graceful degradation? No panics in daemon? | +| Tests | New behavior covered? Edge cases? | +| Security | No secrets? Input validated? | +| Performance | Acceptable latency? No unnecessary allocations? | + +#### REVIEW-002: Review Etiquette + +| Do | Don't | +|----|-------| +| Be specific and actionable | Vague criticism | +| Suggest alternatives | Just say "this is wrong" | +| Approve when ready | Block on nitpicks | +| Use "nit:" prefix for optional | Demand perfection | + +### Issue Management + +#### ISSUE-001: Issue Labels + +| Label | Description | Color | +|-------|-------------|-------| +| `bug` | Something broken | Red | +| `feat` | Feature request | Green | +| `docs` | Documentation | Blue | +| `good-first-issue` | Beginner friendly | Purple | +| `help-wanted` | Needs contributor | Yellow | +| `blocked` | Waiting on something | Orange | +| `wontfix` | Not planned | Gray | + +#### ISSUE-002: Issue Template + +```markdown +## Description +Clear description of the issue or feature request. + +## Current Behavior (for bugs) +What happens now. + +## Expected Behavior +What should happen. + +## Reproduction Steps (for bugs) +1. Step one +2. 
Step two + +## Environment +- OS: +- Squirrel version: +- CLI being used: + +## Spec Reference (if applicable) +Related spec IDs: SCHEMA-001, IPC-002 +``` + +## Security + +### SEC-001: Secrets Handling + +| Rule | Enforcement | +|------|-------------| +| No secrets in code | Pre-commit hook check | +| No secrets in commits | CI secret scanning | +| API keys via env vars | Documented in README | +| `.env` files gitignored | In .gitignore | + +**Forbidden patterns in commits:** +- API keys (sk-*, anthropic-*, etc.) +- Private keys +- Passwords +- Connection strings with credentials + +### SEC-002: Dependency Security + +| Check | Frequency | Tool | +|-------|-----------|------| +| Rust advisories | Every CI run | `cargo audit` | +| Python vulnerabilities | Every CI run | `pip-audit` | +| Dependabot alerts | Automated | GitHub Dependabot | + +## Release Process + +### RELEASE-001: Versioning + +Follow [Semantic Versioning](https://semver.org/): + +``` +MAJOR.MINOR.PATCH + +MAJOR: Breaking changes +MINOR: New features (backward compatible) +PATCH: Bug fixes (backward compatible) +``` + +### RELEASE-002: Release Checklist + +``` +1. [ ] All CI passing on main +2. [ ] CHANGELOG.md updated +3. [ ] Version bumped in Cargo.toml and pyproject.toml +4. [ ] Tag created: v{version} +5. [ ] GitHub Release created with notes +6. [ ] Binaries built and attached (via CI) +7. [ ] Homebrew formula updated (if applicable) +``` + +## Communication + +### COMM-001: Channels + +| Channel | Purpose | +|---------|---------| +| GitHub Issues | Bugs, feature requests | +| GitHub Discussions | Questions, ideas, RFC | +| Pull Requests | Code review | + +### COMM-002: Response Times + +| Type | Target Response | +|------|-----------------| +| Security issues | 24 hours | +| Bug reports | 48 hours | +| Feature requests | 1 week | +| PR reviews | 48 hours |