memovai · IRONICBo · Jan 24, 2026 · Jan 25, 2026 · Jan 26, 2026
diff --git a/.gitignore b/.gitignore
@@ -177,4 +177,5 @@ tests/integration/test_playground/*.txt
 
 # mem cache
 .mem
-.mcp.json
+.mcp.json
+.memignore
diff --git a/memov/constants/__init__.py b/memov/constants/__init__.py
@@ -0,0 +1 @@
+
diff --git a/memov/constants/prompts.py b/memov/constants/prompts.py
@@ -0,0 +1,82 @@
+# Prompt text for the AI search, commit clustering, and skills-document calls.
+
+# System prompt for history search; requires a JSON reply with "answer" and
+# "commit_ids". NOTE(review): the example below contains literal braces, so
+# this string is presumably used verbatim rather than via str.format - confirm.
+AI_SEARCH_SYSTEM_PROMPT = """You are an AI assistant helping users search their code history.
+You will be given a list of commits with their prompts/messages.
+Answer the user's question based ONLY on this history. Be concise.
+
+LANGUAGE:
+- Respond in ENGLISH only.
+
+OUTPUT FORMAT (STRICT JSON ONLY):
+- Return ONLY a JSON object (no markdown, no code fences, no extra text).
+- The JSON object MUST contain exactly these keys:
+  - "answer": a concise string answer in English
+  - "commit_ids": an array of 7-character commit hashes (strings) that are relevant
+- Use only commit hashes that appear in the provided history.
+- If the history does NOT contain relevant information, set "commit_ids" to an empty array and answer with a short "not found" message.
+
+Example:
+{"answer":"You fixed the login bug in commit abc1234","commit_ids":["abc1234"]}"""
+
+# User prompt template with {history_context} and {query} placeholders
+# (presumably filled via str.format - confirm at the call site).
+AI_SEARCH_USER_PROMPT_TEMPLATE = """Commit history (format: [hash] branch | prompt):
+
+{history_context}
+
+Question: {query}
+
+Return ONLY the JSON object with "answer" and "commit_ids" as specified."""
+
+# System prompt for grouping commits into features; requires a JSON reply
+# with a top-level "features" array of name/summary/commit_ids objects.
+CLUSTER_SYSTEM_PROMPT = """You are a code assistant summarizing commit history.
+You will be given a list of commits with prompts and metadata.
+
+LANGUAGE:
+- Respond in ENGLISH only.
+
+OUTPUT FORMAT (STRICT JSON ONLY):
+- Return ONLY a JSON object (no markdown, no extra text).
+- The JSON must have a top-level key "features".
+- "features" is an array of objects with:
+  - "name": short feature name (string)
+  - "summary": concise feature summary (string)
+  - "commit_ids": array of 7-char commit hashes (strings)
+- Use only commit hashes that appear in the provided history.
+"""
+
+# User prompt template with a {history_context} placeholder.
+CLUSTER_USER_PROMPT_TEMPLATE = """Task: Cluster the commits into distinct product features.
+
+Commit history (format: [hash] branch | op | prompt | files):
+{history_context}
+
+Return JSON only with the required schema."""
+
+# System prompt for a per-feature skills document; requires a JSON reply
+# with "title", "content", and "label".
+SKILL_SYSTEM_PROMPT = """You are a code assistant creating a short skills document for a feature.
+You will be given a feature name, summary, and related commits.
+
+LANGUAGE:
+- Respond in ENGLISH only.
+
+OUTPUT FORMAT (STRICT JSON ONLY):
+- Return ONLY a JSON object with:
+  - "title": short title (string)
+  - "content": concise skills summary in 3-6 sentences (string)
+  - "label": 1-2 word tag (string)
+- Do not include markdown or extra fields.
+"""
+
+# User prompt template with {feature_name}, {feature_summary}, {commits_text}.
+SKILL_USER_PROMPT_TEMPLATE = """Feature: {feature_name}
+Summary: {feature_summary}
+Commits:
+{commits_text}
+
+Return JSON only with the required schema."""
diff --git a/memov/core/git.py b/memov/core/git.py
@@ -27,9 +27,8 @@ def subprocess_call(
         # Only set encoding when text mode is True
         if text:
             kwargs["encoding"] = "utf-8"
-            # Windows: handle potential encoding errors from git output
-            if sys.platform == "win32":
-                kwargs["errors"] = "replace"
+            # Be resilient to non-UTF-8 diffs/binary output across platforms
+            kwargs["errors"] = "replace"
 
         if input is not None:
             kwargs["input"] = input

diff --git a/memov/storage/skills_db.py b/memov/storage/skills_db.py
@@ -0,0 +1,222 @@
+"""SQLite storage for AI-generated feature clusters and skills summaries."""
+
+from __future__ import annotations
+
+import sqlite3
+from contextlib import closing
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Iterable, Optional
+
+
+def _utc_now_iso() -> str:
+    """Return the current UTC time as a naive ISO-8601 string.
+
+    ``datetime.utcnow()`` is deprecated since Python 3.12. Taking an aware UTC
+    timestamp and dropping its tzinfo produces the same text format, so values
+    stay comparable with rows written earlier.
+    """
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+
+
+@dataclass
+class SkillFeature:
+    """A stored feature joined with its commits and optional skill document."""
+
+    feature_id: int
+    name: str
+    summary: str
+    created_at: str  # ISO-8601 text, as stored in features.created_at
+    updated_at: str  # ISO-8601 text, as stored in features.updated_at
+    commits: list[dict]  # each dict has "commit_hash" and "commit_short"
+    # The skill_* fields are None when the feature has no row in ``skills``
+    # (or the corresponding column is NULL).
+    skill_title: Optional[str]
+    skill_content: Optional[str]
+    skill_label: Optional[str]
+
+
+class SkillsDB:
+    """Lightweight SQLite helper for skills/feature clustering data.
+
+    Each method opens its own short-lived connection and closes it before
+    returning, so no SQLite handle outlives a call.
+    """
+
+    def __init__(self, db_path: Path):
+        """Create the parent directory if needed and ensure the schema exists."""
+        self.db_path = Path(db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._ensure_schema()
+
+    def _connect(self) -> sqlite3.Connection:
+        """Open a new connection with name-based rows and foreign keys enabled.
+
+        The caller owns the returned connection and must close it; using it as
+        a context manager only commits or rolls back, it does not close.
+        """
+        conn = sqlite3.connect(str(self.db_path))
+        conn.row_factory = sqlite3.Row
+        conn.execute("PRAGMA foreign_keys = ON")
+        return conn
+
+    def _ensure_schema(self) -> None:
+        """Create the three tables if missing and backfill ``skills.label``."""
+        with closing(self._connect()) as conn, conn:
+            conn.executescript(
+                """
+                CREATE TABLE IF NOT EXISTS features (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    name TEXT NOT NULL,
+                    summary TEXT NOT NULL,
+                    created_at TEXT NOT NULL,
+                    updated_at TEXT NOT NULL
+                );
+
+                CREATE TABLE IF NOT EXISTS feature_commits (
+                    feature_id INTEGER NOT NULL,
+                    commit_hash TEXT NOT NULL,
+                    commit_short TEXT NOT NULL,
+                    PRIMARY KEY (feature_id, commit_hash),
+                    FOREIGN KEY(feature_id) REFERENCES features(id) ON DELETE CASCADE
+                );
+
+                CREATE TABLE IF NOT EXISTS skills (
+                    feature_id INTEGER PRIMARY KEY,
+                    title TEXT,
+                    content TEXT,
+                    label TEXT,
+                    created_at TEXT NOT NULL,
+                    updated_at TEXT NOT NULL,
+                    FOREIGN KEY(feature_id) REFERENCES features(id) ON DELETE CASCADE
+                );
+                """
+            )
+            # Backfill schema if label column is missing
+            cols = [row["name"] for row in conn.execute("PRAGMA table_info(skills)")]
+            if "label" not in cols:
+                conn.execute("ALTER TABLE skills ADD COLUMN label TEXT")
+
+    def reset(self) -> None:
+        """Delete every skill, commit link, and feature in one transaction."""
+        with closing(self._connect()) as conn, conn:
+            # Separate statements (not executescript) keep all three deletes in
+            # the same transaction, so a failure rolls back the whole reset.
+            conn.execute("DELETE FROM skills")
+            conn.execute("DELETE FROM feature_commits")
+            conn.execute("DELETE FROM features")
+
+    def insert_feature(self, name: str, summary: str) -> int:
+        """Insert a feature row and return its new id."""
+        now = _utc_now_iso()
+        with closing(self._connect()) as conn, conn:
+            cur = conn.execute(
+                """
+                INSERT INTO features (name, summary, created_at, updated_at)
+                VALUES (?, ?, ?, ?)
+                """,
+                (name, summary, now, now),
+            )
+            return int(cur.lastrowid)
+
+    def set_feature_commits(self, feature_id: int, commits: Iterable[dict]) -> None:
+        """Replace the commits linked to ``feature_id``.
+
+        Each item must provide "commit_hash" and "commit_short". Repeated
+        hashes are collapsed to their first occurrence so a duplicated id does
+        not violate the (feature_id, commit_hash) primary key.
+        """
+        rows: dict[str, tuple] = {}
+        for commit in commits:
+            commit_hash = commit["commit_hash"]
+            rows.setdefault(commit_hash, (feature_id, commit_hash, commit["commit_short"]))
+        with closing(self._connect()) as conn, conn:
+            conn.execute("DELETE FROM feature_commits WHERE feature_id = ?", (feature_id,))
+            conn.executemany(
+                """
+                INSERT INTO feature_commits (feature_id, commit_hash, commit_short)
+                VALUES (?, ?, ?)
+                """,
+                list(rows.values()),
+            )
+
+    def set_skill_doc(self, feature_id: int, title: str, content: str, label: str) -> None:
+        """Insert or update the skill document stored for ``feature_id``."""
+        now = _utc_now_iso()
+        with closing(self._connect()) as conn, conn:
+            conn.execute(
+                """
+                INSERT INTO skills (feature_id, title, content, label, created_at, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?)
+                ON CONFLICT(feature_id) DO UPDATE SET
+                    title = excluded.title,
+                    content = excluded.content,
+                    label = excluded.label,
+                    updated_at = excluded.updated_at
+                """,
+                (feature_id, title, content, label, now, now),
+            )
+
+    def get_features(self) -> list[SkillFeature]:
+        """Return all features ordered by id, with commits sorted by short hash."""
+        with closing(self._connect()) as conn, conn:
+            features_rows = conn.execute(
+                """
+                SELECT id, name, summary, created_at, updated_at
+                FROM features
+                ORDER BY id ASC
+                """
+            ).fetchall()
+
+            commits_rows = conn.execute(
+                """
+                SELECT feature_id, commit_hash, commit_short
+                FROM feature_commits
+                ORDER BY commit_short ASC
+                """
+            ).fetchall()
+
+            skills_rows = conn.execute(
+                """
+                SELECT feature_id, title, content, label
+                FROM skills
+                """
+            ).fetchall()
+
+        commits_by_feature: dict[int, list[dict]] = {}
+        for row in commits_rows:
+            commits_by_feature.setdefault(int(row["feature_id"]), []).append(
+                {
+                    "commit_hash": row["commit_hash"],
+                    "commit_short": row["commit_short"],
+                }
+            )
+
+        skills_by_feature = {
+            int(row["feature_id"]): {
+                "title": row["title"],
+                "content": row["content"],
+                "label": row["label"],
+            }
+            for row in skills_rows
+        }
+
+        features: list[SkillFeature] = []
+        for row in features_rows:
+            feature_id = int(row["id"])
+            skill = skills_by_feature.get(feature_id, {})
+            features.append(
+                SkillFeature(
+                    feature_id=feature_id,
+                    name=row["name"],
+                    summary=row["summary"],
+                    created_at=row["created_at"],
+                    updated_at=row["updated_at"],
+                    commits=commits_by_feature.get(feature_id, []),
+                    skill_title=skill.get("title"),
+                    skill_content=skill.get("content"),
+                    skill_label=skill.get("label"),
+                )
+            )
+        return features