Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,5 @@ tests/integration/test_playground/*.txt

# mem cache
.mem
.mcp.json
.mcp.json
.memignore
1 change: 1 addition & 0 deletions memov/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

69 changes: 69 additions & 0 deletions memov/constants/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# System prompt for the commit-history search assistant.
# It tells the model to answer only from the supplied history, in English, and
# to reply with a bare JSON object holding exactly "answer" and "commit_ids".
# NOTE(review): the example at the end contains literal "{" / "}" characters,
# so this string would break if it were passed through str.format(); it looks
# intended to be sent verbatim - confirm at the call site.
AI_SEARCH_SYSTEM_PROMPT = """You are an AI assistant helping users search their code history.
You will be given a list of commits with their prompts/messages.
Answer the user's question based ONLY on this history. Be concise.

LANGUAGE:
- Respond in ENGLISH only.

OUTPUT FORMAT (STRICT JSON ONLY):
- Return ONLY a JSON object (no markdown, no code fences, no extra text).
- The JSON object MUST contain exactly these keys:
- "answer": a concise string answer in English
- "commit_ids": an array of 7-character commit hashes (strings) that are relevant
- Use only commit hashes that appear in the provided history.
- If the history does NOT contain relevant information, set "commit_ids" to an empty array and answer with a short "not found" message.

Example:
{"answer":"You fixed the login bug in commit abc1234","commit_ids":["abc1234"]}"""

# User-message template for history search.
# Placeholders: {history_context} (commit lines in "[hash] branch | prompt"
# form, per the header text) and {query} (the user's question).
# Presumably filled with str.format() - confirm at the call site.
AI_SEARCH_USER_PROMPT_TEMPLATE = """Commit history (format: [hash] branch | prompt):

{history_context}

Question: {query}

Return ONLY the JSON object with "answer" and "commit_ids" as specified."""

# System prompt for clustering commits into features.
# Requires an English, JSON-only reply with a top-level "features" array whose
# items carry "name", "summary" and "commit_ids" (7-char hashes taken from the
# provided history).
CLUSTER_SYSTEM_PROMPT = """You are a code assistant summarizing commit history.
You will be given a list of commits with prompts and metadata.

LANGUAGE:
- Respond in ENGLISH only.

OUTPUT FORMAT (STRICT JSON ONLY):
- Return ONLY a JSON object (no markdown, no extra text).
- The JSON must have a top-level key "features".
- "features" is an array of objects with:
- "name": short feature name (string)
- "summary": concise feature summary (string)
- "commit_ids": array of 7-char commit hashes (strings)
- Use only commit hashes that appear in the provided history.
"""

# User-message template for the clustering request.
# Single placeholder: {history_context}, described by the header text as lines
# of "[hash] branch | op | prompt | files".
# Presumably filled with str.format() - confirm at the call site.
CLUSTER_USER_PROMPT_TEMPLATE = """Task: Cluster the commits into distinct product features.

Commit history (format: [hash] branch | op | prompt | files):
{history_context}

Return JSON only with the required schema."""

# System prompt for generating a per-feature skills document.
# Requires an English, JSON-only reply with "title", "content" (3-6 sentences)
# and "label" (1-2 words), and no markdown or extra fields.
SKILL_SYSTEM_PROMPT = """You are a code assistant creating a short skills document for a feature.
You will be given a feature name, summary, and related commits.

LANGUAGE:
- Respond in ENGLISH only.

OUTPUT FORMAT (STRICT JSON ONLY):
- Return ONLY a JSON object with:
- "title": short title (string)
- "content": concise skills summary in 3-6 sentences (string)
- "label": 1-2 word tag (string)
- Do not include markdown or extra fields.
"""

# User-message template for the skills-document request.
# Placeholders: {feature_name}, {feature_summary} and {commits_text}.
# Presumably filled with str.format() - confirm at the call site.
SKILL_USER_PROMPT_TEMPLATE = """Feature: {feature_name}
Summary: {feature_summary}
Commits:
{commits_text}

Return JSON only with the required schema."""
5 changes: 2 additions & 3 deletions memov/core/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ def subprocess_call(
# Only set encoding when text mode is True
if text:
kwargs["encoding"] = "utf-8"
# Windows: handle potential encoding errors from git output
if sys.platform == "win32":
kwargs["errors"] = "replace"
# Be resilient to non-UTF-8 diffs/binary output across platforms
kwargs["errors"] = "replace"

if input is not None:
kwargs["input"] = input
Expand Down
184 changes: 184 additions & 0 deletions memov/storage/skills_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""SQLite storage for AI-generated feature clusters and skills summaries."""

from __future__ import annotations

import sqlite3
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, Iterator, Optional


@dataclass
class SkillFeature:
    """One stored feature cluster together with its commits and skill doc.

    The three ``skill_*`` fields default to ``None`` so a feature that has no
    skills document yet can be constructed without spelling out the missing
    values; positional order is unchanged for existing callers.
    """

    # Primary key of the row in the ``features`` table.
    feature_id: int
    # Short feature name.
    name: str
    # Feature summary text.
    summary: str
    # Creation / last-update timestamps as stored (ISO-8601 strings).
    created_at: str
    updated_at: str
    # Commits attached to the feature; each dict carries the keys
    # "commit_hash" and "commit_short".
    commits: list[dict]
    # Skills document parts; None when no document has been stored.
    skill_title: Optional[str] = None
    skill_content: Optional[str] = None
    skill_label: Optional[str] = None


class SkillsDB:
    """Lightweight SQLite helper for skills/feature clustering data.

    Three tables are managed: ``features`` (one row per feature),
    ``feature_commits`` (commits attached to a feature) and ``skills`` (at
    most one skills document per feature). Both child tables reference
    ``features(id)`` with ``ON DELETE CASCADE``.

    Every public method opens its own short-lived connection, runs inside a
    transaction, and closes the connection before returning.
    """

    def __init__(self, db_path: Path):
        """Store the database path, create its parent directory, ensure schema.

        Args:
            db_path: Location of the SQLite file; missing parent directories
                are created.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._ensure_schema()

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same text format as ``datetime.utcnow().isoformat()``
        (no UTC offset suffix) without calling the deprecated ``utcnow``.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection with ``sqlite3.Row`` rows and FK enforcement.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.row_factory = sqlite3.Row
            # SQLite only enforces foreign keys when enabled per connection.
            conn.execute("PRAGMA foreign_keys = ON")
        except sqlite3.Error:
            conn.close()
            raise
        return conn

    @contextmanager
    def _session(self) -> Iterator[sqlite3.Connection]:
        """Yield a connection inside a transaction and always close it.

        ``with conn:`` on a sqlite3 connection only commits or rolls back; it
        does not close the handle, so the close is done explicitly here.
        """
        conn = self._connect()
        try:
            with conn:
                yield conn
        finally:
            conn.close()

    def _ensure_schema(self) -> None:
        """Create the tables if absent and backfill the ``skills.label`` column."""
        with self._session() as conn:
            conn.executescript(
                """
                CREATE TABLE IF NOT EXISTS features (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL,
                    summary TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS feature_commits (
                    feature_id INTEGER NOT NULL,
                    commit_hash TEXT NOT NULL,
                    commit_short TEXT NOT NULL,
                    PRIMARY KEY (feature_id, commit_hash),
                    FOREIGN KEY(feature_id) REFERENCES features(id) ON DELETE CASCADE
                );

                CREATE TABLE IF NOT EXISTS skills (
                    feature_id INTEGER PRIMARY KEY,
                    title TEXT,
                    content TEXT,
                    label TEXT,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL,
                    FOREIGN KEY(feature_id) REFERENCES features(id) ON DELETE CASCADE
                );
                """
            )
            # Backfill schema if label column is missing
            cols = [row["name"] for row in conn.execute("PRAGMA table_info(skills)")]
            if "label" not in cols:
                conn.execute("ALTER TABLE skills ADD COLUMN label TEXT")

    def reset(self) -> None:
        """Delete every row from all three tables in a single transaction."""
        with self._session() as conn:
            # Children first, then parents; one transaction keeps it atomic.
            conn.execute("DELETE FROM skills")
            conn.execute("DELETE FROM feature_commits")
            conn.execute("DELETE FROM features")

    def insert_feature(self, name: str, summary: str) -> int:
        """Insert a feature row and return its generated id.

        Args:
            name: Feature name.
            summary: Feature summary.

        Returns:
            The ``id`` of the newly inserted ``features`` row.
        """
        now = self._utc_now_iso()
        with self._session() as conn:
            cur = conn.execute(
                """
                INSERT INTO features (name, summary, created_at, updated_at)
                VALUES (?, ?, ?, ?)
                """,
                (name, summary, now, now),
            )
            return int(cur.lastrowid)

    def set_feature_commits(self, feature_id: int, commits: Iterable[dict]) -> None:
        """Replace the commits attached to a feature.

        Existing rows for ``feature_id`` are removed and the given commits are
        inserted in the same transaction.

        Args:
            feature_id: Id of the owning feature.
            commits: Dicts providing the keys "commit_hash" and "commit_short".

        Raises:
            KeyError: If a commit dict lacks one of the required keys.
            sqlite3.IntegrityError: On duplicate hashes or an unknown feature id
                (the transaction is rolled back).
        """
        # Materialize first so a malformed item fails before touching the DB.
        rows = [(feature_id, c["commit_hash"], c["commit_short"]) for c in commits]
        with self._session() as conn:
            conn.execute("DELETE FROM feature_commits WHERE feature_id = ?", (feature_id,))
            conn.executemany(
                """
                INSERT INTO feature_commits (feature_id, commit_hash, commit_short)
                VALUES (?, ?, ?)
                """,
                rows,
            )

    def set_skill_doc(self, feature_id: int, title: str, content: str, label: str) -> None:
        """Insert or update the skills document of a feature.

        On update, ``title``, ``content``, ``label`` and ``updated_at`` are
        overwritten while the original ``created_at`` is kept.

        Args:
            feature_id: Id of the owning feature.
            title: Document title.
            content: Document body.
            label: Short tag for the document.
        """
        now = self._utc_now_iso()
        with self._session() as conn:
            conn.execute(
                """
                INSERT INTO skills (feature_id, title, content, label, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(feature_id) DO UPDATE SET
                    title = excluded.title,
                    content = excluded.content,
                    label = excluded.label,
                    updated_at = excluded.updated_at
                """,
                (feature_id, title, content, label, now, now),
            )

    def get_features(self) -> list[SkillFeature]:
        """Return all features, ordered by id, with commits and skill docs.

        Commits within a feature are ordered by ``commit_short``. Features
        without a skills row get ``None`` for the three ``skill_*`` fields.
        """
        with self._session() as conn:
            features_rows = conn.execute(
                """
                SELECT id, name, summary, created_at, updated_at
                FROM features
                ORDER BY id ASC
                """
            ).fetchall()

            commits_rows = conn.execute(
                """
                SELECT feature_id, commit_hash, commit_short
                FROM feature_commits
                ORDER BY commit_short ASC
                """
            ).fetchall()

            skills_rows = conn.execute(
                """
                SELECT feature_id, title, content, label
                FROM skills
                """
            ).fetchall()

        # Rows are fully fetched, so grouping can happen after the close.
        commits_by_feature: dict[int, list[dict]] = {}
        for row in commits_rows:
            commits_by_feature.setdefault(int(row["feature_id"]), []).append(
                {
                    "commit_hash": row["commit_hash"],
                    "commit_short": row["commit_short"],
                }
            )

        skills_by_feature = {
            int(row["feature_id"]): {
                "title": row["title"],
                "content": row["content"],
                "label": row["label"],
            }
            for row in skills_rows
        }

        features: list[SkillFeature] = []
        for row in features_rows:
            feature_id = int(row["id"])
            skill = skills_by_feature.get(feature_id, {})
            features.append(
                SkillFeature(
                    feature_id=feature_id,
                    name=row["name"],
                    summary=row["summary"],
                    created_at=row["created_at"],
                    updated_at=row["updated_at"],
                    commits=commits_by_feature.get(feature_id, []),
                    skill_title=skill.get("title"),
                    skill_content=skill.get("content"),
                    skill_label=skill.get("label"),
                )
            )
        return features
Loading