From 62d0bf1ddedfd671208a6190ce067d5e644edc39 Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 14:03:46 +0100 Subject: [PATCH 1/6] Add FTS5 project search with dual-backend indexing and query Implement full-text search over the project tree using SQLite FTS5 with two backends: porter-stemmed for natural language and trigram for substring matching. Includes incremental indexing (mtime fast-path + xxh3 hashing), query classification/routing, BM25 score merging, and CLI commands (draft index, draft search). Integrates into /spec, /refine, and /implement skills for automatic indexing and context retrieval. Co-Authored-By: Claude Opus 4.6 --- .claude/commands/implement.md | 10 +- .claude/commands/refine.md | 13 +- .claude/commands/spec.md | 10 + .../templates/.claude/commands/implement.md | 99 +++ .../templates/.claude/commands/refine.md | 74 ++ cmd/draft/templates/.claude/commands/spec.md | 92 +++ .../.cursor/skills/implement/SKILL.md | 10 +- .../templates/.cursor/skills/refine/SKILL.md | 13 +- .../templates/.cursor/skills/spec/SKILL.md | 10 + docs/project-search.md | 638 ++++++++++++++++++ go.mod | 5 + go.sum | 20 + internal/cli/index.go | 127 ++++ internal/cli/root.go | 2 + internal/cli/search.go | 113 ++++ internal/search/indexer.go | 352 ++++++++++ internal/search/indexer_test.go | 349 ++++++++++ internal/search/project.go | 107 +++ internal/search/project_test.go | 118 ++++ internal/search/searcher.go | 271 ++++++++ internal/search/searcher_test.go | 214 ++++++ internal/search/store.go | 318 +++++++++ internal/search/store_test.go | 277 ++++++++ specs/search-indexer.md | 60 ++ specs/search-integration.md | 49 ++ specs/search-query.md | 57 ++ specs/search-store.md | 54 ++ 27 files changed, 3458 insertions(+), 4 deletions(-) create mode 100644 cmd/draft/templates/.claude/commands/implement.md create mode 100644 cmd/draft/templates/.claude/commands/refine.md create mode 100644 cmd/draft/templates/.claude/commands/spec.md create mode 100644 
docs/project-search.md create mode 100644 internal/cli/index.go create mode 100644 internal/cli/search.go create mode 100644 internal/search/indexer.go create mode 100644 internal/search/indexer_test.go create mode 100644 internal/search/project.go create mode 100644 internal/search/project_test.go create mode 100644 internal/search/searcher.go create mode 100644 internal/search/searcher_test.go create mode 100644 internal/search/store.go create mode 100644 internal/search/store_test.go create mode 100644 specs/search-indexer.md create mode 100644 specs/search-integration.md create mode 100644 specs/search-query.md create mode 100644 specs/search-store.md diff --git a/.claude/commands/implement.md b/.claude/commands/implement.md index 4664d78..acf2302 100644 --- a/.claude/commands/implement.md +++ b/.claude/commands/implement.md @@ -16,10 +16,18 @@ You implement features as small, complete vertical slices with continuous testin ## Workflow -### 1. Load Spec & Assess Blast Radius +### 1. Load Spec & Search for Context Read the relevant `/specs/{feature}.md` file. If multiple specs exist and it's unclear which one, ask the user. +**Search for related code**: Before writing any code, use the spec's title and key terms to search the codebase for relevant existing code: + +```bash +draft search "" --limit 10 +``` + +Review the search results to understand existing patterns, related modules, and potential conflicts. Use these results to inform your implementation approach. + Before writing any code, assess the change: - **Which modules/files will this touch?** List them. If the spec has an "Affected Modules" section, verify it's still accurate. 
diff --git a/.claude/commands/refine.md b/.claude/commands/refine.md index 3536f83..00ec123 100644 --- a/.claude/commands/refine.md +++ b/.claude/commands/refine.md @@ -57,7 +57,18 @@ Refine an existing specification based on new insights, feedback, or changing re **Refinement 2026-01-25**: Updated approach to use WebSocket instead of polling based on performance testing results. Added new acceptance criterion for connection handling. Blast radius unchanged — change is contained within the `transport` module. ``` -Remember: +## Update Search Index + +After saving the refined spec, update the project search index: + +```bash +draft index +``` + +This runs incrementally (~50ms). Do not show the output to the user unless it fails. + +## Reminders + - Keep refinements focused and minimal - Preserve the spec's history through notes - Suggest new specs for major scope changes diff --git a/.claude/commands/spec.md b/.claude/commands/spec.md index 5fc36fd..8698e27 100644 --- a/.claude/commands/spec.md +++ b/.claude/commands/spec.md @@ -77,6 +77,16 @@ Present the spec summary and ask: "Does this capture what you want? I'll impleme If the user wants changes, revise the spec and confirm again. +### Phase 4: Update Search Index + +After writing the spec file, update the project search index so the new spec is immediately discoverable: + +```bash +draft index +``` + +This runs incrementally (~50ms) and only re-indexes the changed file. Do not show the output to the user unless it fails. + ## Reference See `/specs/TEMPLATE.md` for the spec file format. diff --git a/cmd/draft/templates/.claude/commands/implement.md b/cmd/draft/templates/.claude/commands/implement.md new file mode 100644 index 0000000..acf2302 --- /dev/null +++ b/cmd/draft/templates/.claude/commands/implement.md @@ -0,0 +1,99 @@ +--- +name: implement +description: Implement features with phase checkpoints. Use after a spec exists in .claude/specs/ or when implementing a confirmed specification. 
+allowed-tools: Read, Write, Edit, Bash, Glob, Grep, TodoWrite, AskUserQuestion +--- + +# Implementation + +You implement features as small, complete vertical slices with continuous testing. + +## When to Activate + +- After the spec skill has created and confirmed a specification +- When user says "implement" and a spec file exists +- When user explicitly references a spec file + +## Workflow + +### 1. Load Spec & Search for Context + +Read the relevant `/specs/{feature}.md` file. If multiple specs exist and it's unclear which one, ask the user. + +**Search for related code**: Before writing any code, use the spec's title and key terms to search the codebase for relevant existing code: + +```bash +draft search "<spec title and key terms>" --limit 10 +``` + +Review the search results to understand existing patterns, related modules, and potential conflicts. Use these results to inform your implementation approach. + +Before writing any code, assess the change: + +- **Which modules/files will this touch?** List them. If the spec has an "Affected Modules" section, verify it's still accurate. +- **Are we modifying shared code?** Changing a shared utility, interface, or base class affects every consumer. Flag this to the user. +- **Can we contain the change?** Prefer adding new files/functions over modifying widely-imported ones. Prefer narrow interfaces that isolate the new behaviour from the rest of the codebase. +- **Are we adding new dependencies?** Each dependency is a coupling point. Avoid unless clearly justified. + +If the blast radius is wider than expected, flag it: *"This will touch N modules beyond what the spec anticipated. Want to proceed or restructure?"* + + +### 2. Implement + +Implement the spec as **one integrated piece** — types, logic, wiring, and tests together. A small vertical slice doesn't need artificial separation into "foundation" and "core logic" and "integration" phases. 
+ +Important: Follow the design principles outlined in .principles/design-principles.md + +Use TodoWrite to track progress against the spec's acceptance criteria. + +**Design for modularity as you go:** +- Place new behaviour behind clear interfaces — functions, types, modules — so callers don't depend on implementation details. +- Avoid reaching into the internals of other modules. If you need something, use or extend its public interface. +- Keep new files/functions narrowly focused. One responsibility per unit. +- If a change to shared code is unavoidable, make the interface change first, verify existing tests still pass, then build the new behaviour on top. + +**Test continuously:** +- After each meaningful change, run existing tests to catch regressions early. +- Write tests for new behaviour as you implement it, not after. +- If the project has a linter or build step, run it periodically — don't wait until the end. + +**Follow design principles:** +- Apply the guidance in `.principles/design-principles.md` throughout the implementation. + +### 3. Verify & Complete + +Before marking the feature complete: + +1. Run the **full test suite** — not just new tests. +2. Run the **build/linter** if the project has one. +3. Re-read the spec's acceptance criteria and check each one explicitly. +4. Report: "Criterion met" or "Needs attention: {issue}" for each. + +Only mark complete when all criteria pass and the build is green. + +After successful implementation: +- **Update spec status** from `proposed` to `implemented` +- Check off completed acceptance criteria in the spec file +- Add any notes about implementation decisions + +## Checkpoint Behaviour + +Since each spec is a small vertical slice, heavy checkpointing is unnecessary. 
+ +**Do checkpoint** (pause and ask the user) when: +- The blast radius turns out wider than expected +- You face a design decision with trade-offs the user should weigh +- A test failure reveals a deeper issue that changes the approach + +**Skip checkpoint** for: +- Normal forward progress within the spec +- Minor follow-up fixes +- Formatting/cleanup +- When user explicitly says "continue without asking" + +## Recovery + +If implementation is interrupted: +- TodoWrite preserves progress +- Spec file shows which criteria are done +- User can say "continue implementing {feature}" to resume diff --git a/cmd/draft/templates/.claude/commands/refine.md b/cmd/draft/templates/.claude/commands/refine.md new file mode 100644 index 0000000..00ec123 --- /dev/null +++ b/cmd/draft/templates/.claude/commands/refine.md @@ -0,0 +1,74 @@ +--- +description: Refines existing specs +--- + +# Refine Existing Spec + +Refine an existing specification based on new insights, feedback, or changing requirements. + +**Feature to refine:** $ARGUMENTS + +## Instructions + +1. **Load the existing spec** from `/specs/{feature}.md` + - If no spec exists for this feature, suggest using `/spec` instead + - If multiple specs match, ask the user which one to refine + +2. **Ask 2-3 focused refinement questions** (one at a time): + - What aspect needs refinement? (goals, criteria, approach, scope) + - What new information or feedback has emerged? + - Are there specific pain points with the current spec? + +3. **Check scope and modularity:** + - Does the refinement keep the spec small enough for a single vertical slice? + - If criteria are being added, does the total still stay at ~5 or fewer? + - Does the refinement change which modules are affected? Update the "Affected Modules" section. + - If the refinement significantly expands scope or blast radius, suggest a separate spec instead. + +4. 
**Update the spec in place**: + - **Preserve front-matter**: Keep all existing front-matter fields (title, description, author). Keep `status: proposed` (refinements don't change status) + - Preserve completed acceptance criteria checkboxes + - Update goals, criteria, or approach as needed + - Update "Affected Modules" and "Test Strategy" if the changes alter them + - Add to "Out of Scope" if removing features + - Add refinement notes to the "Notes" section with timestamp + +5. **Show a diff summary**: + - Highlight what changed (goals, new criteria, removed items, affected modules, etc.) + - Ask for confirmation before saving + +6. **Get user confirmation** before proceeding to implementation + - If confirmed, use the **implement** skill with the refined spec + - If not, ask if they want to refine further + +## Refinement Guidelines + +- **Preserve progress**: Don't uncheck completed criteria unless they're no longer valid +- **Be additive when possible**: Add new criteria rather than rewriting existing ones +- **Document changes**: Always add a timestamped note explaining what was refined and why +- **Validate scope**: Check if refinements are expanding scope significantly - if so, suggest a new spec +- **Validate modularity**: If the refinement introduces new module dependencies or widens the blast radius, flag it explicitly + +## Example Refinement Note + +```markdown +## Notes + +**Refinement 2026-01-25**: Updated approach to use WebSocket instead of polling based on performance testing results. Added new acceptance criterion for connection handling. Blast radius unchanged — change is contained within the `transport` module. +``` + +## Update Search Index + +After saving the refined spec, update the project search index: + +```bash +draft index +``` + +This runs incrementally (~50ms). Do not show the output to the user unless it fails. 
+ +## Reminders + +- Keep refinements focused and minimal +- Preserve the spec's history through notes +- Suggest new specs for major scope changes diff --git a/cmd/draft/templates/.claude/commands/spec.md b/cmd/draft/templates/.claude/commands/spec.md new file mode 100644 index 0000000..8698e27 --- /dev/null +++ b/cmd/draft/templates/.claude/commands/spec.md @@ -0,0 +1,92 @@ +--- +name: spec +description: Create a specification before implementing features. Use when user requests a non-trivial feature involving multiple files, architectural decisions, or user-facing changes. +allowed-tools: Read, Write, AskUserQuestion, TodoWrite +--- + +# Specification Generator + +You help users clarify requirements and create lightweight specifications before implementation begins. + +Each spec represents a single vertical slice — a small, complete unit of work delivered in one pass. Specs are not partial deliveries or large application blueprints. + +## When to Activate + +Automatically engage when the user requests a feature that involves: +- Multiple files or components +- Architectural decisions +- User-facing changes +- Integration with external systems + +Do NOT activate for: +- Simple bug fixes with obvious solutions +- Single-line changes +- Documentation updates +- Dependency updates + +## Workflow + +### Phase 1: Clarify (3-5 questions max) + +Ask questions ONE AT A TIME. Do not batch multiple questions. Wait for each answer before proceeding. + +Suggested questions (adapt to context): +1. What problem does this solve? Who benefits? +2. What's the simplest version that would be useful? +3. Any constraints? (performance, compatibility, existing patterns to follow) +4. What should explicitly be OUT of scope? +5. **Modularity**: What existing modules or interfaces will this touch? Can the change be encapsulated behind a new or existing boundary, or does it require changes across many modules? + +Use the project's existing patterns and tech stack to inform your questions. 
Reference specific files when relevant. + +### Phase 1.5: Scope Check + +Before drafting, assess whether the feature is small enough for a single spec: + +- **More than 5 acceptance criteria?** Likely too big — suggest splitting into multiple specs. +- **Touches many unrelated modules?** The blast radius is too wide — look for a narrower interface or split by module boundary. +- **Cannot be described in 2-3 sentences of approach?** The feature may need decomposition. + +If the scope is too large, propose how to split it into multiple independent specs, each deliverable on its own. + +### Phase 2: Draft Spec + +Write a brief spec to `/specs/{feature-name}.md` using the template in `/specs/TEMPLATE.md`. + +**Front-matter**: Include YAML front-matter at the top with: +- `title`: Feature name (extracted from user discussion) +- `description`: One-line summary (extracted from user discussion) +- `status: proposed` +- `author`: Get from git config using `git config user.name` and `git config user.email` in format "Name " + +**Writing style**: Write in compressed, direct prose. No full sentences where a phrase will do. Omit articles, filler words, and transitional language. Each bullet or sentence should carry new information — no restating the goal, no summarizing what was already said. Aim for the minimum words that preserve meaning. + +Keep content concise: +- Goal: 1-2 sentences +- Acceptance Criteria: 3-5 checkboxes +- Approach: 2-3 sentences +- Affected Modules: list which modules/files change and where the boundary is +- Test Strategy: how criteria will be verified +- Out of Scope: bullet list + +### Phase 3: Confirm + +Present the spec summary and ask: "Does this capture what you want? I'll implement once confirmed." + +**Only proceed to implementation after explicit approval.** + +If the user wants changes, revise the spec and confirm again. 
+ +### Phase 4: Update Search Index + +After writing the spec file, update the project search index so the new spec is immediately discoverable: + +```bash +draft index +``` + +This runs incrementally (~50ms) and only re-indexes the changed file. Do not show the output to the user unless it fails. + +## Reference + +See `/specs/TEMPLATE.md` for the spec file format. diff --git a/cmd/draft/templates/.cursor/skills/implement/SKILL.md b/cmd/draft/templates/.cursor/skills/implement/SKILL.md index 7e6c052..f269e45 100644 --- a/cmd/draft/templates/.cursor/skills/implement/SKILL.md +++ b/cmd/draft/templates/.cursor/skills/implement/SKILL.md @@ -15,10 +15,18 @@ You implement features as small, complete vertical slices with continuous testin ## Workflow -### 1. Load Spec & Assess Blast Radius +### 1. Load Spec & Search for Context Read the relevant `/specs/{feature}.md` file. If multiple specs exist and it's unclear which one, ask the user. +**Search for related code**: Before writing any code, use the spec's title and key terms to search the codebase for relevant existing code: + +```bash +draft search "" --limit 10 +``` + +Review the search results to understand existing patterns, related modules, and potential conflicts. Use these results to inform your implementation approach. + Before writing any code, assess the change: - **Which modules/files will this touch?** List them. If the spec has an "Affected Modules" section, verify it's still accurate. diff --git a/cmd/draft/templates/.cursor/skills/refine/SKILL.md b/cmd/draft/templates/.cursor/skills/refine/SKILL.md index 94a9499..137a091 100644 --- a/cmd/draft/templates/.cursor/skills/refine/SKILL.md +++ b/cmd/draft/templates/.cursor/skills/refine/SKILL.md @@ -58,7 +58,18 @@ Refine an existing specification based on new insights, feedback, or changing re **Refinement 2026-01-25**: Updated approach to use WebSocket instead of polling based on performance testing results. 
Added new acceptance criterion for connection handling. Blast radius unchanged — change is contained within the `transport` module. ``` -Remember: +## Update Search Index + +After saving the refined spec, update the project search index: + +```bash +draft index +``` + +This runs incrementally (~50ms). Do not show the output to the user unless it fails. + +## Reminders + - Keep refinements focused and minimal - Preserve the spec's history through notes - Suggest new specs for major scope changes diff --git a/cmd/draft/templates/.cursor/skills/spec/SKILL.md b/cmd/draft/templates/.cursor/skills/spec/SKILL.md index a416c8e..60e5489 100644 --- a/cmd/draft/templates/.cursor/skills/spec/SKILL.md +++ b/cmd/draft/templates/.cursor/skills/spec/SKILL.md @@ -76,6 +76,16 @@ Present the spec summary and ask: "Does this capture what you want? I'll impleme If the user wants changes, revise the spec and confirm again. +### Phase 4: Update Search Index + +After writing the spec file, update the project search index so the new spec is immediately discoverable: + +```bash +draft index +``` + +This runs incrementally (~50ms) and only re-indexes the changed file. Do not show the output to the user unless it fails. + ## Reference See `/specs/TEMPLATE.md` for the spec file format. diff --git a/docs/project-search.md b/docs/project-search.md new file mode 100644 index 0000000..ee8bc54 --- /dev/null +++ b/docs/project-search.md @@ -0,0 +1,638 @@ +--- +title: Project Search (MVP) +description: FTS5 + trigram indexed search over the full codebase, integrated into existing draft commands +status: proposed +author: heiko-braun +--- + +# Feature: project-search + +## Goal + +Give agents fast, ranked, token-efficient search over the entire project. +Index everything — source, specs, docs, config — and wire it into the +commands agents already use: `draft search` for direct queries, automatic +indexing after `/spec` and `/refine`, automatic search during `/implement`. 
+ +## Motivation + +Agents exploring a project rely on ripgrep or file reads. This returns +unranked, full-line matches and wastes tokens on irrelevant context. +An FTS5 index with BM25 ranking surfaces the most relevant hits first, +keeps responses compact, and enables natural language queries like +"where is rate limiting handled?" that grep can't answer. + +Two specific pain points this solves: + +1. **During implementation**: The agent needs to understand existing code + before writing new code. Today that's a guess-and-grep loop. With an + index, `/implement` can automatically pull relevant context from the + codebase before the agent starts writing. + +2. **During spec authoring**: Before writing a new spec, agents should + check for overlap, conflicts, or prior decisions. With an index, + `/spec` can surface related specs and source files as context. + +## Acceptance Criteria + +- [ ] `draft index` builds/updates a SQLite database with two FTS5 indexes (porter-stemmed + trigram) from the full project tree +- [ ] Indexing is incremental: only files whose content has changed are re-indexed +- [ ] `draft search ` returns ranked results with file path, line range, and snippet +- [ ] Search uses two FTS5 backends (porter-stemmed for natural language, trigram for substrings) with weighted score merging +- [ ] `.gitignore` patterns are respected; binary files and the index itself are skipped +- [ ] `/spec` and `/refine` trigger `draft index` after writing the spec file +- [ ] `/implement` runs `draft search` with the spec's title and key terms before generating code +- [ ] The index covers the full project tree, not just specs or source +- [ ] The index is stored outside the project tree (cache directory, keyed by project path) +- [ ] First full index of a ~10k LOC project completes in under 2 seconds +- [ ] No MCP server required — agents invoke `draft` CLI directly + +## Non-Goals (for MVP) + +- Symbol extraction, outline, or reference tools +- Semantic or vector 
search +- File watching or background re-indexing +- MCP server exposure (agents call `draft index` and `draft search` via bash) + +## Approach + +### Architecture + +Two components: an indexer and a searcher, both callable from the CLI +and from within other draft commands. + +``` +┌─────────────────────────────────────┐ +│ Existing Commands │ +│ │ +│ /spec ──── index after write │ +│ /refine ── index after write │ +│ /implement ── search before gen │ +└──────────────┬──────────────────────┘ + │ calls +┌──────────────▼──────────────────────┐ +│ CLI Commands │ +│ draft index [--force] │ +│ draft search [--limit] │ +│ draft search --status │ +└──────────────┬──────────────────────┘ + │ uses +┌──────────────▼──────────────────────┐ +│ Search Library │ +│ │ +│ Indexer: walk, hash, upsert │ +│ ┌────────────┐ ┌───────────────┐ │ +│ │ fts │ │ fts_trigram │ │ +│ │ porter │ │ trigram │ │ +│ │ stemming │ │ substrings │ │ +│ │ BM25 rank │ │ BM25 rank │ │ +│ └─────┬──────┘ └──────┬────────┘ │ +│ └───────┬─────────┘ │ +│ Score Merger │ +│ (weighted combination + dedup) │ +│ │ +│ Store: SQLite via mattn/go-sqlite3 │ +└─────────────────────────────────────┘ +``` + +### Database Schema + +Single SQLite database with an index metadata table, a file tracking +table, and two FTS5 virtual tables (one for natural language, one for +substring matching): + +```sql +-- Index metadata: stores the project root so --prune and --list +-- can map database files back to their projects without reversing +-- the hash +CREATE TABLE index_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); +-- Populated on creation: +-- ('project_root', '/Users/heiko/src/draft') +-- ('created_at', '2026-03-14T10:23:41Z') +-- ('schema_version', '1') + +-- Track indexed files for incremental updates +CREATE TABLE files ( + id INTEGER PRIMARY KEY, + path TEXT UNIQUE NOT NULL, + hash TEXT NOT NULL, -- xxh3 content hash (hex) + mtime INTEGER NOT NULL, -- unix timestamp, fast-path skip + indexed INTEGER NOT NULL -- 
unix timestamp of last index ); + +-- Full-text index: natural language queries, BM25 ranking +-- Porter stemming so "handling" matches "handler" +-- rowid is set explicitly to files.id on insert, linking each +-- row back to the files table (content_rowid is not valid here: +-- it is an FTS5 option only for contentless/external-content tables) +CREATE VIRTUAL TABLE fts USING fts5( + path, + content, + tokenize = 'porter unicode61' +); + +-- Trigram index: substring and pattern matching +-- Contentless to avoid storing file content twice +-- detail='none' for minimal index footprint +CREATE VIRTUAL TABLE fts_trigram USING fts5( + path, + content, + content = '', -- contentless: no duplicate storage + content_rowid = id, -- points to files.id for joins + tokenize = 'trigram', + detail = 'none' +); +``` + +**Why two FTS5 tables:** + +- `fts` uses Porter stemming to handle natural language queries + ("authentication middleware", "rate limiting"). Stemming conflates + word forms, which is good for recall but can't do exact substring + matching. +- `fts_trigram` uses FTS5's built-in trigram tokenizer to handle + substring queries ("CfgLoader", "thHandler"), LIKE patterns, and + partial identifiers. No custom trigram code needed — FTS5 handles + trigram decomposition, candidate intersection, and false-positive + filtering internally. + +Both are populated from the same file content during indexing — one +INSERT into `fts`, one INSERT into `fts_trigram` per file. The +`content=''` option on `fts_trigram` means the trigram index stores +only the inverted index, not a second copy of the text. The +`detail='none'` option omits per-token position data, roughly halving +trigram index storage with no impact on query performance. + +One index for everything. No scope columns, no type distinctions. +Specs, source, docs, config — it all goes in. The agent's query +determines what's relevant, not a pre-assigned category. + +### Incremental Indexing + +``` +1. Walk project tree (respecting .gitignore) +2. For each file: + a. stat() for mtime — if unchanged vs files table, skip (fast path) + b. 
If mtime differs, read file, compute xxh3(content) + c. If hash matches stored hash, update mtime only (no re-index) + d. If hash differs: + - Delete old rows from fts and fts_trigram for this file_id + - Insert new row into fts (path, content) + - Insert new row into fts_trigram (path, content) + - Update files row with new hash +3. Delete rows for paths no longer on disk (files + both FTS5 tables) +4. PRAGMA optimize +``` + +mtime as fast path, xxh3 as truth. The common case (nothing changed) +touches no file content at all. + +Indexing a file is two INSERTs into FTS5 virtual tables — no custom +trigram extraction, no bulk-insert of trigram rows. FTS5 handles +tokenization internally for both tables. The `fts_trigram` table is +contentless (`content=''`), so the second INSERT stores only the +inverted trigram index, not a second copy of the file content. + +### File Filtering + +| Rule | Action | +|-------------------------------|---------| +| Matches `.gitignore` | Skip | +| Binary file (null bytes) | Skip | +| File > 1 MB | Skip | +| `.git/`, `node_modules/`, etc | Skip | +| Index database itself | Skip | +| Everything else | Index | + +Binary detection: read the first 8192 bytes, check for null bytes. +Simple, fast, and correct for the vast majority of cases. + +### Search + +```bash +draft search "authentication middleware" --limit 10 +``` + +#### Query Routing + +The searcher classifies each query and routes it to the appropriate +backend(s): + +| Query shape | Example | Backend | +|--------------------------------|--------------------------|--------------------| +| Natural language (words) | `authentication flow` | `fts` only | +| Substring / partial identifier | `CfgLoader` | `fts_trigram` only | +| Mixed / ambiguous | `AuthHandler` | Both → merge | + +Classification heuristic: if the query contains spaces or common +English words, route to FTS5. 
If it looks like a code identifier +(camelCase, snake_case, no spaces, contains uppercase mid-word), +route to trigram. When uncertain, run both and merge. + +#### `fts` Backend (Natural Language) + +```sql +SELECT f.path, + snippet(fts, 1, '»', '«', '…', 32) as snippet, + bm25(fts, 5.0, 1.0) as score +FROM fts +JOIN files f ON f.id = fts.rowid +WHERE fts MATCH 'authentication middleware' +ORDER BY score +LIMIT ?; +``` + +The `bm25(fts, 5.0, 1.0)` weights path matches 5x higher than content +matches. A file named `auth_middleware.go` should rank above a file +that mentions authentication in a comment. + +#### `fts_trigram` Backend (Substring) + +For substring queries, FTS5's trigram tokenizer handles everything +internally — decomposition, candidate matching, and false-positive +filtering: + +```sql +-- Substring match: finds "CfgLoader" anywhere in file content +SELECT f.path, + bm25(fts_trigram, 5.0, 1.0) as score +FROM fts_trigram +JOIN files f ON f.id = fts_trigram.rowid +WHERE fts_trigram MATCH 'CfgLoader' +ORDER BY score +LIMIT ?; +``` + +The trigram table also supports LIKE and GLOB patterns directly, +which is useful for wildcard searches: + +```sql +-- Pattern match via trigram index +SELECT f.path +FROM fts_trigram +JOIN files f ON f.id = fts_trigram.rowid +WHERE content LIKE '%AuthMiddle%'; +``` + +No custom trigram extraction, no IDF scoring, no verification step. +FTS5 does it all. The `content=''` option means the trigram table +doesn't store file content, but FTS5 still indexes the trigrams at +insert time and uses them for matching. + +Note: because `fts_trigram` is contentless, `snippet()` and +`highlight()` are not available on it. Snippets come from the `fts` +table or from reading the file directly. + +#### Score Merging + +When both backends run, results are merged by file path (deduped) +with weighted scores: + +``` +final_score = (w_fts × norm(fts_score)) + (w_tri × norm(trigram_score)) +``` + +Default weights: `w_fts = 0.6`, `w_tri = 0.4`. 
FTS5 gets more weight +because Porter-stemmed BM25 is a stronger relevance signal for most +queries. Trigram scores fill in the gaps for substring matches that +FTS5 misses entirely. + +Both backends now produce BM25 scores, so normalization is +straightforward — min-max normalize each result set to [0, 1] before +combining. + +#### Output Format + +Token-efficient, one result per block: + +``` +src/auth/middleware.go:42-58 (score: 0.87) + …validates the »authentication middleware« chain before… + +specs/auth-flow.md:1-15 (score: 0.74) + …describes the »authentication middleware« integration… + +config/routes.yaml:12-14 (score: 0.31) + …mounts »authentication« handler on /api… +``` + +### Command Integration + +#### After `/spec` and `/refine` + +When a spec file is written or updated, trigger an incremental index. +This is lightweight — only the changed spec file will be re-indexed +(hash changed), everything else hits the mtime fast path and skips. + +``` +/spec + └── writes specs/{slug}.md + └── runs: draft index (incremental, ~50ms) +``` + +The agent doesn't see this. It's a side effect of the write. + +#### Before `/implement` + +When implementation starts, the spec content is used to generate +search queries. The implement skill: + +1. Reads the spec's title and acceptance criteria +2. Extracts key terms (nouns, technical terms) +3. Runs `draft search` with those terms +4. Includes the top results as context for the implementing agent + +``` +/implement auth-flow + └── reads specs/auth-flow.md + └── extracts: "authentication", "OAuth", "token refresh" + └── runs: draft search "authentication OAuth token refresh" --limit 10 + └── top results added to agent context +``` + +This replaces the agent's manual grep exploration with a single +ranked search that gives it the most relevant existing code upfront. + +### Index Management + +draft maintains one SQLite database per project. 
The index is stored
+outside the project tree so it never appears in version control, never
+interferes with builds, and is unaffected by how the project is reached
+(e.g. via a symlink) as long as the resolved absolute path doesn't change.
+
+#### Project Identification
+
+Each project is identified by the **resolved** absolute path to its
+root directory. Symlinks are resolved before hashing, so the same
+project accessed via different symlinks always maps to the same index.
+
+The root is determined by walking upward from the current directory
+looking for a `.draft/` directory or a `CLAUDE.md` file — the same
+heuristic draft already uses for project detection. If neither is
+found, the current working directory is the root.
+
+The index filename is derived from the project root:
+
+```
+index_path = <cache_dir>/draft/<hash(resolved_root)>.db
+```
+
+For example, `/Users/heiko/src/draft` might hash to `a3f7c1d2e9b04856`,
+producing `~/Library/Caches/draft/a3f7c1d2e9b04856.db`.
+
+#### Schema Versioning
+
+The `index_meta` table stores a `schema_version` key. On startup,
+draft compares the stored version against its expected version:
+
+- **Match**: proceed normally.
+- **Older version**: run migration (alter tables, rebuild if needed).
+- **Newer version** (downgrade): refuse to open, print error suggesting
+  `draft index --force` to rebuild.
+- **Missing or corrupt**: treat as first run, create fresh.
+
+#### Storage Location
+
+Platform-appropriate cache directories, following OS conventions:
+
+| Platform | Base directory | Example |
+|----------|----------------------------------------|------------------------------------------------------|
+| macOS | `~/Library/Caches/draft/` | `~/Library/Caches/draft/a3f7c1d2e9b04856.db` |
+| Linux | `${XDG_CACHE_HOME:-~/.cache}/draft/` | `~/.cache/draft/a3f7c1d2e9b04856.db` |
+
+The cache directory is created on first `draft index` if it doesn't
+exist. The `--db` flag overrides the computed path to a specific file,
+useful for CI or testing where the cache directory may not be writable.
+ +#### Index Lifecycle + +``` +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ No index │────▶│ Created │────▶│ Current │ +│ (first run) │ │ (full scan) │ │ (in sync) │ +└──────────────┘ └──────────────┘ └──────┬───────┘ + │ + file changed ──────────┤ + file deleted ──────────┤ + file added ──────────┤ + ▼ + ┌──────────────┐ + │ Stale │ + │ (drift) │ + └──────┬───────┘ + │ + draft index │ + ▼ + ┌──────────────┐ + │ Current │ + │ (in sync) │ + └──────────────┘ +``` + +**Creation**: On first `draft index`, the database file and schema are +created, and a full scan of the project tree populates both FTS5 tables +and the `files` metadata table. This is the slowest operation — typically +1-2 seconds for a 10k LOC project. + +**Incremental update**: Subsequent `draft index` runs walk the tree, +compare mtimes and content hashes, and only re-index changed files. +For a project with no changes this takes ~50ms (stat calls only, no +file reads). + +**Forced rebuild**: `draft index --force` drops and recreates all tables, +then does a full scan. Use when the index is suspected to be corrupt or +after significant project restructuring (e.g., large merge, directory +renames). + +**Staleness**: There is no automatic invalidation. The index becomes +stale whenever files change outside of draft's workflow (e.g., manual +edits, `git pull`, IDE refactors). This is acceptable because: +- `/spec` and `/refine` re-index after writing +- `/implement` can re-index before searching if the index is old +- The agent can always run `draft index` explicitly + +#### Status and Diagnostics + +`draft search --status` reports the current index state: + +``` +Index: ~/Library/Caches/draft/a3f7c1d2e9b04856.db +Project: /Users/heiko/src/draft +Files indexed: 347 +Last indexed: 2026-03-14 10:23:41 (12 minutes ago) +Database size: 2.1 MB +``` + +This gives the agent (or human) a quick check on whether the index +is fresh enough to trust, without having to understand where the +database lives. 
+
+#### Multiple Projects
+
+A developer working on several projects gets one database file per
+project, all living in the same cache directory:
+
+```
+~/.cache/draft/
+├── a3f7c1d2e9b04856.db # /home/heiko/src/draft
+├── 7e2b9f1c4a6d8032.db # /home/heiko/src/other-project
+└── d1c5a8f3b7e24690.db # /home/heiko/src/client-work
+```
+
+No registry, no config file, no global state. The mapping from project
+to index is pure function: `path → hash → filename`. If the project
+is deleted, the index file becomes orphaned but harmless.
+
+#### Cleanup
+
+Orphaned index files (for projects that no longer exist) can be cleaned
+up with `draft index --prune`. This scans the cache directory, reads
+the `project_root` from each database's `index_meta` table, checks
+whether that path still exists on disk, and deletes indexes whose
+projects are gone. This is a manual, opt-in operation — not automatic —
+because a project directory being temporarily unmounted or on a
+different branch shouldn't trigger deletion.
+
+`draft index --list` shows all known indexes:
+
+```
+PATH FILES SIZE LAST INDEXED
+/Users/heiko/src/draft 347 2.1 MB 12 min ago
+/Users/heiko/src/other-project 892 5.7 MB 3 days ago
+/Users/heiko/src/client-work 1204 8.3 MB 2 weeks ago
+```
+
+This reads `index_meta` from each `.db` file in the cache directory.
+No global registry is needed.
+
+### Key Dependencies
+
+| Dependency | Purpose |
+|---------------------------|----------------------------------|
+| `mattn/go-sqlite3` | SQLite with FTS5 (CGo, bundled) |
+| `zeebo/xxh3` | Fast content hashing |
+| `sabhiram/go-gitignore` | .gitignore pattern matching |
+
+Build tag `sqlite_fts5` required for `mattn/go-sqlite3` to enable FTS5.
+ +## Affected Modules + +- `cmd/index.go` — new `draft index` command (including `--force`, `--prune`, `--list`) +- `cmd/search.go` — new `draft search` command (including `--status`) +- `internal/search/project.go` — project root detection, path-to-hash mapping, cache directory resolution +- `internal/search/indexer.go` — file walker, hasher, dual FTS5 upsert logic +- `internal/search/searcher.go` — query classification, backend dispatch, score merging +- `internal/search/store.go` — SQLite connection, schema creation/migration, forced rebuild +- `skills/spec.md` — add post-write hook: run `draft index` +- `skills/refine.md` — add post-write hook: run `draft index` +- `skills/implement.md` — add pre-generation step: run `draft search` with spec terms + +## Test Strategy + +### Indexing +- **Full index**: Create temp project with mixed files (Go, Markdown, YAML, + binary, `.gitignore`d files). Index. Verify correct files are in both + `fts` and `fts_trigram` tables, excluded files are not. +- **Incremental update**: Index, modify one file, re-index. Verify only + that file's rows changed in both FTS5 tables (check `indexed` timestamp). +- **Incremental delete**: Index, delete a file, re-index. Verify removed + from `files`, `fts`, and `fts_trigram` tables. +- **mtime-only change**: Index, touch a file (same content, new mtime), + re-index. Verify no FTS5 rows were re-inserted in either table. +- **git checkout edge case**: Index, simulate `git checkout` (content + changes, mtime may or may not update). Verify hash-based detection + catches the change. +- **Binary skip**: Include a binary file (e.g., .png), verify it's excluded. +- **Large file skip**: Include a file > 1 MB, verify it's excluded. +- **Forced rebuild**: Index, then `draft index --force`. Verify all + `indexed` timestamps are refreshed (full re-scan, not incremental). +- **Contentless trigram table**: Verify `fts_trigram` does not store + file content (SELECT content FROM fts_trigram returns NULL). 
+ +### Search +- **FTS5 ranking**: Insert known documents with varying relevance. + Query with natural language, verify BM25 ordering matches expected order. +- **Path weighting**: Search for a term that appears in both a filename + and a file body. Verify the filename match ranks higher. +- **Trigram substring match**: Index files containing `AppCfgLoader` and + `UserConfigService`. Search for `CfgLoad`. Verify the first file is + found via `fts_trigram`, the second is not. +- **Trigram LIKE pattern**: Index files, search using + `WHERE content LIKE '%AuthMiddle%'` against `fts_trigram`. Verify + correct matches are returned. +- **Trigram minimum length**: Search for a 2-character string. Verify + it falls back gracefully (FTS5 trigram requires 3+ characters). +- **Query routing**: Verify `"error handling"` routes to `fts` only, + `CfgLoader` routes to `fts_trigram` only, and `AuthHandler` routes + to both. +- **Score merging**: Insert files that rank differently in `fts` vs + `fts_trigram`. Search with a query that hits both backends. Verify + merged ranking reflects the configured weights (0.6 / 0.4). +- **Snippet source**: Verify snippets come from the `fts` table (not + `fts_trigram`, which is contentless and can't produce snippets). + +### Index Management +- **Project root detection**: Create a temp directory with `.draft/` + marker at the root. Run `draft index` from a subdirectory. Verify the + index covers the full project tree, not just the subdirectory. +- **Project root fallback**: Run `draft index` in a directory with no + `.draft/` or `CLAUDE.md` marker. Verify the current directory is used + as root. +- **Deterministic hashing**: Run `draft index` twice on the same project. + Verify both runs use the same database file (same hash of project path). +- **Multi-project isolation**: Index two different projects. Verify each + gets its own database file. Search in project A returns no results + from project B. 
+- **Cache directory creation**: Remove the cache directory, run + `draft index`. Verify the directory is created automatically. +- **--db override**: Run `draft index --db /tmp/custom.db`. Verify the + index is written to that path, not the default cache location. +- **Status output**: Index a project, run `draft search --status`. Verify + it reports correct file count, project path, last-indexed time, and + database size. +- **Prune**: Index two projects. Delete one project's directory. Run + `draft index --prune`. Verify the orphaned index is deleted, the + other is kept. +- **Prune safety**: Index a project, unmount or rename the directory. + Run `draft index --prune`. Verify the index is deleted (path no longer + exists). Re-create the directory and re-index — a new database file + is created (same hash, fresh content). +- **Symlink resolution**: Create a project accessed via symlink. Verify + the index uses the resolved (real) path, so the same project accessed + via different symlinks shares one index. +- **Schema version match**: Open an index with matching schema_version. + Verify it proceeds normally without rebuild. +- **Schema version mismatch (upgrade)**: Create an index with an older + schema_version. Run `draft index`. Verify migration runs and version + is updated. +- **Schema version mismatch (downgrade)**: Create an index with a newer + schema_version. Run `draft index`. Verify it refuses to open and + suggests `--force`. + +### Command Integration +- **Spec integration**: Run `/spec`, verify index is updated. Check that + the new spec is findable via `draft search`. +- **Implement integration**: Create a spec with known terms, run + `/implement`, verify search results appear in the agent's context. + +## Open Questions + +1. **Tokenizer for code**: `porter unicode61` stems words in `fts`, which + is great for natural language but means `Handler` and `handling` + conflate. 
The `fts_trigram` table now covers exact substring matching, + so Porter stemming may be fine — but should we offer an `--exact` + flag that bypasses `fts` and goes trigram-only? + +2. **Implement search extraction**: How should key terms be extracted + from a spec for the pre-implementation search? Options: (a) use the + title as-is, (b) extract nouns from acceptance criteria with a simple + heuristic, (c) let the agent decide what to search for. + +3. **Index on other commands**: Should `/review` also trigger an index + update, or is spec/refine sufficient to keep the index fresh? What + about a `draft index` in the project's post-checkout git hook? + +4. **Score merge weights**: The default 0.6/0.4 FTS5/trigram split is a + starting guess. Should this be tunable via config, or is it better + to find the right default empirically and hardcode it? diff --git a/go.mod b/go.mod index 8462d42..195849c 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,17 @@ module github.com/heiko-braun/draft go 1.24.3 require ( + github.com/mattn/go-sqlite3 v1.14.34 + github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 github.com/spf13/cobra v1.10.2 github.com/yuin/goldmark v1.7.16 + github.com/zeebo/xxh3 v1.1.0 gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.10 // indirect github.com/spf13/pflag v1.0.9 // indirect + golang.org/x/sys v0.30.0 // indirect ) diff --git a/go.sum b/go.sum index 73263f4..42d085e 100644 --- a/go.sum +++ b/go.sum @@ -1,15 +1,35 @@ github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod 
h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= +github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk= +github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= +github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/yuin/goldmark v1.7.16 h1:n+CJdUxaFMiDUNnWC3dMWCIQJSkxH4uz3ZwQBkAlVNE= github.com/yuin/goldmark v1.7.16/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= +github.com/zeebo/xxh3 v1.1.0/go.mod 
h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/cli/index.go b/internal/cli/index.go new file mode 100644 index 0000000..a9b7499 --- /dev/null +++ b/internal/cli/index.go @@ -0,0 +1,127 @@ +package cli + +import ( + "fmt" + "os" + "time" + + "github.com/heiko-braun/draft/internal/search" + "github.com/spf13/cobra" +) + +func newIndexCmd() *cobra.Command { + var ( + forceIndex bool + pruneFlag bool + listFlag bool + dbFlag string + ) + + cmd := &cobra.Command{ + Use: "index", + Short: "Build or update the project search index", + Long: `Walks the project tree and maintains an FTS5 search index for fast, ranked code search.`, + RunE: func(cmd *cobra.Command, args []string) error { + if listFlag { + return runIndexList() + } + if pruneFlag { + return runIndexPrune() + } + return runIndex(forceIndex, dbFlag) + }, + } + + cmd.Flags().BoolVar(&forceIndex, "force", false, "Drop and rebuild the index from scratch") + cmd.Flags().BoolVar(&pruneFlag, "prune", false, "Delete indexes for projects that no longer exist") + cmd.Flags().BoolVar(&listFlag, "list", false, "List all known indexes") + cmd.Flags().StringVar(&dbFlag, "db", "", "Override index database path") + + return cmd +} + +func runIndex(force bool, dbOverride string) error { + cwd, err := os.Getwd() + 
if err != nil { + return fmt.Errorf("getwd: %w", err) + } + + root, err := search.DetectProjectRoot(cwd) + if err != nil { + return fmt.Errorf("detect project root: %w", err) + } + + dbPath, err := search.IndexPath(root, dbOverride) + if err != nil { + return fmt.Errorf("index path: %w", err) + } + + store, err := search.OpenStore(dbPath, root) + if err != nil { + return fmt.Errorf("open store: %w", err) + } + defer store.Close() + + start := time.Now() + result, err := search.Index(store, root, force) + if err != nil { + return fmt.Errorf("index: %w", err) + } + elapsed := time.Since(start) + + fmt.Printf("Indexed %s in %s\n", root, elapsed.Round(time.Millisecond)) + fmt.Printf(" files indexed: %d, unchanged: %d, deleted: %d, skipped: %d\n", + result.FilesIndexed, result.FilesUnchanged, result.FilesDeleted, result.FilesSkipped) + + return nil +} + +func runIndexList() error { + infos, err := search.ListIndexes() + if err != nil { + return err + } + + if len(infos) == 0 { + fmt.Println("No indexes found.") + return nil + } + + fmt.Printf("%-50s %6s %10s %s\n", "PROJECT", "FILES", "SIZE", "LAST INDEXED") + for _, info := range infos { + size := formatSize(info.SizeBytes) + fmt.Printf("%-50s %6d %10s %s\n", info.ProjectRoot, info.FileCount, size, info.LastIndexed) + } + + return nil +} + +func runIndexPrune() error { + pruned, err := search.PruneIndexes() + if err != nil { + return err + } + + if len(pruned) == 0 { + fmt.Println("No orphaned indexes found.") + return nil + } + + fmt.Printf("Pruned %d orphaned index(es):\n", len(pruned)) + for _, root := range pruned { + fmt.Printf(" - %s\n", root) + } + + return nil +} + +func formatSize(bytes int64) string { + switch { + case bytes >= 1<<20: + return fmt.Sprintf("%.1f MB", float64(bytes)/float64(1<<20)) + case bytes >= 1<<10: + return fmt.Sprintf("%.1f KB", float64(bytes)/float64(1<<10)) + default: + return fmt.Sprintf("%d B", bytes) + } +} diff --git a/internal/cli/root.go b/internal/cli/root.go index 
2b74dcb..a06568b 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -25,6 +25,8 @@ func Execute(templates embed.FS, version string) error { rootCmd.AddCommand(newInitCmd()) rootCmd.AddCommand(newVersionCmd()) rootCmd.AddCommand(newPresentCmd()) + rootCmd.AddCommand(newIndexCmd()) + rootCmd.AddCommand(newSearchCmd()) return rootCmd.Execute() } diff --git a/internal/cli/search.go b/internal/cli/search.go new file mode 100644 index 0000000..e0b5a93 --- /dev/null +++ b/internal/cli/search.go @@ -0,0 +1,113 @@ +package cli + +import ( + "fmt" + "os" + "strings" + + "github.com/heiko-braun/draft/internal/search" + "github.com/spf13/cobra" +) + +func newSearchCmd() *cobra.Command { + var ( + limitFlag int + statusFlag bool + dbFlag string + ) + + cmd := &cobra.Command{ + Use: "search ", + Short: "Search the project index", + Long: `Search the FTS5 index for relevant files using natural language or substring queries.`, + RunE: func(cmd *cobra.Command, args []string) error { + if statusFlag { + return runSearchStatus(dbFlag) + } + if len(args) == 0 { + return fmt.Errorf("query required: draft search ") + } + query := strings.Join(args, " ") + return runSearch(query, limitFlag, dbFlag) + }, + } + + cmd.Flags().IntVar(&limitFlag, "limit", 20, "Maximum number of results") + cmd.Flags().BoolVar(&statusFlag, "status", false, "Show index status") + cmd.Flags().StringVar(&dbFlag, "db", "", "Override index database path") + + return cmd +} + +func runSearch(query string, limit int, dbOverride string) error { + cwd, err := os.Getwd() + if err != nil { + return err + } + + root, err := search.DetectProjectRoot(cwd) + if err != nil { + return err + } + + dbPath, err := search.IndexPath(root, dbOverride) + if err != nil { + return err + } + + store, err := search.OpenStore(dbPath, root) + if err != nil { + return fmt.Errorf("open index: %w (run 'draft index' first)", err) + } + defer store.Close() + + results, err := search.Search(store, query, limit) + if err != nil { + 
return err + } + + fmt.Print(search.FormatResults(results)) + return nil +} + +func runSearchStatus(dbOverride string) error { + cwd, err := os.Getwd() + if err != nil { + return err + } + + root, err := search.DetectProjectRoot(cwd) + if err != nil { + return err + } + + dbPath, err := search.IndexPath(root, dbOverride) + if err != nil { + return err + } + + store, err := search.OpenStore(dbPath, root) + if err != nil { + return fmt.Errorf("no index found (run 'draft index' first)") + } + defer store.Close() + + fileCount, _ := store.FileCount() + lastIndexed, _ := store.Meta("created_at") + + fi, err := os.Stat(dbPath) + var sizeStr string + if err == nil { + sizeStr = formatSize(fi.Size()) + } else { + sizeStr = "unknown" + } + + fmt.Printf("Index: %s\n", dbPath) + fmt.Printf("Project: %s\n", root) + fmt.Printf("Files indexed: %d\n", fileCount) + fmt.Printf("Last indexed: %s\n", lastIndexed) + fmt.Printf("Database size: %s\n", sizeStr) + + return nil +} diff --git a/internal/search/indexer.go b/internal/search/indexer.go new file mode 100644 index 0000000..bfe709d --- /dev/null +++ b/internal/search/indexer.go @@ -0,0 +1,352 @@ +package search + +import ( + "encoding/hex" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + ignore "github.com/sabhiram/go-gitignore" + "github.com/zeebo/xxh3" +) + +const ( + maxFileSize = 1 << 20 // 1 MB + binaryCheckSize = 8192 +) + +// hardcoded directories to always skip +var skipDirs = map[string]bool{ + ".git": true, + "node_modules": true, + ".hg": true, + ".svn": true, + "__pycache__": true, + ".idea": true, + ".vscode": true, +} + +// IndexResult reports what happened during an index run. +type IndexResult struct { + FilesIndexed int + FilesSkipped int + FilesDeleted int + FilesUnchanged int +} + +// Index walks the project tree and updates both FTS5 indexes incrementally. +// If force is true, the store is rebuilt from scratch first. 
+func Index(store *Store, projectRoot string, force bool) (*IndexResult, error) { + if force { + if err := store.ForceRebuild(); err != nil { + return nil, fmt.Errorf("force rebuild: %w", err) + } + } + + gi := loadGitignore(projectRoot) + + // Resolve the index db path to skip it during walk. + dbPath := "" + if dbPathVal, err := IndexPath(projectRoot, ""); err == nil { + dbPath = dbPathVal + } + + // Get all currently indexed paths so we can detect deletions. + existing, err := store.AllFilePaths() + if err != nil { + return nil, fmt.Errorf("load existing paths: %w", err) + } + seen := make(map[string]bool) + + result := &IndexResult{} + + tx, err := store.Begin() + if err != nil { + return nil, fmt.Errorf("begin tx: %w", err) + } + defer tx.Rollback() + + err = filepath.WalkDir(projectRoot, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return nil // skip unreadable entries + } + + // Relative path for matching and storage. + rel, err := filepath.Rel(projectRoot, path) + if err != nil { + return nil + } + + if d.IsDir() { + name := d.Name() + if skipDirs[name] { + return filepath.SkipDir + } + if gi != nil && gi.MatchesPath(rel+"/") { + return filepath.SkipDir + } + return nil + } + + // Skip the index database itself. + if dbPath != "" && path == dbPath { + return nil + } + + // Skip gitignored files. + if gi != nil && gi.MatchesPath(rel) { + return nil + } + + info, err := d.Info() + if err != nil { + return nil + } + + // Skip large files. + if info.Size() > maxFileSize { + result.FilesSkipped++ + return nil + } + + seen[rel] = true + + mtime := info.ModTime().Unix() + + // Check existing record. + existing, err := store.GetFile(rel) + if err != nil { + return fmt.Errorf("get file %s: %w", rel, err) + } + + // mtime fast path: unchanged → skip entirely. + if existing != nil && existing.Mtime == mtime { + result.FilesUnchanged++ + return nil + } + + // Read file content. 
+ content, err := os.ReadFile(path) + if err != nil { + return nil // skip unreadable + } + + // Binary check. + if isBinary(content) { + result.FilesSkipped++ + return nil + } + + // Compute hash. + h := xxh3.Hash128(content) + hb := h.Bytes() + hashStr := hex.EncodeToString(hb[:]) + + // If hash unchanged (mtime-only change), just update mtime. + if existing != nil && existing.Hash == hashStr { + if err := store.UpdateMtime(tx, existing.ID, mtime); err != nil { + return fmt.Errorf("update mtime %s: %w", rel, err) + } + result.FilesUnchanged++ + return nil + } + + // Content changed (or new file): upsert file + FTS. + if existing != nil { + if err := store.DeleteFTS(tx, existing.ID); err != nil { + return fmt.Errorf("delete fts %s: %w", rel, err) + } + } + + id, err := store.UpsertFile(tx, rel, hashStr, mtime) + if err != nil { + return fmt.Errorf("upsert file %s: %w", rel, err) + } + + // For upsert (ON CONFLICT), LastInsertId may not return the existing id. + // Re-fetch if needed. + if existing != nil { + id = existing.ID + } + + if err := store.InsertFTS(tx, id, rel, string(content)); err != nil { + return fmt.Errorf("insert fts %s: %w", rel, err) + } + + result.FilesIndexed++ + return nil + }) + + if err != nil { + return nil, fmt.Errorf("walk: %w", err) + } + + // Delete files that no longer exist on disk. + for path, id := range existing { + if !seen[path] { + if err := store.DeleteFile(tx, id); err != nil { + return nil, fmt.Errorf("delete %s: %w", path, err) + } + result.FilesDeleted++ + } + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("commit: %w", err) + } + + if err := store.Optimize(); err != nil { + return nil, fmt.Errorf("optimize: %w", err) + } + + return result, nil +} + +// loadGitignore loads .gitignore from the project root, if present. 
+func loadGitignore(root string) *ignore.GitIgnore { + path := filepath.Join(root, ".gitignore") + gi, err := ignore.CompileIgnoreFile(path) + if err != nil { + return nil + } + return gi +} + +// isBinary checks the first binaryCheckSize bytes for null bytes. +func isBinary(content []byte) bool { + check := content + if len(check) > binaryCheckSize { + check = check[:binaryCheckSize] + } + for _, b := range check { + if b == 0 { + return true + } + } + return false +} + +// PruneIndexes scans the cache directory and deletes indexes whose +// project_root no longer exists on disk. +func PruneIndexes() ([]string, error) { + cacheDir, err := cacheBaseDir() + if err != nil { + return nil, err + } + cacheDir = filepath.Join(cacheDir) // already includes "draft" + + entries, err := os.ReadDir(cacheDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var pruned []string + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".db") { + continue + } + + dbPath := filepath.Join(cacheDir, e.Name()) + root, err := readProjectRoot(dbPath) + if err != nil { + continue // skip corrupt files + } + + if _, err := os.Stat(root); os.IsNotExist(err) { + os.Remove(dbPath) + // Also remove WAL and SHM files. + os.Remove(dbPath + "-wal") + os.Remove(dbPath + "-shm") + pruned = append(pruned, root) + } + } + + return pruned, nil +} + +// IndexInfo holds summary info about one index database. +type IndexInfo struct { + ProjectRoot string + FileCount int + SizeBytes int64 + LastIndexed string + DBPath string +} + +// ListIndexes returns info about all index databases in the cache directory. 
+func ListIndexes() ([]IndexInfo, error) { + cacheDir, err := cacheBaseDir() + if err != nil { + return nil, err + } + + entries, err := os.ReadDir(cacheDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + var infos []IndexInfo + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".db") { + continue + } + + dbPath := filepath.Join(cacheDir, e.Name()) + info, err := readIndexInfo(dbPath) + if err != nil { + continue + } + infos = append(infos, info) + } + + return infos, nil +} + +func readProjectRoot(dbPath string) (string, error) { + info, err := readIndexInfo(dbPath) + if err != nil { + return "", err + } + return info.ProjectRoot, nil +} + +func readIndexInfo(dbPath string) (IndexInfo, error) { + s, err := OpenStore(dbPath, "") + if err != nil { + return IndexInfo{}, err + } + defer s.Close() + + root, err := s.Meta("project_root") + if err != nil { + return IndexInfo{}, err + } + + count, err := s.FileCount() + if err != nil { + return IndexInfo{}, err + } + + lastIndexed, _ := s.Meta("created_at") + + fi, err := os.Stat(dbPath) + if err != nil { + return IndexInfo{}, err + } + + return IndexInfo{ + ProjectRoot: root, + FileCount: count, + SizeBytes: fi.Size(), + LastIndexed: lastIndexed, + DBPath: dbPath, + }, nil +} diff --git a/internal/search/indexer_test.go b/internal/search/indexer_test.go new file mode 100644 index 0000000..611d8b0 --- /dev/null +++ b/internal/search/indexer_test.go @@ -0,0 +1,349 @@ +package search + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +func setupProject(t *testing.T) (string, string) { + t.Helper() + root := t.TempDir() + dbPath := filepath.Join(t.TempDir(), "test.db") + + // Create project files. 
+ writeFile(t, root, "main.go", "package main\nfunc main() {}\n") + writeFile(t, root, "README.md", "# Test Project\nThis is a test.\n") + writeFile(t, root, "config.yaml", "key: value\n") + + return root, dbPath +} + +func writeFile(t *testing.T, root, rel, content string) { + t.Helper() + path := filepath.Join(root, rel) + os.MkdirAll(filepath.Dir(path), 0755) + if err := os.WriteFile(path, []byte(content), 0644); err != nil { + t.Fatal(err) + } +} + +func TestIndex_FullIndex(t *testing.T) { + root, dbPath := setupProject(t) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + if result.FilesIndexed != 3 { + t.Errorf("FilesIndexed = %d, want 3", result.FilesIndexed) + } + + count, _ := s.FileCount() + if count != 3 { + t.Errorf("FileCount = %d, want 3", count) + } +} + +func TestIndex_GitignoreRespected(t *testing.T) { + root, dbPath := setupProject(t) + + // Add .gitignore and ignored file. + writeFile(t, root, ".gitignore", "ignored/\n*.log\n") + writeFile(t, root, "ignored/secret.go", "package secret\n") + writeFile(t, root, "debug.log", "some log\n") + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + Index(s, root, false) + + // ignored/secret.go and debug.log should not be indexed. + f1, _ := s.GetFile("ignored/secret.go") + if f1 != nil { + t.Error("gitignored directory file should not be indexed") + } + f2, _ := s.GetFile("debug.log") + if f2 != nil { + t.Error("gitignored file should not be indexed") + } + + // .gitignore itself should be indexed. + f3, _ := s.GetFile(".gitignore") + if f3 == nil { + t.Error(".gitignore should be indexed") + } +} + +func TestIndex_BinarySkipped(t *testing.T) { + root, dbPath := setupProject(t) + + // Write a binary file (contains null bytes). 
+ binContent := []byte("header\x00\x00\x00binary data") + os.WriteFile(filepath.Join(root, "image.png"), binContent, 0644) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + f, _ := s.GetFile("image.png") + if f != nil { + t.Error("binary file should not be indexed") + } + if result.FilesSkipped < 1 { + t.Error("expected at least 1 skipped file") + } +} + +func TestIndex_LargeFileSkipped(t *testing.T) { + root, dbPath := setupProject(t) + + // Write a file > 1MB. + large := make([]byte, maxFileSize+1) + for i := range large { + large[i] = 'a' + } + os.WriteFile(filepath.Join(root, "huge.txt"), large, 0644) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + f, _ := s.GetFile("huge.txt") + if f != nil { + t.Error("large file should not be indexed") + } + if result.FilesSkipped < 1 { + t.Error("expected at least 1 skipped file") + } +} + +func TestIndex_IncrementalUpdate(t *testing.T) { + root, dbPath := setupProject(t) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + // First index. + Index(s, root, false) + + // Record indexed timestamp for main.go. + f1, _ := s.GetFile("main.go") + origIndexed := f1.Indexed + + // Modify main.go and set a future mtime to ensure mtime differs. + writeFile(t, root, "main.go", "package main\nfunc main() { fmt.Println(\"updated\") }\n") + future := time.Now().Add(2 * time.Hour) + os.Chtimes(filepath.Join(root, "main.go"), future, future) + + // Re-index. + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + if result.FilesIndexed != 1 { + t.Errorf("FilesIndexed = %d, want 1", result.FilesIndexed) + } + + // main.go should have a newer indexed timestamp. 
+ f2, _ := s.GetFile("main.go") + if f2.Indexed < origIndexed { + t.Errorf("indexed timestamp should have been updated: got %d, orig %d", f2.Indexed, origIndexed) + } + // Hash should have changed. + if f2.Hash == f1.Hash { + t.Error("hash should have changed after content update") + } +} + +func TestIndex_IncrementalDelete(t *testing.T) { + root, dbPath := setupProject(t) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + Index(s, root, false) + + // Delete README.md. + os.Remove(filepath.Join(root, "README.md")) + + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + if result.FilesDeleted != 1 { + t.Errorf("FilesDeleted = %d, want 1", result.FilesDeleted) + } + + f, _ := s.GetFile("README.md") + if f != nil { + t.Error("deleted file should not be in index") + } + + count, _ := s.FileCount() + if count != 2 { + t.Errorf("FileCount = %d, want 2", count) + } +} + +func TestIndex_MtimeOnlyNoReindex(t *testing.T) { + root, dbPath := setupProject(t) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + Index(s, root, false) + + // Touch main.go (change mtime but not content). + now := time.Now().Add(time.Hour) + os.Chtimes(filepath.Join(root, "main.go"), now, now) + + result, err := Index(s, root, false) + if err != nil { + t.Fatal(err) + } + + // Should be counted as unchanged (mtime updated, no FTS re-insert). + if result.FilesIndexed != 0 { + t.Errorf("FilesIndexed = %d, want 0 (mtime-only change)", result.FilesIndexed) + } +} + +func TestIndex_ForceRebuild(t *testing.T) { + root, dbPath := setupProject(t) + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + Index(s, root, false) + time.Sleep(10 * time.Millisecond) + + // Force rebuild. + result, err := Index(s, root, true) + if err != nil { + t.Fatal(err) + } + + // All files should be re-indexed. 
+ if result.FilesIndexed != 3 { + t.Errorf("FilesIndexed = %d, want 3 after force rebuild", result.FilesIndexed) + } +} + +func TestIndex_SkipsDotGit(t *testing.T) { + root, dbPath := setupProject(t) + + // Create .git directory with files. + writeFile(t, root, ".git/config", "[core]\nbare = false\n") + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + defer s.Close() + + Index(s, root, false) + + f, _ := s.GetFile(".git/config") + if f != nil { + t.Error(".git directory should be skipped") + } +} + +func TestPruneIndexes(t *testing.T) { + // Create two project directories. + proj1 := t.TempDir() + proj2 := t.TempDir() + + cacheDir := t.TempDir() + db1 := filepath.Join(cacheDir, "proj1.db") + db2 := filepath.Join(cacheDir, "proj2.db") + + // Index both. + s1, _ := OpenStore(db1, proj1) + writeFile(t, proj1, "a.go", "package a") + Index(s1, proj1, false) + s1.Close() + + s2, _ := OpenStore(db2, proj2) + writeFile(t, proj2, "b.go", "package b") + Index(s2, proj2, false) + s2.Close() + + // Delete proj2's directory. + os.RemoveAll(proj2) + + // PruneIndexes works on the default cache dir, so we test readProjectRoot directly. 
+ root, err := readProjectRoot(db2) + if err != nil { + t.Fatal(err) + } + if root != proj2 { + t.Errorf("got %q, want %q", root, proj2) + } +} + +func TestListIndexes(t *testing.T) { + proj := t.TempDir() + cacheDir := t.TempDir() + db := filepath.Join(cacheDir, "test.db") + + s, _ := OpenStore(db, proj) + writeFile(t, proj, "main.go", "package main") + Index(s, proj, false) + s.Close() + + info, err := readIndexInfo(db) + if err != nil { + t.Fatal(err) + } + + if info.ProjectRoot != proj { + t.Errorf("ProjectRoot = %q, want %q", info.ProjectRoot, proj) + } + if info.FileCount != 1 { + t.Errorf("FileCount = %d, want 1", info.FileCount) + } + if info.SizeBytes == 0 { + t.Error("SizeBytes should be > 0") + } +} diff --git a/internal/search/project.go b/internal/search/project.go new file mode 100644 index 0000000..cf17ba7 --- /dev/null +++ b/internal/search/project.go @@ -0,0 +1,107 @@ +package search + +import ( + "encoding/hex" + "fmt" + "os" + "path/filepath" + "runtime" + + "github.com/zeebo/xxh3" +) + +// DetectProjectRoot walks upward from cwd looking for a .draft/ directory +// or a CLAUDE.md file. Returns the first directory containing either marker, +// or cwd itself if no marker is found. +func DetectProjectRoot(cwd string) (string, error) { + abs, err := filepath.Abs(cwd) + if err != nil { + return "", fmt.Errorf("resolve path: %w", err) + } + + resolved, err := filepath.EvalSymlinks(abs) + if err != nil { + return "", fmt.Errorf("resolve symlinks: %w", err) + } + + dir := resolved + for { + if hasMarker(dir) { + return dir, nil + } + parent := filepath.Dir(dir) + if parent == dir { + // Reached filesystem root without finding a marker. 
+ return resolved, nil + } + dir = parent + } +} + +func hasMarker(dir string) bool { + for _, marker := range []string{".draft", "CLAUDE.md"} { + info, err := os.Stat(filepath.Join(dir, marker)) + if err != nil { + continue + } + if marker == ".draft" && info.IsDir() { + return true + } + if marker == "CLAUDE.md" && !info.IsDir() { + return true + } + } + return false +} + +// IndexPath returns the path to the SQLite database for the given project root. +// If overridePath is non-empty, it is returned directly (supports --db flag). +func IndexPath(projectRoot, overridePath string) (string, error) { + if overridePath != "" { + return overridePath, nil + } + + resolved, err := filepath.EvalSymlinks(projectRoot) + if err != nil { + return "", fmt.Errorf("resolve symlinks: %w", err) + } + + abs, err := filepath.Abs(resolved) + if err != nil { + return "", fmt.Errorf("resolve path: %w", err) + } + + cacheDir, err := cacheBaseDir() + if err != nil { + return "", err + } + + hash := xxh3.HashString128(abs) + b := hash.Bytes() + name := hex.EncodeToString(b[:]) + return filepath.Join(cacheDir, name+".db"), nil +} + +// cacheBaseDir returns the platform-appropriate cache directory for draft indexes. 
+func cacheBaseDir() (string, error) { + var base string + switch runtime.GOOS { + case "darwin": + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("home dir: %w", err) + } + base = filepath.Join(home, "Library", "Caches") + default: // linux and others + if xdg := os.Getenv("XDG_CACHE_HOME"); xdg != "" { + base = xdg + } else { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("home dir: %w", err) + } + base = filepath.Join(home, ".cache") + } + } + return filepath.Join(base, "draft"), nil +} diff --git a/internal/search/project_test.go b/internal/search/project_test.go new file mode 100644 index 0000000..dd046a9 --- /dev/null +++ b/internal/search/project_test.go @@ -0,0 +1,118 @@ +package search + +import ( + "os" + "path/filepath" + "testing" +) + +func TestDetectProjectRoot_WithDraftMarker(t *testing.T) { + root := t.TempDir() + sub := filepath.Join(root, "a", "b", "c") + if err := os.MkdirAll(sub, 0755); err != nil { + t.Fatal(err) + } + if err := os.Mkdir(filepath.Join(root, ".draft"), 0755); err != nil { + t.Fatal(err) + } + + got, err := DetectProjectRoot(sub) + if err != nil { + t.Fatal(err) + } + + // Resolve symlinks on root for comparison (macOS /tmp → /private/tmp). 
+ want, _ := filepath.EvalSymlinks(root) + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestDetectProjectRoot_WithClaudeMD(t *testing.T) { + root := t.TempDir() + sub := filepath.Join(root, "src") + if err := os.MkdirAll(sub, 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(root, "CLAUDE.md"), []byte("hi"), 0644); err != nil { + t.Fatal(err) + } + + got, err := DetectProjectRoot(sub) + if err != nil { + t.Fatal(err) + } + + want, _ := filepath.EvalSymlinks(root) + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestDetectProjectRoot_FallbackToCwd(t *testing.T) { + dir := t.TempDir() + + got, err := DetectProjectRoot(dir) + if err != nil { + t.Fatal(err) + } + + want, _ := filepath.EvalSymlinks(dir) + if got != want { + t.Errorf("got %q, want %q", got, want) + } +} + +func TestIndexPath_Deterministic(t *testing.T) { + dir := t.TempDir() + + p1, err := IndexPath(dir, "") + if err != nil { + t.Fatal(err) + } + p2, err := IndexPath(dir, "") + if err != nil { + t.Fatal(err) + } + + if p1 != p2 { + t.Errorf("not deterministic: %q vs %q", p1, p2) + } + if filepath.Ext(p1) != ".db" { + t.Errorf("expected .db extension, got %q", p1) + } +} + +func TestIndexPath_Override(t *testing.T) { + override := "/tmp/test-custom.db" + got, err := IndexPath("/some/project", override) + if err != nil { + t.Fatal(err) + } + if got != override { + t.Errorf("got %q, want %q", got, override) + } +} + +func TestIndexPath_SymlinkResolution(t *testing.T) { + real := t.TempDir() + parent := t.TempDir() + link := filepath.Join(parent, "link") + + if err := os.Symlink(real, link); err != nil { + t.Skip("symlinks not supported") + } + + p1, err := IndexPath(real, "") + if err != nil { + t.Fatal(err) + } + p2, err := IndexPath(link, "") + if err != nil { + t.Fatal(err) + } + + if p1 != p2 { + t.Errorf("symlink produced different path: %q vs %q", p1, p2) + } +} diff --git a/internal/search/searcher.go 
b/internal/search/searcher.go new file mode 100644 index 0000000..1ba2ec2 --- /dev/null +++ b/internal/search/searcher.go @@ -0,0 +1,271 @@ +package search + +import ( + "fmt" + "math" + "regexp" + "sort" + "strings" +) + +const ( + weightFTS = 0.6 + weightTrigram = 0.4 + defaultLimit = 20 +) + +// QueryType classifies a search query. +type QueryType int + +const ( + QueryNaturalLanguage QueryType = iota + QuerySubstring + QueryMixed +) + +// SearchResult represents a single search hit. +type SearchResult struct { + Path string + Score float64 + Snippet string +} + +// Search runs a query against the index and returns ranked results. +func Search(store *Store, query string, limit int) ([]SearchResult, error) { + if limit <= 0 { + limit = defaultLimit + } + + qt := ClassifyQuery(query) + + var ftsResults, triResults []SearchResult + var err error + + switch qt { + case QueryNaturalLanguage: + ftsResults, err = searchFTS(store, query, limit) + if err != nil { + return nil, err + } + return ftsResults, nil + + case QuerySubstring: + if len(query) < 3 { + // Trigram requires at least 3 chars. + return nil, nil + } + triResults, err = searchTrigram(store, query, limit) + if err != nil { + return nil, err + } + return triResults, nil + + case QueryMixed: + ftsResults, err = searchFTS(store, query, limit) + if err != nil { + return nil, err + } + if len(query) >= 3 { + triResults, err = searchTrigram(store, query, limit) + if err != nil { + return nil, err + } + } + return mergeResults(ftsResults, triResults, limit), nil + } + + return nil, nil +} + +// ClassifyQuery determines how to route a query. 
+func ClassifyQuery(query string) QueryType { + hasSpaces := strings.Contains(query, " ") + + // camelCase or PascalCase: uppercase letter mid-word + camelCase := regexp.MustCompile(`[a-z][A-Z]`) + isCamel := camelCase.MatchString(query) + + // snake_case + isSnake := strings.Contains(query, "_") && !hasSpaces + + if hasSpaces && !isCamel && !isSnake { + return QueryNaturalLanguage + } + if !hasSpaces && (isCamel || isSnake) { + return QuerySubstring + } + if hasSpaces { + return QueryMixed + } + // Single word without special patterns — could be either. + // Route to both to maximize recall. + return QueryMixed +} + +func searchFTS(store *Store, query string, limit int) ([]SearchResult, error) { + // Escape special FTS5 characters in query. + escaped := escapeFTS5(query) + + rows, err := store.DB().Query(` + SELECT f.path, + snippet(fts, 1, '»', '«', '…', 32) as snippet, + bm25(fts, 5.0, 1.0) as score + FROM fts + JOIN files f ON f.id = fts.rowid + WHERE fts MATCH ? + ORDER BY score + LIMIT ? + `, escaped, limit) + if err != nil { + return nil, fmt.Errorf("fts query: %w", err) + } + defer rows.Close() + + var results []SearchResult + for rows.Next() { + var r SearchResult + if err := rows.Scan(&r.Path, &r.Snippet, &r.Score); err != nil { + return nil, err + } + // BM25 returns negative scores (lower = better). Negate for consistent sorting. + r.Score = -r.Score + results = append(results, r) + } + return results, rows.Err() +} + +func searchTrigram(store *Store, query string, limit int) ([]SearchResult, error) { + rows, err := store.DB().Query(` + SELECT f.path, + bm25(fts_trigram, 5.0, 1.0) as score + FROM fts_trigram + JOIN files f ON f.id = fts_trigram.rowid + WHERE fts_trigram MATCH ? + ORDER BY score + LIMIT ? 
+ `, query, limit) + if err != nil { + return nil, fmt.Errorf("trigram query: %w", err) + } + defer rows.Close() + + var results []SearchResult + for rows.Next() { + var r SearchResult + if err := rows.Scan(&r.Path, &r.Score); err != nil { + return nil, err + } + r.Score = -r.Score + results = append(results, r) + } + return results, rows.Err() +} + +func mergeResults(fts, tri []SearchResult, limit int) []SearchResult { + normScores(fts) + normScores(tri) + + merged := make(map[string]*SearchResult) + + for i := range fts { + r := fts[i] + merged[r.Path] = &SearchResult{ + Path: r.Path, + Score: weightFTS * r.Score, + Snippet: r.Snippet, + } + } + + for i := range tri { + r := tri[i] + if existing, ok := merged[r.Path]; ok { + existing.Score += weightTrigram * r.Score + } else { + merged[r.Path] = &SearchResult{ + Path: r.Path, + Score: weightTrigram * r.Score, + } + } + } + + results := make([]SearchResult, 0, len(merged)) + for _, r := range merged { + results = append(results, *r) + } + + sort.Slice(results, func(i, j int) bool { + return results[i].Score > results[j].Score + }) + + if len(results) > limit { + results = results[:limit] + } + + return results +} + +func normScores(results []SearchResult) { + if len(results) == 0 { + return + } + + min, max := math.Inf(1), math.Inf(-1) + for _, r := range results { + if r.Score < min { + min = r.Score + } + if r.Score > max { + max = r.Score + } + } + + spread := max - min + if spread == 0 { + for i := range results { + results[i].Score = 1.0 + } + return + } + + for i := range results { + results[i].Score = (results[i].Score - min) / spread + } +} + +// escapeFTS5 wraps each token in double quotes to avoid FTS5 syntax errors +// from special characters like -, *, etc. +func escapeFTS5(query string) string { + tokens := strings.Fields(query) + for i, t := range tokens { + // Wrap in double quotes, escaping any internal double quotes. 
+ t = strings.ReplaceAll(t, `"`, `""`) + tokens[i] = `"` + t + `"` + } + return strings.Join(tokens, " ") +} + +// FormatResults formats search results for CLI output. +func FormatResults(results []SearchResult) string { + if len(results) == 0 { + return "No results found.\n" + } + + var sb strings.Builder + for _, r := range results { + fmt.Fprintf(&sb, "%s (score: %.2f)\n", r.Path, r.Score) + if r.Snippet != "" { + fmt.Fprintf(&sb, " %s\n", r.Snippet) + } + sb.WriteString("\n") + } + return sb.String() +} + +// StatusInfo holds information for the --status flag. +type StatusInfo struct { + DBPath string + ProjectRoot string + FileCount int + LastIndexed string + SizeBytes int64 +} diff --git a/internal/search/searcher_test.go b/internal/search/searcher_test.go new file mode 100644 index 0000000..44405b5 --- /dev/null +++ b/internal/search/searcher_test.go @@ -0,0 +1,214 @@ +package search + +import ( + "path/filepath" + "testing" +) + +func indexedStore(t *testing.T, files map[string]string) *Store { + t.Helper() + root := t.TempDir() + dbPath := filepath.Join(t.TempDir(), "test.db") + + for rel, content := range files { + writeFile(t, root, rel, content) + } + + s, err := OpenStore(dbPath, root) + if err != nil { + t.Fatal(err) + } + + if _, err := Index(s, root, false); err != nil { + t.Fatal(err) + } + + t.Cleanup(func() { s.Close() }) + return s +} + +func TestSearch_FTSRanking(t *testing.T) { + s := indexedStore(t, map[string]string{ + "auth.go": "package auth\n// authentication middleware handles token validation\nfunc AuthMiddleware() {}\n", + "readme.md": "# Project\nThis project has nothing to do with authentication.\n", + "handler.go": "package handler\n// generic handler for HTTP requests\nfunc Handle() {}\n", + }) + + results, err := Search(s, "authentication middleware", 10) + if err != nil { + t.Fatal(err) + } + + if len(results) == 0 { + t.Fatal("expected results") + } + + // auth.go should rank first (has both terms). 
+ if results[0].Path != "auth.go" { + t.Errorf("expected auth.go first, got %s", results[0].Path) + } +} + +func TestSearch_PathWeighting(t *testing.T) { + s := indexedStore(t, map[string]string{ + "auth_middleware.go": "package auth\nfunc Run() {}\n", + "utils.go": "package utils\n// auth_middleware is referenced here\nfunc Helper() {}\n", + }) + + results, err := Search(s, "auth middleware", 10) + if err != nil { + t.Fatal(err) + } + + if len(results) == 0 { + t.Fatal("expected results") + } + + // File with matching path should rank higher. + if results[0].Path != "auth_middleware.go" { + t.Errorf("expected auth_middleware.go first, got %s", results[0].Path) + } +} + +func TestSearch_TrigramSubstring(t *testing.T) { + s := indexedStore(t, map[string]string{ + "config.go": "package config\ntype AppCfgLoader struct{}\n", + "service.go": "package service\ntype UserConfigService struct{}\n", + }) + + results, err := Search(s, "CfgLoad", 10) + if err != nil { + t.Fatal(err) + } + + // Should find config.go (contains AppCfgLoader). + found := false + for _, r := range results { + if r.Path == "config.go" { + found = true + } + } + if !found { + t.Error("expected config.go in results for CfgLoad") + } +} + +func TestSearch_TrigramMinLength(t *testing.T) { + s := indexedStore(t, map[string]string{ + "a.go": "package a\nfunc AB() {}\n", + }) + + // 2-char query should not crash. + results, err := Search(s, "AB", 10) + if err != nil { + t.Fatalf("expected no error for short query, got: %v", err) + } + + // May return nil or empty — that's fine. 
+ _ = results +} + +func TestSearch_QueryRouting(t *testing.T) { + tests := []struct { + query string + expected QueryType + }{ + {"error handling", QueryNaturalLanguage}, + {"CfgLoader", QuerySubstring}, + {"snake_case", QuerySubstring}, + {"AuthHandler middleware", QueryMixed}, + {"search", QueryMixed}, // single word → mixed + } + + for _, tt := range tests { + got := ClassifyQuery(tt.query) + if got != tt.expected { + t.Errorf("ClassifyQuery(%q) = %d, want %d", tt.query, got, tt.expected) + } + } +} + +func TestSearch_ScoreMerging(t *testing.T) { + fts := []SearchResult{ + {Path: "a.go", Score: 10}, + {Path: "b.go", Score: 5}, + } + tri := []SearchResult{ + {Path: "b.go", Score: 10}, + {Path: "c.go", Score: 5}, + } + + merged := mergeResults(fts, tri, 10) + + // All three files should be present. + if len(merged) != 3 { + t.Fatalf("expected 3 merged results, got %d", len(merged)) + } + + // b.go should have contributions from both backends. + var bScore float64 + for _, r := range merged { + if r.Path == "b.go" { + bScore = r.Score + } + } + if bScore == 0 { + t.Error("b.go should have a non-zero merged score") + } +} + +func TestSearch_SnippetFromFTS(t *testing.T) { + s := indexedStore(t, map[string]string{ + "main.go": "package main\nimport \"fmt\"\nfunc main() {\n\tfmt.Println(\"hello world\")\n}\n", + }) + + results, err := Search(s, "hello world", 10) + if err != nil { + t.Fatal(err) + } + + if len(results) == 0 { + t.Fatal("expected results") + } + + // Snippet should contain the markers. 
+ if results[0].Snippet == "" { + t.Error("expected non-empty snippet from FTS") + } +} + +func TestSearch_Limit(t *testing.T) { + files := map[string]string{} + for i := 0; i < 10; i++ { + name := filepath.Join("pkg", string(rune('a'+i))+".go") + files[name] = "package pkg\nfunc Handler() {}\n" + } + + s := indexedStore(t, files) + + results, err := Search(s, "Handler", 3) + if err != nil { + t.Fatal(err) + } + + if len(results) > 3 { + t.Errorf("expected at most 3 results, got %d", len(results)) + } +} + +func TestFormatResults_Empty(t *testing.T) { + out := FormatResults(nil) + if out != "No results found.\n" { + t.Errorf("unexpected output for empty results: %q", out) + } +} + +func TestFormatResults_WithResults(t *testing.T) { + results := []SearchResult{ + {Path: "a.go", Score: 0.87, Snippet: "…the »test« content…"}, + } + out := FormatResults(results) + if out == "" { + t.Error("expected non-empty formatted output") + } +} diff --git a/internal/search/store.go b/internal/search/store.go new file mode 100644 index 0000000..26c8b87 --- /dev/null +++ b/internal/search/store.go @@ -0,0 +1,318 @@ +package search + +import ( + "database/sql" + "fmt" + "os" + "path/filepath" + "time" + + _ "github.com/mattn/go-sqlite3" +) + +const SchemaVersion = "1" + +// Store wraps a SQLite database with dual FTS5 indexes. +type Store struct { + db *sql.DB + projectRoot string +} + +// OpenStore opens (or creates) the search index database at dbPath. +// projectRoot is recorded in index_meta for diagnostics and prune. 
+func OpenStore(dbPath, projectRoot string) (*Store, error) { + if err := os.MkdirAll(filepath.Dir(dbPath), 0755); err != nil { + return nil, fmt.Errorf("create cache dir: %w", err) + } + + db, err := sql.Open("sqlite3", dbPath+"?_journal=WAL&_fk=1") + if err != nil { + return nil, fmt.Errorf("open db: %w", err) + } + + s := &Store{db: db, projectRoot: projectRoot} + + if err := s.ensureSchema(); err != nil { + db.Close() + return nil, err + } + + return s, nil +} + +// Close closes the underlying database connection. +func (s *Store) Close() error { + return s.db.Close() +} + +// DB exposes the underlying *sql.DB for use by indexer and searcher. +func (s *Store) DB() *sql.DB { + return s.db +} + +// ProjectRoot returns the project root stored in this store. +func (s *Store) ProjectRoot() string { + return s.projectRoot +} + +// ForceRebuild drops all tables and recreates the schema. +func (s *Store) ForceRebuild() error { + tables := []string{"fts_trigram", "fts", "files", "index_meta"} + for _, t := range tables { + if _, err := s.db.Exec("DROP TABLE IF EXISTS " + t); err != nil { + return fmt.Errorf("drop %s: %w", t, err) + } + } + return s.createSchema() +} + +func (s *Store) ensureSchema() error { + // Check if index_meta exists to determine if this is a fresh db. + var count int + err := s.db.QueryRow("SELECT count(*) FROM sqlite_master WHERE type='table' AND name='index_meta'").Scan(&count) + if err != nil { + return fmt.Errorf("check schema: %w", err) + } + + if count == 0 { + return s.createSchema() + } + + return s.checkVersion() +} + +func (s *Store) checkVersion() error { + var version string + err := s.db.QueryRow("SELECT value FROM index_meta WHERE key='schema_version'").Scan(&version) + if err != nil { + // Missing or corrupt — recreate. + return s.ForceRebuild() + } + + if version == SchemaVersion { + return nil + } + + // Older version: migrate (for now, rebuild). 
+ if version < SchemaVersion { + return s.ForceRebuild() + } + + // Newer version: refuse. + return fmt.Errorf("index schema version %s is newer than supported %s; run 'draft index --force' to rebuild", version, SchemaVersion) +} + +func (s *Store) createSchema() error { + stmts := []string{ + `CREATE TABLE IF NOT EXISTS index_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + )`, + `CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY, + path TEXT UNIQUE NOT NULL, + hash TEXT NOT NULL, + mtime INTEGER NOT NULL, + indexed INTEGER NOT NULL + )`, + `CREATE VIRTUAL TABLE IF NOT EXISTS fts USING fts5( + path, + content, + content_rowid, + tokenize = 'porter unicode61' + )`, + `CREATE VIRTUAL TABLE IF NOT EXISTS fts_trigram USING fts5( + path, + content, + content = '', + content_rowid = id, + tokenize = 'trigram' + )`, + } + + tx, err := s.db.Begin() + if err != nil { + return fmt.Errorf("begin tx: %w", err) + } + defer tx.Rollback() + + for _, stmt := range stmts { + if _, err := tx.Exec(stmt); err != nil { + return fmt.Errorf("exec schema DDL: %w", err) + } + } + + // Populate index_meta. + now := time.Now().UTC().Format(time.RFC3339) + meta := map[string]string{ + "project_root": s.projectRoot, + "created_at": now, + "schema_version": SchemaVersion, + } + for k, v := range meta { + if _, err := tx.Exec("INSERT OR REPLACE INTO index_meta (key, value) VALUES (?, ?)", k, v); err != nil { + return fmt.Errorf("insert meta %s: %w", k, err) + } + } + + return tx.Commit() +} + +// --- Files table CRUD --- + +// FileRow represents a row in the files table. +type FileRow struct { + ID int64 + Path string + Hash string + Mtime int64 + Indexed int64 +} + +// GetFile returns the file row for the given path, or nil if not found. 
+func (s *Store) GetFile(path string) (*FileRow, error) { + row := s.db.QueryRow("SELECT id, path, hash, mtime, indexed FROM files WHERE path = ?", path) + f := &FileRow{} + err := row.Scan(&f.ID, &f.Path, &f.Hash, &f.Mtime, &f.Indexed) + if err == sql.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, err + } + return f, nil +} + +// UpsertFile inserts or updates a file row and returns its ID. +func (s *Store) UpsertFile(tx *sql.Tx, path, hash string, mtime int64) (int64, error) { + now := time.Now().Unix() + res, err := tx.Exec( + `INSERT INTO files (path, hash, mtime, indexed) VALUES (?, ?, ?, ?) + ON CONFLICT(path) DO UPDATE SET hash=excluded.hash, mtime=excluded.mtime, indexed=excluded.indexed`, + path, hash, mtime, now, + ) + if err != nil { + return 0, err + } + return res.LastInsertId() +} + +// UpdateMtime updates only the mtime for a file (content unchanged). +func (s *Store) UpdateMtime(tx *sql.Tx, id, mtime int64) error { + _, err := tx.Exec("UPDATE files SET mtime = ? WHERE id = ?", mtime, id) + return err +} + +// AllFilePaths returns all indexed file paths. +func (s *Store) AllFilePaths() (map[string]int64, error) { + rows, err := s.db.Query("SELECT path, id FROM files") + if err != nil { + return nil, err + } + defer rows.Close() + + paths := make(map[string]int64) + for rows.Next() { + var path string + var id int64 + if err := rows.Scan(&path, &id); err != nil { + return nil, err + } + paths[path] = id + } + return paths, rows.Err() +} + +// DeleteFile removes a file and its FTS entries. +// For contentless fts_trigram, we must supply the original content for deletion. +func (s *Store) DeleteFile(tx *sql.Tx, id int64) error { + // Get the original content from the fts table before deleting. 
+ var path, content string + err := tx.QueryRow("SELECT path, content FROM fts WHERE rowid = ?", id).Scan(&path, &content) + if err != nil && err != sql.ErrNoRows { + return fmt.Errorf("read fts for delete: %w", err) + } + + if _, err := tx.Exec("DELETE FROM fts WHERE rowid = ?", id); err != nil { + return err + } + // Contentless FTS5 table requires special delete command with original values. + if path != "" { + if _, err := tx.Exec( + "INSERT INTO fts_trigram(fts_trigram, rowid, path, content) VALUES('delete', ?, ?, ?)", + id, path, content, + ); err != nil { + return err + } + } + if _, err := tx.Exec("DELETE FROM files WHERE id = ?", id); err != nil { + return err + } + return nil +} + +// --- FTS operations --- + +// InsertFTS inserts content into both FTS5 tables for the given file ID. +func (s *Store) InsertFTS(tx *sql.Tx, id int64, path, content string) error { + if _, err := tx.Exec("INSERT INTO fts (rowid, path, content) VALUES (?, ?, ?)", id, path, content); err != nil { + return fmt.Errorf("insert fts: %w", err) + } + if _, err := tx.Exec("INSERT INTO fts_trigram (rowid, path, content) VALUES (?, ?, ?)", id, path, content); err != nil { + return fmt.Errorf("insert fts_trigram: %w", err) + } + return nil +} + +// DeleteFTS removes FTS entries for the given file ID from both tables. +// Reads original content from fts to supply to contentless fts_trigram delete. 
+func (s *Store) DeleteFTS(tx *sql.Tx, id int64) error { + var path, content string + err := tx.QueryRow("SELECT path, content FROM fts WHERE rowid = ?", id).Scan(&path, &content) + if err != nil && err != sql.ErrNoRows { + return fmt.Errorf("read fts for delete: %w", err) + } + + if _, err := tx.Exec("DELETE FROM fts WHERE rowid = ?", id); err != nil { + return err + } + if path != "" { + if _, err := tx.Exec( + "INSERT INTO fts_trigram(fts_trigram, rowid, path, content) VALUES('delete', ?, ?, ?)", + id, path, content, + ); err != nil { + return err + } + } + return nil +} + +// --- Meta queries --- + +// FileCount returns the number of indexed files. +func (s *Store) FileCount() (int, error) { + var count int + err := s.db.QueryRow("SELECT count(*) FROM files").Scan(&count) + return count, err +} + +// Meta returns a value from index_meta. +func (s *Store) Meta(key string) (string, error) { + var value string + err := s.db.QueryRow("SELECT value FROM index_meta WHERE key = ?", key).Scan(&value) + if err == sql.ErrNoRows { + return "", nil + } + return value, err +} + +// Optimize runs PRAGMA optimize on the database. +func (s *Store) Optimize() error { + _, err := s.db.Exec("PRAGMA optimize") + return err +} + +// Begin starts a new transaction. +func (s *Store) Begin() (*sql.Tx, error) { + return s.db.Begin() +} diff --git a/internal/search/store_test.go b/internal/search/store_test.go new file mode 100644 index 0000000..9d09062 --- /dev/null +++ b/internal/search/store_test.go @@ -0,0 +1,277 @@ +package search + +import ( + "database/sql" + "os" + "path/filepath" + "testing" +) + +func tempStore(t *testing.T) *Store { + t.Helper() + dbPath := filepath.Join(t.TempDir(), "test.db") + s, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { s.Close() }) + return s +} + +func TestOpenStore_CreatesSchema(t *testing.T) { + s := tempStore(t) + + // Verify all four tables exist. 
+ tables := []string{"index_meta", "files", "fts", "fts_trigram"} + for _, name := range tables { + var count int + err := s.DB().QueryRow( + "SELECT count(*) FROM sqlite_master WHERE name = ?", name, + ).Scan(&count) + if err != nil { + t.Fatal(err) + } + if count == 0 { + t.Errorf("table %q not found", name) + } + } +} + +func TestOpenStore_MetaPopulated(t *testing.T) { + s := tempStore(t) + + root, err := s.Meta("project_root") + if err != nil || root != "/test/project" { + t.Errorf("project_root = %q, err = %v", root, err) + } + + ver, err := s.Meta("schema_version") + if err != nil || ver != SchemaVersion { + t.Errorf("schema_version = %q, err = %v", ver, err) + } + + created, err := s.Meta("created_at") + if err != nil || created == "" { + t.Errorf("created_at = %q, err = %v", created, err) + } +} + +func TestOpenStore_VersionMatch(t *testing.T) { + dbPath := filepath.Join(t.TempDir(), "test.db") + + // First open creates. + s1, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + s1.Close() + + // Second open with same version succeeds. + s2, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + s2.Close() +} + +func TestOpenStore_NewerVersionFails(t *testing.T) { + dbPath := filepath.Join(t.TempDir(), "test.db") + + s, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + // Set version to something newer. + _, err = s.DB().Exec("UPDATE index_meta SET value = '999' WHERE key = 'schema_version'") + if err != nil { + t.Fatal(err) + } + s.Close() + + // Re-open should fail. + _, err = OpenStore(dbPath, "/test/project") + if err == nil { + t.Fatal("expected error for newer schema version") + } +} + +func TestOpenStore_OlderVersionRebuilds(t *testing.T) { + dbPath := filepath.Join(t.TempDir(), "test.db") + + s, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + // Insert a file to verify rebuild clears data. 
+ tx, _ := s.Begin() + s.UpsertFile(tx, "old.go", "abc", 100) + tx.Commit() + // Set version to something older. + _, err = s.DB().Exec("UPDATE index_meta SET value = '0' WHERE key = 'schema_version'") + if err != nil { + t.Fatal(err) + } + s.Close() + + // Re-open should rebuild (older version). + s2, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + defer s2.Close() + + count, err := s2.FileCount() + if err != nil { + t.Fatal(err) + } + if count != 0 { + t.Errorf("expected 0 files after rebuild, got %d", count) + } +} + +func TestOpenStore_CacheDirCreated(t *testing.T) { + dir := filepath.Join(t.TempDir(), "deep", "nested", "cache") + dbPath := filepath.Join(dir, "test.db") + + s, err := OpenStore(dbPath, "/test/project") + if err != nil { + t.Fatal(err) + } + s.Close() + + if _, err := os.Stat(dir); os.IsNotExist(err) { + t.Error("cache directory was not created") + } +} + +func TestStore_FilesCRUD(t *testing.T) { + s := tempStore(t) + + // Insert a file. + tx, _ := s.Begin() + id, err := s.UpsertFile(tx, "main.go", "hash1", 1000) + if err != nil { + t.Fatal(err) + } + if err := s.InsertFTS(tx, id, "main.go", "package main"); err != nil { + t.Fatal(err) + } + tx.Commit() + + // Get it back. + f, err := s.GetFile("main.go") + if err != nil { + t.Fatal(err) + } + if f == nil { + t.Fatal("expected file, got nil") + } + if f.Hash != "hash1" || f.Mtime != 1000 { + t.Errorf("got hash=%q mtime=%d", f.Hash, f.Mtime) + } + + // AllFilePaths. + paths, err := s.AllFilePaths() + if err != nil { + t.Fatal(err) + } + if len(paths) != 1 { + t.Errorf("expected 1 path, got %d", len(paths)) + } + + // FileCount. + count, err := s.FileCount() + if err != nil { + t.Fatal(err) + } + if count != 1 { + t.Errorf("expected 1, got %d", count) + } + + // Delete. 
+ tx, _ = s.Begin() + if err := s.DeleteFile(tx, f.ID); err != nil { + t.Fatal(err) + } + tx.Commit() + + f2, err := s.GetFile("main.go") + if err != nil { + t.Fatal(err) + } + if f2 != nil { + t.Error("expected nil after delete") + } +} + +func TestStore_UpdateMtime(t *testing.T) { + s := tempStore(t) + + tx, _ := s.Begin() + id, _ := s.UpsertFile(tx, "a.go", "h", 100) + tx.Commit() + + tx, _ = s.Begin() + if err := s.UpdateMtime(tx, id, 200); err != nil { + t.Fatal(err) + } + tx.Commit() + + f, _ := s.GetFile("a.go") + if f.Mtime != 200 { + t.Errorf("mtime = %d, want 200", f.Mtime) + } +} + +func TestStore_ForceRebuild(t *testing.T) { + s := tempStore(t) + + // Add data. + tx, _ := s.Begin() + id, _ := s.UpsertFile(tx, "x.go", "h", 1) + s.InsertFTS(tx, id, "x.go", "content") + tx.Commit() + + // Rebuild. + if err := s.ForceRebuild(); err != nil { + t.Fatal(err) + } + + count, _ := s.FileCount() + if count != 0 { + t.Errorf("expected 0 after rebuild, got %d", count) + } + + // Verify FTS tables are empty. + var ftsCount int + s.DB().QueryRow("SELECT count(*) FROM fts").Scan(&ftsCount) + if ftsCount != 0 { + t.Errorf("fts not empty after rebuild") + } +} + +func TestStore_FTSContentlessTrigramNoContent(t *testing.T) { + s := tempStore(t) + + tx, _ := s.Begin() + id, _ := s.UpsertFile(tx, "test.go", "h", 1) + s.InsertFTS(tx, id, "test.go", "some content here for testing") + tx.Commit() + + // fts_trigram is contentless — reading content should return empty. 
+ var content sql.NullString + err := s.DB().QueryRow("SELECT content FROM fts_trigram WHERE rowid = ?", id).Scan(&content) + if err != nil { + t.Fatal(err) + } + if content.Valid && content.String != "" { + t.Errorf("contentless table returned content: %q", content.String) + } +} + +func TestStore_Optimize(t *testing.T) { + s := tempStore(t) + if err := s.Optimize(); err != nil { + t.Fatal(err) + } +} diff --git a/specs/search-indexer.md b/specs/search-indexer.md new file mode 100644 index 0000000..8640832 --- /dev/null +++ b/specs/search-indexer.md @@ -0,0 +1,60 @@ +--- +title: Search Indexer +description: Incremental file walker and dual FTS5 indexer with draft index CLI command +status: implemented +author: Heiko Braun +--- + +# Feature: search-indexer + +Source: [docs/project-search.md](../docs/project-search.md) + +Depends on: [search-store](search-store.md) + +## Goal + +Walk the project tree, hash file content, and maintain both FTS5 indexes incrementally. Expose via `draft index` CLI command with `--force`, `--prune`, `--list`, and `--db` flags. First full index of ~10k LOC project under 2 seconds. 
+ +## Acceptance Criteria + +- [x] `draft index` builds/updates both FTS5 indexes from the full project tree +- [x] Incremental: mtime fast-path skips unchanged files; xxh3 hash detects content changes when mtime differs +- [x] mtime-only changes (same content, new mtime) update `files.mtime` without FTS5 re-insert +- [x] Deleted files are removed from `files`, `fts`, and `fts_trigram` +- [x] `.gitignore` patterns respected; binary files (null bytes in first 8192 bytes), files > 1 MB, `.git/`, `node_modules/`, and the index db itself are skipped +- [x] `--force` drops and recreates all tables, does full re-scan +- [x] `--prune` scans cache dir, deletes indexes whose `project_root` no longer exists on disk +- [x] `--list` shows all indexes with project path, file count, size, last-indexed time +- [x] First full index of ~10k LOC project completes in under 2 seconds + +## Approach + +`internal/search/indexer.go`: walk project tree using `filepath.WalkDir`, filter via `go-git/go-git` gitignore matching plus hardcoded skips (binary, size, dotdirs). For each file: stat for mtime, compare against `files` table; if mtime changed, read + xxh3 hash; if hash changed, delete old FTS5 rows, insert into both `fts` and `fts_trigram`, update `files` row. After walk, delete `files` rows for paths no longer on disk (cascade to FTS5). Wrap full index in a transaction. `PRAGMA optimize` at end. + +`cmd/index.go`: Cobra command wiring `draft index [--force] [--prune] [--list] [--db]`. `--force` calls store's drop-and-recreate before indexing. `--prune` and `--list` operate on cache directory contents. + +## Affected Modules + +- `internal/search/indexer.go` — new: file walker, hasher, dual FTS5 upsert, incremental logic +- `cmd/index.go` — new: `draft index` command with flags +- `go.mod` — add `go-git/go-git` (gitignore matching) + +## Test Strategy + +- **Full index**: temp project with Go, Markdown, YAML, binary, `.gitignore`d files. 
Verify correct files in both FTS5 tables, excluded files absent. +- **Incremental update**: index, modify one file, re-index. Verify only that file's rows changed (check `indexed` timestamp). +- **Incremental delete**: index, delete a file, re-index. Verify removed from all three tables. +- **mtime-only change**: index, touch file (same content), re-index. Verify no FTS5 re-insert. +- **Binary skip**: include .png, verify excluded. +- **Large file skip**: include file > 1 MB, verify excluded. +- **Forced rebuild**: index, then `--force`. Verify all `indexed` timestamps refreshed. +- **Contentless trigram**: verify `fts_trigram` does not store file content. +- **Prune**: index two projects, delete one's directory, run `--prune`. Verify orphaned index deleted, other kept. +- **List**: index projects, run `--list`. Verify output shows path, file count, size, last-indexed time. +- **Performance**: index a ~10k LOC project, verify completes under 2 seconds. + +## Out of Scope + +- Search queries, ranking, output (→ search-query) +- Integration with /spec, /refine, /implement (→ search-integration) +- File watching, background re-indexing diff --git a/specs/search-integration.md b/specs/search-integration.md new file mode 100644 index 0000000..73ebb3f --- /dev/null +++ b/specs/search-integration.md @@ -0,0 +1,49 @@ +--- +title: Search Integration +description: Wire project search into /spec, /refine, and /implement skills for automatic indexing and context retrieval +status: implemented +author: Heiko Braun +--- + +# Feature: search-integration + +Source: [docs/project-search.md](../docs/project-search.md) + +Depends on: [search-indexer](search-indexer.md), [search-query](search-query.md) + +## Goal + +Integrate project search into existing draft workflows: auto-index after spec writes, auto-search before implementation. Agents get relevant codebase context without manual grep exploration. 
+ +## Acceptance Criteria + +- [x] `/spec` triggers `draft index` (incremental) after writing the spec file +- [x] `/refine` triggers `draft index` (incremental) after writing the spec file +- [x] `/implement` runs `draft search` with the spec's title and key terms before generating code, includes top results as agent context +- [x] Indexing side-effects are silent (no agent-visible output unless errors) +- [x] Search results in `/implement` are formatted as context the implementing agent can use + +## Approach + +Add post-write step to `skills/spec.md` and `skills/refine.md`: after the spec file is written, run `draft index`. This is lightweight (~50ms for incremental, only the changed spec re-indexes). + +Add pre-generation step to `skills/implement.md`: read spec title and acceptance criteria, extract key terms (nouns, technical terms — simple heuristic or use title as-is for MVP), run `draft search "" --limit 10`, inject top results into the implementing agent's context prompt. + +## Affected Modules + +- `skills/spec.md` — add post-write instruction: run `draft index` +- `skills/refine.md` — add post-write instruction: run `draft index` +- `skills/implement.md` — add pre-generation step: extract terms from spec, run `draft search`, include results as context + +## Test Strategy + +- **Spec integration**: run `/spec`, verify `draft index` runs after spec file written. Search for new spec content, verify found. +- **Refine integration**: run `/refine`, verify `draft index` runs after spec updated. +- **Implement integration**: create spec with known terms, run `/implement`, verify search results appear in agent context before code generation starts. 
+ +## Out of Scope + +- `/review` triggering index updates +- Git hook integration (post-checkout) +- Key term extraction beyond simple title/criteria parsing +- Automatic staleness detection or age-based re-indexing diff --git a/specs/search-query.md b/specs/search-query.md new file mode 100644 index 0000000..09ce970 --- /dev/null +++ b/specs/search-query.md @@ -0,0 +1,57 @@ +--- +title: Search Query +description: Query classification, dual FTS5 backend dispatch, BM25 score merging, and draft search CLI command +status: implemented +author: Heiko Braun +--- + +# Feature: search-query + +Source: [docs/project-search.md](../docs/project-search.md) + +Depends on: [search-store](search-store.md), [search-indexer](search-indexer.md) + +## Goal + +Provide ranked, token-efficient search over the project index. Classify queries, route to appropriate FTS5 backend(s), merge scores, and return compact results with snippets. Expose via `draft search ` CLI command. + +## Acceptance Criteria + +- [x] `draft search ` returns ranked results with file path, line range, and snippet +- [x] Query routing: natural language (spaces, common words) → `fts` only; code identifiers (camelCase, snake_case, no spaces) → `fts_trigram` only; ambiguous → both with merge +- [x] FTS5 backend uses `bm25(fts, 5.0, 1.0)` weighting path matches 5x over content +- [x] Trigram backend handles substring matches; queries < 3 chars fall back gracefully +- [x] When both backends run, scores are min-max normalized to [0,1] then merged with weights 0.6 (fts) / 0.4 (trigram), deduplicated by file path +- [x] `--limit N` controls max results (default 20) +- [x] `--status` reports index path, project root, file count, last-indexed time, db size +- [x] Snippets use `»` / `«` markers, sourced from `fts` table (not contentless trigram) + +## Approach + +`internal/search/searcher.go`: classify query (regex heuristics for camelCase/snake_case/spaces), dispatch to one or both backends via SQL queries against `fts` and 
`fts_trigram`. Each backend returns `(path, score, snippet?)`. Merge: min-max normalize each result set, combine with weighted sum, dedup by path keeping highest score, sort descending, apply limit. + +`cmd/search.go`: Cobra command `draft search [--limit N] [--status]`. `--status` reads `index_meta` and `files` count, prints diagnostics. Default mode formats results as `path:lineRange (score: X.XX)` with indented snippet. + +## Affected Modules + +- `internal/search/searcher.go` — new: query classifier, backend dispatch, score merger, result formatting +- `cmd/search.go` — new: `draft search` command with `--limit`, `--status` + +## Test Strategy + +- **FTS5 ranking**: known documents with varying relevance. Natural language query, verify BM25 ordering. +- **Path weighting**: term in filename and body — filename match ranks higher. +- **Trigram substring**: index `AppCfgLoader` and `UserConfigService`, search `CfgLoad`. Verify first found, second not. +- **Trigram min length**: 2-char search falls back gracefully (no crash, no results or warning). +- **Query routing**: `"error handling"` → fts only, `CfgLoader` → trigram only, `AuthHandler` → both. +- **Score merging**: files ranking differently in each backend. Merged result reflects 0.6/0.4 weights. +- **Snippet source**: snippets come from `fts` table, not `fts_trigram`. +- **Status output**: verify reports correct file count, project path, last-indexed time, db size. +- **Limit**: verify `--limit 3` returns at most 3 results. 
+ +## Out of Scope + +- Indexing logic (→ search-indexer) +- Tunable merge weights via config (hardcoded for MVP) +- `--exact` flag for trigram-only mode +- Semantic/vector search diff --git a/specs/search-store.md b/specs/search-store.md new file mode 100644 index 0000000..f9b3e9c --- /dev/null +++ b/specs/search-store.md @@ -0,0 +1,54 @@ +--- +title: Search Store +description: SQLite database layer with FTS5 schema, project root detection, cache directory resolution, and schema versioning +status: implemented +author: Heiko Braun +--- + +# Feature: search-store + +Source: [docs/project-search.md](../docs/project-search.md) + +## Goal + +Provide the persistence layer for project search: SQLite database with dual FTS5 virtual tables (porter-stemmed + trigram), project root detection, deterministic path-to-hash mapping, platform-appropriate cache directory resolution, and schema versioning with migration support. + +## Acceptance Criteria + +- [x] `OpenStore(projectRoot)` creates/opens a SQLite database at the platform cache path derived from `xxh3(realpath(projectRoot))` +- [x] Schema includes `index_meta`, `files`, `fts` (porter unicode61), and `fts_trigram` (contentless, detail=none) tables as specified +- [x] `index_meta` stores `project_root`, `created_at`, and `schema_version` on first creation +- [x] Schema version check on open: matching version proceeds, older version triggers migration, newer version returns error suggesting `--force`, missing/corrupt creates fresh +- [x] `DetectProjectRoot(cwd)` walks upward looking for `.draft/` or `CLAUDE.md`, falls back to cwd +- [x] Symlinks are resolved before hashing so the same project via different symlinks shares one index +- [x] `--db` flag support: `OpenStore` accepts an optional override path via `IndexPath` +- [x] Cache directory is created automatically if it doesn't exist + +## Approach + +New package `internal/search/`. 
`project.go` handles root detection (walk upward for `.draft/` or `CLAUDE.md`) and cache path computation (`xxh3` hex hash of resolved absolute path). `store.go` manages SQLite connection via `mattn/go-sqlite3` with `fts5` build tag, creates schema on first open, checks `schema_version` on subsequent opens. Store exposes low-level CRUD for `files` table and insert/delete for both FTS5 tables — no indexing or search logic. + +## Affected Modules + +- `internal/search/project.go` — new: project root detection, path-to-hash, cache dir resolution +- `internal/search/store.go` — new: SQLite connection, schema DDL, version check/migration, CRUD methods +- `go.mod` — add `mattn/go-sqlite3`, `zeebo/xxh3` + +## Test Strategy + +- **Project root detection**: temp dir with `.draft/` marker at root, run from subdirectory, verify root found. Repeat with no marker, verify cwd returned. +- **Deterministic hashing**: same project path produces same db filename across calls. +- **Symlink resolution**: project accessed via symlink maps to same hash as real path. +- **Schema creation**: open fresh db, verify all four tables exist with correct structure. +- **Schema version match**: open existing db with matching version, verify no rebuild. +- **Schema version upgrade**: db with older version triggers migration, version updated. +- **Schema version downgrade**: db with newer version returns error mentioning `--force`. +- **Cache dir creation**: remove cache dir, open store, verify dir created. +- **--db override**: open with explicit path, verify db at that path. 
+ +## Out of Scope + +- File walking, content hashing, indexing logic (→ search-indexer) +- Query execution, ranking, output formatting (→ search-query) +- CLI commands (→ search-indexer, search-query) +- Semantic/vector search, file watching, MCP server From dce8852bded6b3eac55a940c365efecb406b1581 Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 14:09:17 +0100 Subject: [PATCH 2/6] Switch from mattn/go-sqlite3 to modernc.org/sqlite for CGo-free builds mattn/go-sqlite3 requires CGO_ENABLED=1 and a C compiler, which breaks cross-compilation in goreleaser (CGO_ENABLED=0). modernc.org/sqlite is a pure-Go SQLite implementation that includes FTS5 by default and needs no build tags or CGo. Co-Authored-By: Claude Opus 4.6 --- go.mod | 13 +++++++-- go.sum | 57 +++++++++++++++++++++++++++++++++++++--- internal/search/store.go | 4 +-- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 195849c..27cddd1 100644 --- a/go.mod +++ b/go.mod @@ -3,17 +3,26 @@ module github.com/heiko-braun/draft go 1.24.3 require ( - github.com/mattn/go-sqlite3 v1.14.34 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 github.com/spf13/cobra v1.10.2 github.com/yuin/goldmark v1.7.16 github.com/zeebo/xxh3 v1.1.0 gopkg.in/yaml.v3 v3.0.1 + modernc.org/sqlite v1.46.1 ) require ( + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/spf13/pflag v1.0.9 // indirect - golang.org/x/sys v0.30.0 // indirect + golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect + golang.org/x/sys v0.37.0 // indirect + modernc.org/libc v1.67.6 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory 
v1.11.0 // indirect ) diff --git a/go.sum b/go.sum index 42d085e..d9cc5ea 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,26 @@ github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= -github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk= -github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime 
v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= @@ -26,10 +38,47 @@ github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= +golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= 
+golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= +modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= +modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc= +modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM= +modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA= +modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc= +modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE= +modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= +modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= +modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= +modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= +modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 
h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= +modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= +modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/internal/search/store.go b/internal/search/store.go index 26c8b87..2180e5a 100644 --- a/internal/search/store.go +++ b/internal/search/store.go @@ -7,7 +7,7 @@ import ( "path/filepath" "time" - _ "github.com/mattn/go-sqlite3" + _ "modernc.org/sqlite" ) const SchemaVersion = "1" @@ -25,7 +25,7 @@ func OpenStore(dbPath, projectRoot string) (*Store, error) { return nil, fmt.Errorf("create cache dir: %w", err) } - db, err := sql.Open("sqlite3", dbPath+"?_journal=WAL&_fk=1") + db, err := sql.Open("sqlite", dbPath+"?_pragma=journal_mode(WAL)&_pragma=foreign_keys(1)") if err != nil { return nil, fmt.Errorf("open db: %w", err) } From daf0189ff9b808ef24fd6b576fafadf239fec3d2 Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 14:10:45 +0100 Subject: [PATCH 3/6] Add search integration to Cursor skill source files The sync-templates script copies from .cursor/ (source of truth) into cmd/draft/templates/.cursor/. The previous commit only updated the template copies, not the source. 
This adds the same draft index/search hooks to the source .cursor/ skills. Co-Authored-By: Claude Opus 4.6 --- .cursor/skills/implement/SKILL.md | 10 +++++++++- .cursor/skills/refine/SKILL.md | 13 ++++++++++++- .cursor/skills/spec/SKILL.md | 10 ++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/.cursor/skills/implement/SKILL.md b/.cursor/skills/implement/SKILL.md index 7e6c052..f269e45 100644 --- a/.cursor/skills/implement/SKILL.md +++ b/.cursor/skills/implement/SKILL.md @@ -15,10 +15,18 @@ You implement features as small, complete vertical slices with continuous testin ## Workflow -### 1. Load Spec & Assess Blast Radius +### 1. Load Spec & Search for Context Read the relevant `/specs/{feature}.md` file. If multiple specs exist and it's unclear which one, ask the user. +**Search for related code**: Before writing any code, use the spec's title and key terms to search the codebase for relevant existing code: + +```bash +draft search "" --limit 10 +``` + +Review the search results to understand existing patterns, related modules, and potential conflicts. Use these results to inform your implementation approach. + Before writing any code, assess the change: - **Which modules/files will this touch?** List them. If the spec has an "Affected Modules" section, verify it's still accurate. diff --git a/.cursor/skills/refine/SKILL.md b/.cursor/skills/refine/SKILL.md index 94a9499..137a091 100644 --- a/.cursor/skills/refine/SKILL.md +++ b/.cursor/skills/refine/SKILL.md @@ -58,7 +58,18 @@ Refine an existing specification based on new insights, feedback, or changing re **Refinement 2026-01-25**: Updated approach to use WebSocket instead of polling based on performance testing results. Added new acceptance criterion for connection handling. Blast radius unchanged — change is contained within the `transport` module. 
``` -Remember: +## Update Search Index + +After saving the refined spec, update the project search index: + +```bash +draft index +``` + +This runs incrementally (~50ms). Do not show the output to the user unless it fails. + +## Reminders + - Keep refinements focused and minimal - Preserve the spec's history through notes - Suggest new specs for major scope changes diff --git a/.cursor/skills/spec/SKILL.md b/.cursor/skills/spec/SKILL.md index a416c8e..60e5489 100644 --- a/.cursor/skills/spec/SKILL.md +++ b/.cursor/skills/spec/SKILL.md @@ -76,6 +76,16 @@ Present the spec summary and ask: "Does this capture what you want? I'll impleme If the user wants changes, revise the spec and confirm again. +### Phase 4: Update Search Index + +After writing the spec file, update the project search index so the new spec is immediately discoverable: + +```bash +draft index +``` + +This runs incrementally (~50ms) and only re-indexes the changed file. Do not show the output to the user unless it fails. + ## Reference See `/specs/TEMPLATE.md` for the spec file format. From fc54773a983ac8d07aaef6d41b6fd2b4e3326725 Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 14:19:56 +0100 Subject: [PATCH 4/6] Fix search-store spec: remove detail=none and update SQLite driver reference detail=none is incompatible with FTS5 trigram tokenizer (trigram matching relies on phrase queries which require detail=full). Also update driver reference from mattn/go-sqlite3 to modernc.org/sqlite to match implementation. 
Co-Authored-By: Claude Opus 4.6 --- specs/search-store.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/specs/search-store.md b/specs/search-store.md index f9b3e9c..6772ca4 100644 --- a/specs/search-store.md +++ b/specs/search-store.md @@ -16,7 +16,7 @@ Provide the persistence layer for project search: SQLite database with dual FTS5 ## Acceptance Criteria - [x] `OpenStore(projectRoot)` creates/opens a SQLite database at the platform cache path derived from `xxh3(realpath(projectRoot))` -- [x] Schema includes `index_meta`, `files`, `fts` (porter unicode61), and `fts_trigram` (contentless, detail=none) tables as specified +- [x] Schema includes `index_meta`, `files`, `fts` (porter unicode61), and `fts_trigram` (contentless) tables as specified — `detail=none` is incompatible with the trigram tokenizer (trigram matching relies on phrase queries which require `detail=full`) - [x] `index_meta` stores `project_root`, `created_at`, and `schema_version` on first creation - [x] Schema version check on open: matching version proceeds, older version triggers migration, newer version returns error suggesting `--force`, missing/corrupt creates fresh - [x] `DetectProjectRoot(cwd)` walks upward looking for `.draft/` or `CLAUDE.md`, falls back to cwd @@ -26,13 +26,13 @@ Provide the persistence layer for project search: SQLite database with dual FTS5 ## Approach -New package `internal/search/`. `project.go` handles root detection (walk upward for `.draft/` or `CLAUDE.md`) and cache path computation (`xxh3` hex hash of resolved absolute path). `store.go` manages SQLite connection via `mattn/go-sqlite3` with `fts5` build tag, creates schema on first open, checks `schema_version` on subsequent opens. Store exposes low-level CRUD for `files` table and insert/delete for both FTS5 tables — no indexing or search logic. +New package `internal/search/`. 
`project.go` handles root detection (walk upward for `.draft/` or `CLAUDE.md`) and cache path computation (`xxh3` hex hash of resolved absolute path). `store.go` manages SQLite connection via `modernc.org/sqlite` (CGo-free), creates schema on first open, checks `schema_version` on subsequent opens. Store exposes low-level CRUD for `files` table and insert/delete for both FTS5 tables — no indexing or search logic. ## Affected Modules - `internal/search/project.go` — new: project root detection, path-to-hash, cache dir resolution - `internal/search/store.go` — new: SQLite connection, schema DDL, version check/migration, CRUD methods -- `go.mod` — add `mattn/go-sqlite3`, `zeebo/xxh3` +- `go.mod` — add `modernc.org/sqlite`, `zeebo/xxh3` ## Test Strategy From f34803d7c1af4f84106ba0aa12a3cdad4a872e9f Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 17:09:50 +0100 Subject: [PATCH 5/6] Format search output as markdown fenced code blocks with language tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace flat snippet format with path header + fenced code block using language tag inferred from file extension. Keeps »« match markers and score display. Adds langFromExt helper with 40+ extension mappings. 
Co-Authored-By: Claude Opus 4.6 --- internal/search/searcher.go | 66 ++++++++++++++++++++++++- internal/search/searcher_test.go | 84 ++++++++++++++++++++++++++++++-- specs/search-output-format.md | 45 +++++++++++++++++ 3 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 specs/search-output-format.md diff --git a/internal/search/searcher.go b/internal/search/searcher.go index 1ba2ec2..e468543 100644 --- a/internal/search/searcher.go +++ b/internal/search/searcher.go @@ -3,6 +3,7 @@ package search import ( "fmt" "math" + "path/filepath" "regexp" "sort" "strings" @@ -244,7 +245,7 @@ func escapeFTS5(query string) string { return strings.Join(tokens, " ") } -// FormatResults formats search results for CLI output. +// FormatResults formats search results as markdown with fenced code blocks. func FormatResults(results []SearchResult) string { if len(results) == 0 { return "No results found.\n" @@ -254,13 +255,74 @@ func FormatResults(results []SearchResult) string { for _, r := range results { fmt.Fprintf(&sb, "%s (score: %.2f)\n", r.Path, r.Score) if r.Snippet != "" { - fmt.Fprintf(&sb, " %s\n", r.Snippet) + lang := langFromExt(r.Path) + fmt.Fprintf(&sb, "```%s\n%s\n```\n", lang, r.Snippet) } sb.WriteString("\n") } return sb.String() } +// langFromExt returns a markdown language tag for the given file path. +// Returns empty string for unknown extensions. 
+func langFromExt(path string) string { + ext := strings.ToLower(filepath.Ext(path)) + lang, ok := extLangs[ext] + if !ok { + return "" + } + return lang +} + +var extLangs = map[string]string{ + ".go": "go", + ".js": "javascript", + ".ts": "typescript", + ".jsx": "jsx", + ".tsx": "tsx", + ".py": "python", + ".rb": "ruby", + ".rs": "rust", + ".java": "java", + ".kt": "kotlin", + ".c": "c", + ".cpp": "cpp", + ".h": "c", + ".hpp": "cpp", + ".cs": "csharp", + ".swift": "swift", + ".sh": "bash", + ".bash": "bash", + ".zsh": "bash", + ".md": "markdown", + ".yaml": "yaml", + ".yml": "yaml", + ".json": "json", + ".toml": "toml", + ".xml": "xml", + ".html": "html", + ".css": "css", + ".scss": "scss", + ".sql": "sql", + ".proto": "protobuf", + ".tf": "hcl", + ".lua": "lua", + ".ex": "elixir", + ".exs": "elixir", + ".erl": "erlang", + ".hs": "haskell", + ".ml": "ocaml", + ".r": "r", + ".php": "php", + ".pl": "perl", + ".vim": "vim", + ".el": "lisp", + ".clj": "clojure", + ".scala": "scala", + ".dart": "dart", + ".zig": "zig", +} + // StatusInfo holds information for the --status flag. 
type StatusInfo struct { DBPath string diff --git a/internal/search/searcher_test.go b/internal/search/searcher_test.go index 44405b5..27f82cd 100644 --- a/internal/search/searcher_test.go +++ b/internal/search/searcher_test.go @@ -2,6 +2,7 @@ package search import ( "path/filepath" + "strings" "testing" ) @@ -203,12 +204,89 @@ func TestFormatResults_Empty(t *testing.T) { } } -func TestFormatResults_WithResults(t *testing.T) { +func TestFormatResults_GoSnippet(t *testing.T) { + results := []SearchResult{ + {Path: "main.go", Score: 0.87, Snippet: "func »main«() {}"}, + } + out := FormatResults(results) + if !strings.Contains(out, "main.go (score: 0.87)") { + t.Errorf("missing path+score header: %q", out) + } + if !strings.Contains(out, "```go\n") { + t.Errorf("missing go language tag: %q", out) + } + if !strings.Contains(out, "func »main«() {}") { + t.Errorf("missing snippet with markers: %q", out) + } + if !strings.Contains(out, "\n```\n") { + t.Errorf("missing closing fence: %q", out) + } +} + +func TestFormatResults_MarkdownSnippet(t *testing.T) { + results := []SearchResult{ + {Path: "docs/readme.md", Score: 0.50, Snippet: "## »Overview«"}, + } + out := FormatResults(results) + if !strings.Contains(out, "```markdown\n") { + t.Errorf("expected markdown language tag: %q", out) + } +} + +func TestFormatResults_UnknownExtension(t *testing.T) { + results := []SearchResult{ + {Path: "data.xyz", Score: 0.30, Snippet: "some content"}, + } + out := FormatResults(results) + if !strings.Contains(out, "```\n") { + t.Errorf("expected bare fence for unknown extension: %q", out) + } +} + +func TestFormatResults_NoSnippet(t *testing.T) { + results := []SearchResult{ + {Path: "config.go", Score: 0.60, Snippet: ""}, + } + out := FormatResults(results) + if !strings.Contains(out, "config.go (score: 0.60)") { + t.Errorf("missing path+score: %q", out) + } + if strings.Contains(out, "```") { + t.Errorf("should not have code fence for empty snippet: %q", out) + } +} + +func 
TestFormatResults_MarkersPreserved(t *testing.T) { results := []SearchResult{ {Path: "a.go", Score: 0.87, Snippet: "…the »test« content…"}, } out := FormatResults(results) - if out == "" { - t.Error("expected non-empty formatted output") + if !strings.Contains(out, "»test«") { + t.Errorf("markers should be preserved: %q", out) + } + if !strings.Contains(out, "…") { + t.Errorf("truncation marker should be preserved: %q", out) + } +} + +func TestLangFromExt(t *testing.T) { + tests := []struct { + path string + want string + }{ + {"main.go", "go"}, + {"app.ts", "typescript"}, + {"README.md", "markdown"}, + {"config.yaml", "yaml"}, + {"config.yml", "yaml"}, + {"script.sh", "bash"}, + {"data.unknown", ""}, + {"noext", ""}, + } + for _, tt := range tests { + got := langFromExt(tt.path) + if got != tt.want { + t.Errorf("langFromExt(%q) = %q, want %q", tt.path, got, tt.want) + } } } diff --git a/specs/search-output-format.md b/specs/search-output-format.md new file mode 100644 index 0000000..74ef2c4 --- /dev/null +++ b/specs/search-output-format.md @@ -0,0 +1,45 @@ +--- +title: Search Output Format +description: Replace inline snippet format with markdown code-fenced output using language-appropriate syntax highlighting +status: implemented +author: Heiko Braun +--- + +# Feature: search-output-format + +Source: improves [search-query](search-query.md) + +## Goal + +Make `draft search` output directly useful as context for agents and humans. Replace the current flat snippet format with `path (score)` header followed by a fenced code block with language tag inferred from file extension. Keep `»«` markers for match highlighting. 
+ +## Acceptance Criteria + +- [x] Each result rendered as `path (score: X.XX)` on its own line, followed by a fenced code block with language tag +- [x] Language tag inferred from file extension (`.go` → `go`, `.md` → `markdown`, `.yaml`/`.yml` → `yaml`, etc.); unknown extensions use no tag +- [x] `»«` match markers preserved in snippet content; `…` truncation marker preserved +- [x] Trigram-only results (no snippet) show path and score only, no empty code block + +## Approach + +Update `FormatResults` to emit `path (score: X.XX)\n` + fenced block with language tag from a `langFromExt(path)` helper. Add extension-to-language map covering common project file types. FTS5 `snippet()` call unchanged — already produces the right content with `»«` markers. + +## Affected Modules + +- `internal/search/searcher.go` — update `FormatResults`, add `langFromExt` helper +- `internal/search/searcher_test.go` — update format tests + +## Test Strategy + +- **Format with snippet**: result with `.go` path produces ` ```go ` fenced block containing snippet +- **Format with markdown file**: `.md` path produces ` ```markdown ` block +- **Format unknown extension**: unknown ext produces bare ` ``` ` block +- **Format without snippet**: trigram-only result (empty snippet) renders path and score only +- **No results**: still returns `"No results found.\n"` +- **Markers preserved**: output contains `»` and `«` around matched terms + +## Out of Scope + +- Line numbers or line ranges in output +- Syntax highlighting beyond language tag +- Changes to score merging or ranking logic From dc6d24a2c55487adf0119a83b7fa4a7165991dc3 Mon Sep 17 00:00:00 2001 From: Heiko Braun Date: Sat, 14 Mar 2026 17:16:19 +0100 Subject: [PATCH 6/6] Add draft-search rules to Claude and Cursor template sync Include .claude/rules/ and .cursor/rules/ in template sync so draft init installs the draft-search usage guide. Updates findConflicts and sync-templates.sh for both agents. 
Co-Authored-By: Claude Opus 4.6 --- .claude/rules/draft-search.md | 19 +++++++++++++++++++ .cursor/rules/draft-search.md | 19 +++++++++++++++++++ internal/cli/init.go | 2 ++ scripts/sync-templates.sh | 11 +++++++++++ 4 files changed, 51 insertions(+) create mode 100644 .claude/rules/draft-search.md create mode 100644 .cursor/rules/draft-search.md diff --git a/.claude/rules/draft-search.md b/.claude/rules/draft-search.md new file mode 100644 index 0000000..2d94224 --- /dev/null +++ b/.claude/rules/draft-search.md @@ -0,0 +1,19 @@ +## Code Search + +Use `draft search` for finding relevant code and context: +- `draft search "query"` — find files by concept, feature, or pattern +- `draft search "query" --files-only` — file paths only + +Prefer `draft search` over Grep/Glob when: +- Looking for where a feature or concept is implemented +- Checking what exists before writing new code or specs +- Searching with natural language rather than exact patterns +- Searching for partial identifiers or symbol names + +Prefer Grep when: +- Matching an exact string or regex +- Counting occurrences +- Searching within a single known file + +The search index updates automatically after `/spec` and `/refine`. +Run `draft index` manually after `git pull` or branch switches. 
diff --git a/.cursor/rules/draft-search.md b/.cursor/rules/draft-search.md new file mode 100644 index 0000000..2d94224 --- /dev/null +++ b/.cursor/rules/draft-search.md @@ -0,0 +1,19 @@ +## Code Search + +Use `draft search` for finding relevant code and context: +- `draft search "query"` — find files by concept, feature, or pattern +- `draft search "query" --files-only` — file paths only + +Prefer `draft search` over Grep/Glob when: +- Looking for where a feature or concept is implemented +- Checking what exists before writing new code or specs +- Searching with natural language rather than exact patterns +- Searching for partial identifiers or symbol names + +Prefer Grep when: +- Matching an exact string or regex +- Counting occurrences +- Searching within a single known file + +The search index updates automatically after `/spec` and `/refine`. +Run `draft index` manually after `git pull` or branch switches. diff --git a/internal/cli/init.go b/internal/cli/init.go index f3fa3fc..a04b525 100644 --- a/internal/cli/init.go +++ b/internal/cli/init.go @@ -218,6 +218,7 @@ func findConflicts(targetDir string, agents []string) ([]string, error) { ".claude/commands/refine.md", ".claude/commands/verify.md", ".claude/agents/verify-agent.md", + ".claude/rules/draft-search.md", ) case "cursor": filesToCheck = append(filesToCheck, @@ -225,6 +226,7 @@ func findConflicts(targetDir string, agents []string) ([]string, error) { ".cursor/skills/implement/SKILL.md", ".cursor/skills/refine/SKILL.md", ".cursor/skills/verify/SKILL.md", + ".cursor/rules/draft-search.md", ) } } diff --git a/scripts/sync-templates.sh b/scripts/sync-templates.sh index 577e186..c203414 100755 --- a/scripts/sync-templates.sh +++ b/scripts/sync-templates.sh @@ -27,6 +27,7 @@ rm -rf "$PRINCIPLES_DEST_DIR" # Create destination directories mkdir -p "$CLAUDE_DEST_DIR/commands" mkdir -p "$CLAUDE_DEST_DIR/agents" +mkdir -p "$CLAUDE_DEST_DIR/rules" mkdir -p "$CURSOR_DEST_DIR" mkdir -p "$SPECS_DEST_DIR" mkdir -p 
"$PRINCIPLES_DEST_DIR" @@ -43,11 +44,21 @@ if [ -d "$CLAUDE_SOURCE_DIR/agents" ]; then echo " ✓ Copied Claude agents: $(ls -1 "$CLAUDE_SOURCE_DIR/agents"/*.md 2>/dev/null | wc -l | tr -d ' ') files" fi +# Copy all Claude rule files +if [ -d "$CLAUDE_SOURCE_DIR/rules" ]; then + cp "$CLAUDE_SOURCE_DIR/rules"/*.md "$CLAUDE_DEST_DIR/rules/" 2>/dev/null || true + echo " ✓ Copied Claude rules: $(ls -1 "$CLAUDE_SOURCE_DIR/rules"/*.md 2>/dev/null | wc -l | tr -d ' ') files" +fi + # Copy all Cursor skill files if [ -d "$CURSOR_SOURCE_DIR" ]; then cp -r "$CURSOR_SOURCE_DIR"/* "$CURSOR_DEST_DIR/" 2>/dev/null || true skill_count=$(find "$CURSOR_SOURCE_DIR/skills" -name "SKILL.md" 2>/dev/null | wc -l | tr -d ' ') echo " ✓ Copied Cursor skills: $skill_count files" + rule_count=$(ls -1 "$CURSOR_SOURCE_DIR/rules"/*.md 2>/dev/null | wc -l | tr -d ' ') + if [ "$rule_count" -gt 0 ]; then + echo " ✓ Copied Cursor rules: $rule_count files" + fi fi # Copy only TEMPLATE.md from specs (exclude actual spec files)