From 0d2b4aced1ceadf7b6993e018bf83ace4fe795fa Mon Sep 17 00:00:00 2001 From: drompincen Date: Thu, 2 Apr 2026 19:55:37 -0600 Subject: [PATCH] Add LightRAG-parity features: knowledge graph, semantic tags, graph search Implement 10-chapter plan for graph-augmented RAG to move beyond naive vector search. Adds knowledge graph with entity/relationship extraction, community detection, multi-level retrieval (local/global/hybrid/mix), semantic tagging (4-10 tags per file), entity merging with duplicate detection, incremental graph updates, and enrichment pipeline orchestration. New services: KnowledgeGraphService, GraphSearchService, SemanticTagService, CommunityDetectionService, GraphUpdateService. New MCP tool classes: KnowledgeGraphTools, SemanticTagTools, EnrichmentTools. Total MCP tools grows from 49 to 72. 800 tests passing, 77% instruction coverage. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../skills/add-javaducker/add-javaducker.md | 114 +++ .claude/skills/architect/architect.md | 12 +- .claude/skills/debugger/debugger.md | 12 +- .claude/skills/implementer/implementer.md | 11 +- .claude/skills/orchestrator/orchestrator.md | 13 +- .claude/skills/planner/planner.md | 11 +- .claude/skills/refactorer/refactorer.md | 13 +- .../remove-javaducker/remove-javaducker.md | 39 + .claude/skills/reviewer/reviewer.md | 11 +- JavaDuckerMcpServer.java | 751 ------------------ README.md | 25 +- VERSION | 2 +- drom-plans/lightrag-parity.md | 432 ++++++++++ drom-plans/spring-ai-mcp.md | 233 ++++++ pom.xml | 20 +- script-instructions/run-scripts.md | 14 +- .../javaducker/server/db/SchemaBootstrap.java | 94 +++ .../javaducker/server/mcp/AnalysisTools.java | 462 +++++++++++ .../server/mcp/ContentIntelligenceTools.java | 271 +++++++ .../com/javaducker/server/mcp/CoreTools.java | 282 +++++++ .../server/mcp/EnrichmentTools.java | 157 ++++ .../server/mcp/KnowledgeGraphTools.java | 342 ++++++++ .../javaducker/server/mcp/ReladomoTools.java | 132 +++ .../server/mcp/SemanticTagTools.java 
| 104 +++ .../javaducker/server/mcp/SessionTools.java | 129 +++ .../com/javaducker/server/mcp/WatchTools.java | 90 +++ .../server/rest/JavaDuckerRestController.java | 103 ++- .../service/CommunityDetectionService.java | 284 +++++++ .../server/service/ExplainService.java | 18 + .../server/service/GraphSearchService.java | 369 +++++++++ .../server/service/GraphUpdateService.java | 70 ++ .../server/service/KnowledgeGraphService.java | 688 ++++++++++++++++ .../server/service/SemanticTagService.java | 283 +++++++ src/main/resources/application-mcp.yml | 14 + src/main/resources/application-server.yml | 7 + src/main/resources/application.yml | 8 + .../integration/McpToolRegistrationTest.java | 187 +++++ .../server/db/SchemaBootstrapTest.java | 58 ++ .../server/mcp/AnalysisToolsTest.java | 468 +++++++++++ .../mcp/ContentIntelligenceToolsTest.java | 373 +++++++++ .../javaducker/server/mcp/CoreToolsTest.java | 350 ++++++++ .../server/mcp/EnrichmentToolsTest.java | 116 +++ .../server/mcp/KnowledgeGraphToolsTest.java | 273 +++++++ .../server/mcp/ReladomoToolsTest.java | 154 ++++ .../server/mcp/SemanticTagToolsTest.java | 181 +++++ .../server/mcp/SessionToolsTest.java | 171 ++++ .../javaducker/server/mcp/WatchToolsTest.java | 119 +++ .../JavaDuckerRestControllerExtendedTest.java | 2 + .../rest/JavaDuckerRestControllerTest.java | 2 + .../CommunityDetectionServiceTest.java | 185 +++++ .../server/service/ExplainServiceTest.java | 4 +- .../service/GraphSearchServiceTest.java | 263 ++++++ .../service/GraphUpdateServiceTest.java | 171 ++++ .../service/KnowledgeGraphServiceTest.java | 328 ++++++++ .../service/SemanticTagServiceTest.java | 188 +++++ start-here.md | 2 + workflows/bug-fix.md | 1 + workflows/closed-loop.md | 4 + workflows/code-review.md | 3 +- workflows/javaducker-hygiene.md | 134 ++++ workflows/new-feature.md | 1 + workflows/refactor.md | 1 + 62 files changed, 8571 insertions(+), 788 deletions(-) create mode 100644 .claude/skills/add-javaducker/add-javaducker.md create 
mode 100644 .claude/skills/remove-javaducker/remove-javaducker.md delete mode 100644 JavaDuckerMcpServer.java create mode 100644 drom-plans/lightrag-parity.md create mode 100644 drom-plans/spring-ai-mcp.md create mode 100644 src/main/java/com/javaducker/server/mcp/AnalysisTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/ContentIntelligenceTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/CoreTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/EnrichmentTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/KnowledgeGraphTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/ReladomoTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/SemanticTagTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/SessionTools.java create mode 100644 src/main/java/com/javaducker/server/mcp/WatchTools.java create mode 100644 src/main/java/com/javaducker/server/service/CommunityDetectionService.java create mode 100644 src/main/java/com/javaducker/server/service/GraphSearchService.java create mode 100644 src/main/java/com/javaducker/server/service/GraphUpdateService.java create mode 100644 src/main/java/com/javaducker/server/service/KnowledgeGraphService.java create mode 100644 src/main/java/com/javaducker/server/service/SemanticTagService.java create mode 100644 src/main/resources/application-mcp.yml create mode 100644 src/main/resources/application-server.yml create mode 100644 src/test/java/com/javaducker/integration/McpToolRegistrationTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/AnalysisToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/ContentIntelligenceToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/CoreToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/EnrichmentToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/KnowledgeGraphToolsTest.java 
create mode 100644 src/test/java/com/javaducker/server/mcp/ReladomoToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/SemanticTagToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/SessionToolsTest.java create mode 100644 src/test/java/com/javaducker/server/mcp/WatchToolsTest.java create mode 100644 src/test/java/com/javaducker/server/service/CommunityDetectionServiceTest.java create mode 100644 src/test/java/com/javaducker/server/service/GraphSearchServiceTest.java create mode 100644 src/test/java/com/javaducker/server/service/GraphUpdateServiceTest.java create mode 100644 src/test/java/com/javaducker/server/service/KnowledgeGraphServiceTest.java create mode 100644 src/test/java/com/javaducker/server/service/SemanticTagServiceTest.java create mode 100644 workflows/javaducker-hygiene.md diff --git a/.claude/skills/add-javaducker/add-javaducker.md b/.claude/skills/add-javaducker/add-javaducker.md new file mode 100644 index 0000000..071ee0b --- /dev/null +++ b/.claude/skills/add-javaducker/add-javaducker.md @@ -0,0 +1,114 @@ +--- +name: add-javaducker +description: Configure JavaDucker as an optional companion tool for semantic code search and indexing +user-invocable: true +--- + +# Add JavaDucker + +You are setting up JavaDucker as a companion tool for this project. JavaDucker provides semantic code indexing, search, dependency analysis, and project mapping via MCP tools. + +## What you need + +The user must provide the **JavaDucker root folder** — the directory containing `JavaDuckerMcpServer.java` and `run-mcp.sh`. + +If the user doesn't provide a path, look for it in sibling directories: +- `../code-helper` +- `../javaducker` + +## Setup Process + +1. **Get the path** — ask the user or auto-detect from sibling directories + +2. **Validate** — confirm these files exist at the root: + - `JavaDuckerMcpServer.java` + - `run-mcp.sh` + - `run-server.sh` + If any are missing, stop and report the error. + +3. 
**Create local data directory** — create `.claude/.javaducker/` in the project root: + ```bash + mkdir -p .claude/.javaducker/intake + ``` + This is where the DuckDB database and intake files live — per-project, gitignored. + +4. **Find a free port** — scan ports 8080-8180 to find one that's not in use: + ```bash + for port in $(seq 8080 8180); do + if ! (echo >/dev/tcp/localhost/$port) 2>/dev/null; then + echo "Using port $port" + break + fi + done + ``` + +5. **Write config** — create `.claude/.state/javaducker.conf`: + ``` + JAVADUCKER_ROOT=/absolute/path/to/javaducker + JAVADUCKER_HTTP_PORT=<port> + JAVADUCKER_DB=/absolute/path/to/project/.claude/.javaducker/javaducker.duckdb + JAVADUCKER_INTAKE=/absolute/path/to/project/.claude/.javaducker/intake + ``` + All paths must be absolute. + +6. **Register MCP server** — create or merge `.mcp.json` in the project root: + ```json + { + "mcpServers": { + "javaducker": { + "command": "jbang", + "args": ["JAVADUCKER_ROOT/JavaDuckerMcpServer.java"], + "env": { + "PROJECT_ROOT": "<project-root>", + "HTTP_PORT": "<port>" + } + } + } + } + ``` + Replace placeholders with actual absolute paths and port. + + **If `.mcp.json` already exists**, read it first and merge the `javaducker` key into the existing `mcpServers` object. Do not overwrite other MCP servers. + +7. **Start the server** — launch with project-local data paths: + ```bash + DB=<db-path> HTTP_PORT=<port> INTAKE_DIR=<intake-dir> \ + nohup bash <javaducker-root>/run-server.sh >/dev/null 2>&1 & + ``` + Wait up to 10 seconds for it to become healthy (poll `/api/health`). The server auto-starts on future sessions via the memory-sync hook using `javaducker_start()`. + +8. **Index the project** — once the server is healthy, use `javaducker_index_directory` with the project root. Or via CLI: + ```bash + bash <javaducker-root>/run-client.sh --port <port> upload-dir --root <project-root> --ext .java,.xml,.md,.yml,.yaml,.json,.properties,.gradle,.kt,.py,.go,.rs,.ts,.js + ``` + +9. **Index past sessions** (optional) — ask the user if they want to index past Claude Code sessions.
If yes, use `javaducker_index_sessions`. + +10. **Confirm setup** — print a short confirmation: + ``` + JavaDucker ready. Look for "JD" in the statusline. + Root: /path/to/javaducker + Port: + Database: .claude/.javaducker/javaducker.duckdb + Intake: .claude/.javaducker/intake/ + Index: started for current project + ``` + +## How it works for the user + +After setup, JavaDucker is invisible: +- **Statusline** shows `JD` when active, `JD(off)` if the server is down +- **Server auto-starts** on each session — finds a free port if the saved one is taken +- **Data stays local** — each project has its own database in `.claude/.javaducker/` +- **Edited files auto-index** via the post-edit hook +- **All drom-flow skills** automatically use JavaDucker for deeper search when available +- **No CLI commands needed** — everything happens through MCP tools and hooks + +To remove: use `/remove-javaducker` + +## Important notes + +- First MCP connection may take 10-20 seconds (jbang compiles the Java file on first run) +- `.mcp.json` and `.claude/.javaducker/` are gitignored (machine-specific) +- The config is machine-specific — each developer runs `/add-javaducker` once +- Multiple projects can run simultaneously — each gets its own port and database diff --git a/.claude/skills/architect/architect.md b/.claude/skills/architect/architect.md index 25a34a3..9ba44c9 100644 --- a/.claude/skills/architect/architect.md +++ b/.claude/skills/architect/architect.md @@ -11,7 +11,8 @@ You are a software architect. Your job is to design systems and make technology ## Responsibilities 1. **Analyze requirements** — understand what the system needs to do, now and in the near future -2. **Evaluate trade-offs** — compare approaches by complexity, performance, maintainability +2. **If JavaDucker is available** — use `javaducker_search` to find existing implementations of similar patterns. Use `javaducker_map` for project structure orientation. 
Use `javaducker_dependencies` to understand the current dependency graph. Use `javaducker_concepts` for the concept map across the corpus. Use `javaducker_find_by_type` with `ADR` or `DESIGN_DOC` to find existing architecture decisions. Use `javaducker_recent_decisions` to check for decisions made in past sessions. Use `javaducker_session_context` for historical discussion on the topic. +3. **Evaluate trade-offs** — compare approaches by complexity, performance, maintainability 3. **Design interfaces** — define how components talk to each other 4. **Document decisions** — write ADRs in `context/DECISIONS.md` 5. **Consider constraints** — team size, timeline, existing tech stack @@ -44,6 +45,15 @@ You are a software architect. Your job is to design systems and make technology **Consequences:** [What follows from this] ``` +## Knowledge curation (when JavaDucker is available) + +After completing your design work, you are responsible for curating the knowledge you produced: + +1. **Record the decision** — `javaducker_extract_decisions` with the session ID and each decision you made (what, why, alternatives rejected). Tag with the domain area. These become searchable via `javaducker_recent_decisions` in future sessions. +2. **Check for invalidated decisions** — `javaducker_find_points` with `DECISION` type. Read each prior decision that overlaps with your new design. If your new decision supersedes an old one, use `javaducker_set_freshness` to mark the old artifact as `superseded` (with `superseded_by` pointing to the new one). Then `javaducker_synthesize` the old artifact — write a summary that says what it decided, why it's no longer valid, and what replaced it. +3. **Link concepts** — `javaducker_link_concepts` to connect your new design's concepts to related artifacts. This builds the concept graph that `javaducker_concepts` and `javaducker_concept_timeline` expose. +4. 
**Classify your output** — if the ADR or design doc gets indexed, `javaducker_classify` it as `ADR` or `DESIGN_DOC` so future architects can find it with `javaducker_find_by_type`. + ## Principles - Design for what you know, not what you imagine diff --git a/.claude/skills/debugger/debugger.md b/.claude/skills/debugger/debugger.md index 7908754..2113b7a 100644 --- a/.claude/skills/debugger/debugger.md +++ b/.claude/skills/debugger/debugger.md @@ -21,7 +21,8 @@ You are a debugger. Your job is to systematically find and fix the root cause of 1. Gather symptoms: error messages, stack traces, logs, steps to reproduce 2. Read the code path involved — trace from entry point to failure -3. Form 2-3 hypotheses ranked by likelihood +3. **If JavaDucker is available** — use `javaducker_search` for error messages and exception names. Use `javaducker_explain` on suspect files for dependencies and co-change history. Use `javaducker_dependents` to find callers that might be passing bad input. Use `javaducker_related` to find co-changed files that may also be involved. Use `javaducker_blame` on the suspect region to see who changed it and when. Use `javaducker_search_sessions` to check if this bug was discussed in prior sessions. +4. Form 2-3 hypotheses ranked by likelihood 4. Test each hypothesis with targeted investigation (grep, read, run) 5. Once root cause is found, implement the minimal fix 6. Add a test that fails without the fix and passes with it @@ -45,6 +46,15 @@ You are a debugger. Your job is to systematically find and fix the root cause of - [Test that was added or run] ``` +## Knowledge curation (when JavaDucker is available) + +After resolving a bug, record what you learned: + +1. **Record the root cause as a point** — `javaducker_extract_points` on the affected file's artifact with type `INSIGHT` describing the root cause and why it was non-obvious. Future debuggers searching for similar symptoms will find this. +2. 
**Record the decision** — `javaducker_extract_decisions` with the fix decision (what was changed, why this approach, what alternatives were considered). +3. **Tag the file** — `javaducker_tag` the affected artifact with bug-related tags (e.g., the error type, the component area) so `javaducker_find_by_tag` can surface it later. +4. **Check for stale workarounds** — if your fix resolves a root cause that had prior workarounds documented, `javaducker_set_freshness` → `superseded` on those workaround artifacts. Synthesize them with a note that the root cause is now fixed. + ## Principles - Never guess — verify each assumption by reading code or running tests diff --git a/.claude/skills/implementer/implementer.md b/.claude/skills/implementer/implementer.md index 1febabb..47f5970 100644 --- a/.claude/skills/implementer/implementer.md +++ b/.claude/skills/implementer/implementer.md @@ -20,10 +20,19 @@ You are a code implementer. Your job is to write clean, correct, production-read 1. Read the relevant files to understand existing patterns 2. Check `context/CONVENTIONS.md` for naming, imports, error handling patterns -3. Implement the change with minimal diff +3. **If JavaDucker is available** — use `javaducker_search` (semantic mode) to find related patterns, similar implementations, and conventions that Grep alone might miss. Use `javaducker_explain` on key files for full context. For Java/Reladomo projects, use `javaducker_reladomo_relationships` to understand object models, `javaducker_reladomo_graph` to visualize relationship chains, `javaducker_reladomo_finders` for query patterns, and `javaducker_reladomo_deepfetch` for eager loading profiles. Use `javaducker_related` to find co-changed files that should be updated together. +4. Implement the change with minimal diff 4. Run tests to verify nothing broke 5. Self-review: is this the simplest correct solution? 
+## Knowledge curation (when JavaDucker is available) + +After implementing a change, update the knowledge base: + +1. **Tag new patterns** — if you introduced a new pattern or convention, `javaducker_tag` the file with descriptive tags so future implementers can find it via `javaducker_find_by_tag`. +2. **Record non-obvious decisions** — if you made a judgment call (chose approach A over B), `javaducker_extract_decisions` to record it with context. Future sessions will surface it via `javaducker_recent_decisions`. +3. **Mark superseded code** — if your change replaces or deprecates an older implementation, `javaducker_set_freshness` → `superseded` on the old artifact, then `javaducker_synthesize` with a summary of what it did and why it was replaced. + ## Principles - Prefer editing existing files over creating new ones diff --git a/.claude/skills/orchestrator/orchestrator.md b/.claude/skills/orchestrator/orchestrator.md index 18720c5..f186a67 100644 --- a/.claude/skills/orchestrator/orchestrator.md +++ b/.claude/skills/orchestrator/orchestrator.md @@ -20,7 +20,8 @@ You are a pipeline orchestrator. Your job is to run closed-loop workflows that c ## Process 1. Read the workflow file (e.g., `workflows/closed-loop.md`) -2. Run the check command or orchestration script +2. **If JavaDucker is available** — use `javaducker_index_health` to check overall index freshness. Use `javaducker_stale` with `git_diff_ref: "HEAD~1"` to find stale files. Re-index stale files before starting. After each iteration, use `javaducker_extract_points` to record key findings (RISK, ACTION, INSIGHT) from the iteration. Use `javaducker_concept_health` to monitor concept trends across iterations. Use `javaducker_synthesize` on completed/obsolete artifacts to keep the index compact. +3. Run the check command or orchestration script 3. Parse the JSON report 4. Group issues into independent categories 5. 
For each category, spawn an Agent with `run_in_background: true`: @@ -74,6 +75,16 @@ After each iteration, log: - Next: [continue/revert/done] ``` +## Post-loop: Knowledge curation (when JavaDucker is available) + +After the loop exits, you are responsible for curating what was learned: + +1. **Record what worked and what didn't** — `javaducker_extract_decisions` with each key decision: what fix strategies worked, what regressed and why, the final approach chosen. Tag with the domain area. This is critical — future orchestrators will find these via `javaducker_recent_decisions` and avoid repeating failed approaches. +2. **Extract insights** — for each file that was fixed, `javaducker_extract_points` with type `INSIGHT` recording what the root issue was. Type `RISK` for any fragile areas you noticed. Type `ACTION` for any follow-up work needed. +3. **Enrich new artifacts** — `javaducker_enrich_queue` for files edited during the loop. Read each, then `javaducker_classify`, `javaducker_extract_points`, `javaducker_tag`, `javaducker_mark_enriched`. Don't classify blindly — read the content first. +4. **Supersede obsolete intermediate states** — iterations that were reverted produced artifacts that are now noise. `javaducker_set_freshness` → `superseded` on those. `javaducker_synthesize` with a note: "Reverted in iteration N because [reason]. Replaced by [final approach]." +5. **Check for invalidated decisions** — `javaducker_find_points` with `DECISION` type. If the loop's outcome contradicts a prior recorded decision, supersede it and record the new decision. + ## Principles - Always parallel — never fix issues sequentially when they're independent diff --git a/.claude/skills/planner/planner.md b/.claude/skills/planner/planner.md index 18766aa..bcaf075 100644 --- a/.claude/skills/planner/planner.md +++ b/.claude/skills/planner/planner.md @@ -11,7 +11,8 @@ You are a task planner. Your job is to break down the user's request into a chap ## Responsibilities 1. 
**Decompose** the task into chapters — each chapter is a logical phase of work -2. **Break chapters into steps** — discrete, independently completable items within each chapter +2. **If JavaDucker is available** — use `javaducker_search` to identify all files that will be affected by the task. Use `javaducker_dependents` on key files to discover downstream impact. Use `javaducker_session_context` to find prior conversations and decisions related to this area. Use `javaducker_recent_decisions` to check for relevant past decisions. This produces more accurate chapter breakdowns and catches files that Grep-based search might miss. +3. **Break chapters into steps** — discrete, independently completable items within each chapter 3. **Identify dependencies** — which chapters/steps must complete before others can start 4. **Maximize parallelism** — default to parallel; only serialize when there's a true data dependency 5. **Identify loops** — flag steps that need repeat-until-pass iteration @@ -110,6 +111,14 @@ At session start, the memory-sync hook scans `drom-plans/` for plans with `statu 4. **Resume from there** — do not redo completed steps 5. **Continue tracking** progress as normal +## Knowledge curation (when JavaDucker is available) + +When creating or completing a plan, curate the knowledge: + +1. **Before planning** — `javaducker_recent_decisions` and `javaducker_find_points` with `CONSTRAINT` and `RISK` types. Read the results. If any prior constraint or risk applies to your plan, incorporate it. If a prior decision has been invalidated by the current task, note it. +2. **After plan completion** — `javaducker_extract_decisions` to record key decisions made during planning (scope choices, trade-offs, rejected approaches). Tag them so future planners find them. +3. 
**Supersede old plans** — if this plan replaces or invalidates a prior plan that was indexed, `javaducker_set_freshness` → `superseded` on the old plan artifact, then `javaducker_synthesize` it with a summary of what it planned, why it's superseded, and what replaced it. + ## Principles - **Parallel by default** — every step is parallel unless proven otherwise diff --git a/.claude/skills/refactorer/refactorer.md b/.claude/skills/refactorer/refactorer.md index dba7563..3fa9fc1 100644 --- a/.claude/skills/refactorer/refactorer.md +++ b/.claude/skills/refactorer/refactorer.md @@ -19,8 +19,9 @@ You are a refactorer. Your job is to improve code structure without changing beh ## Process 1. Read the code to understand current structure -2. Run existing tests to establish a passing baseline -3. Identify specific refactoring targets with clear justification +2. **If JavaDucker is available** — use `javaducker_dependents` on files you plan to refactor to discover all callers and importers. Use `javaducker_related` to find co-changed files. For Reladomo projects, use `javaducker_reladomo_object_files` to find all files for an object (generated, hand-written, xml, config). This ensures no reference is missed during renaming or restructuring. +3. Run existing tests to establish a passing baseline +4. Identify specific refactoring targets with clear justification 4. For each change: a. Make one small structural change b. Run tests — must still pass @@ -28,6 +29,14 @@ You are a refactorer. Your job is to improve code structure without changing beh 5. Remove any dead code left behind 6. Final test run to confirm everything passes +## Knowledge curation (when JavaDucker is available) + +After completing a refactor, clean up the knowledge base: + +1. 
**Synthesize removed/renamed files** — if files were deleted or renamed, `javaducker_set_freshness` → `superseded` on the old artifact, then `javaducker_synthesize` with a summary noting the rename/removal and where the functionality moved to. +2. **Update concept links** — `javaducker_link_concepts` to connect the new file structure to existing concepts. This keeps the concept graph accurate after restructuring. +3. **Record the refactor decision** — `javaducker_extract_decisions` with why the refactor was done and the approach taken. This prevents future refactors from undoing your work. + ## Principles - Behavior must not change — if tests break, the refactor is wrong diff --git a/.claude/skills/remove-javaducker/remove-javaducker.md b/.claude/skills/remove-javaducker/remove-javaducker.md new file mode 100644 index 0000000..52e8312 --- /dev/null +++ b/.claude/skills/remove-javaducker/remove-javaducker.md @@ -0,0 +1,39 @@ +--- +name: remove-javaducker +description: Remove JavaDucker companion tool configuration from this project +user-invocable: true +--- + +# Remove JavaDucker + +You are removing the JavaDucker companion tool integration from this project. + +## Removal Process + +1. **Stop watch** — if the `javaducker_watch` MCP tool is available, call it with `action: "stop"` to stop any active file watchers. Ignore errors if the server is not running. + +2. **Remove config** — delete `.claude/.state/javaducker.conf` if it exists. + +3. **Remove project-local data** — delete `.claude/.javaducker/` if it exists. This directory contains the per-project DuckDB database and intake folder created by the auto-start lifecycle. + +4. **Clean MCP registration** — read `.mcp.json` in the project root: + - If it contains only the `javaducker` entry, delete the entire `.mcp.json` file + - If it contains other MCP servers too, remove only the `javaducker` key from `mcpServers` and write the file back + +5. 
**Confirm removal** — print what was removed: + ``` + JavaDucker removed: + Deleted: .claude/.state/javaducker.conf + Deleted: .claude/.javaducker/ (project-local data) + Cleaned: .mcp.json (javaducker entry removed) + ``` + Only list items that actually existed and were removed. + +## What is preserved + +- The JavaDucker installation itself (at its own root directory) is untouched +- drom-flow hooks and skills gracefully degrade — they check for the config file and skip JavaDucker features when it's absent + +## To re-add later + +Run `/add-javaducker` with the JavaDucker root path. diff --git a/.claude/skills/reviewer/reviewer.md b/.claude/skills/reviewer/reviewer.md index 610e4a1..6235f46 100644 --- a/.claude/skills/reviewer/reviewer.md +++ b/.claude/skills/reviewer/reviewer.md @@ -11,7 +11,8 @@ You are a code reviewer. Your job is to evaluate code changes for correctness, s ## Responsibilities 1. **Read the full diff** — understand the change holistically before commenting -2. **Check each dimension**: correctness, security, performance, readability, maintainability +2. **If JavaDucker is available** — use `javaducker_dependents` on changed files to assess impact. Check if downstream consumers need updates too. Use `javaducker_find_points` with `DECISION` or `RISK` type to check for known risks in the affected area. Use `javaducker_related` to find co-changed files that might also need review. Use `javaducker_latest` on the topic to find the most current documentation. +3. **Check each dimension**: correctness, security, performance, readability, maintainability 3. **Rate issues by severity**: Blocker, Major, Minor, Nit 4. **Note positives** — acknowledge good patterns and decisions 5. **Give a verdict**: Approve, Approve with comments, Request changes @@ -48,6 +49,14 @@ Summary of review. - **Minor**: Improvement opportunity. Fix if convenient. - **Nit**: Style or preference. Optional. 
+## Knowledge curation (when JavaDucker is available) + +During review, actively check and update the knowledge base: + +1. **Check for contradicted decisions** — `javaducker_find_points` with `DECISION` type for the affected area. If the change contradicts a prior recorded decision, flag it as a Blocker and ask whether the old decision should be superseded. +2. **Flag new risks** — if you identify a risk during review, `javaducker_extract_points` with type `RISK` on the artifact. This makes the risk discoverable by future reviewers and planners. +3. **Supersede stale docs** — if the change makes existing documentation or design docs inaccurate, `javaducker_set_freshness` → `stale` on those artifacts. Don't synthesize yet — let the author update them first. + ## Principles - Be specific — reference exact file and line diff --git a/JavaDuckerMcpServer.java b/JavaDuckerMcpServer.java deleted file mode 100644 index 763fd50..0000000 --- a/JavaDuckerMcpServer.java +++ /dev/null @@ -1,751 +0,0 @@ -///usr/bin/env jbang "$0" "$@" ; exit $? 
-//JAVA 21 -//DEPS io.modelcontextprotocol.sdk:mcp:0.8.1 -//DEPS org.slf4j:slf4j-nop:2.0.16 - -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import io.modelcontextprotocol.spec.McpSchema; -import io.modelcontextprotocol.server.McpServer; -import io.modelcontextprotocol.server.transport.StdioServerTransportProvider; - -import java.io.ByteArrayOutputStream; -import java.net.Socket; -import java.net.URI; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.*; -import java.util.stream.Stream; - -public class JavaDuckerMcpServer { - - static final String HOST = System.getenv().getOrDefault("JAVADUCKER_HOST", "localhost"); - static final int PORT = Integer.parseInt(System.getenv().getOrDefault("HTTP_PORT", "8080")); - static final String PROJECT_ROOT = System.getenv().getOrDefault("PROJECT_ROOT", "."); - static final String BASE_URL = "http://" + HOST + ":" + PORT + "/api"; - static final ObjectMapper MAPPER = new ObjectMapper(); - static final HttpClient HTTP = HttpClient.newHttpClient(); - static final boolean STALENESS_CHECK_ENABLED = - !"false".equalsIgnoreCase(System.getenv("JAVADUCKER_STALENESS_CHECK")); - - public static void main(String[] args) throws Exception { - ensureServerRunning(); - - McpServer.sync(new StdioServerTransportProvider(MAPPER)) - .serverInfo("javaducker", "1.0.0") - .capabilities(McpSchema.ServerCapabilities.builder().tools(true).build()) - .tool( - tool("javaducker_health", - "Check if the JavaDucker server is running. Returns status and version.", - "{}"), - (ex, a) -> call(JavaDuckerMcpServer::health)) - .tool(tool("javaducker_index_file", - "Upload and index a single file. Returns artifact_id. 
Async — use javaducker_wait_for_indexed to confirm.", - schema(props("file_path", str("Absolute path to the file to index")), "file_path")), - (ex, a) -> call(() -> indexFile((String) a.get("file_path")))) - .tool(tool("javaducker_index_directory", - "Recursively index all source files in a directory. Async — use javaducker_stats to monitor.", - schema(props("directory", str("Absolute path to the root directory to index"), - "extensions", str("Comma-separated extensions, e.g. .java,.xml,.md (optional)")), "directory")), - (ex, a) -> call(() -> indexDirectory((String) a.get("directory"), (String) a.getOrDefault("extensions", "")))) - .tool( - tool("javaducker_search", - "Search the indexed codebase. Modes: " + - "exact=literal substring (best for @Annotations, class names, constants), " + - "semantic=concept/intent matching, " + - "hybrid=weighted combination (default, best general use). " + - "Returns ranked results with file, score, chunk index, and text preview.", - schema(props( - "phrase", str("Search query or phrase"), - "mode", str("exact, semantic, or hybrid (default)"), - "max_results", intParam("Max results to return (default 20)")), - "phrase")), - (ex, a) -> call(() -> { - Map result = search( - (String) a.get("phrase"), - (String) a.getOrDefault("mode", "hybrid"), - a.containsKey("max_results") ? ((Number) a.get("max_results")).intValue() : 20); - try { - if (STALENESS_CHECK_ENABLED && result.containsKey("staleness_warning")) { - result.put("_footer", "\n⚠️ " + result.get("staleness_warning") - + " Use javaducker_index_file to refresh."); - } - } catch (Exception ignored) { } - return result; - })) - .tool( - tool("javaducker_get_file_text", - "Retrieve the full extracted text of an indexed file by artifact_id. 
" + - "Use after a search to read the complete file content rather than just a chunk preview.", - schema(props( - "artifact_id", str("Artifact ID from search or index results")), - "artifact_id")), - (ex, a) -> call(() -> getFileText((String) a.get("artifact_id")))) - .tool( - tool("javaducker_get_artifact_status", - "Check the ingestion status of a specific artifact. " + - "Lifecycle: RECEIVED→STORED_IN_INTAKE→PARSING→CHUNKED→EMBEDDED→INDEXED (or FAILED). " + - "Returns status, timestamps, and any error message.", - schema(props( - "artifact_id", str("Artifact ID to check")), - "artifact_id")), - (ex, a) -> call(() -> getArtifactStatus((String) a.get("artifact_id")))) - .tool( - tool("javaducker_wait_for_indexed", - "Block and poll until an artifact reaches INDEXED or FAILED status. " + - "Use after javaducker_index_file to confirm a file is searchable before querying.", - schema(props( - "artifact_id", str("Artifact ID to poll"), - "timeout_seconds", intParam("Max seconds to wait (default 120)")), - "artifact_id")), - (ex, a) -> call(() -> waitForIndexed( - (String) a.get("artifact_id"), - a.containsKey("timeout_seconds") ? ((Number) a.get("timeout_seconds")).intValue() : 120))) - .tool( - tool("javaducker_stats", - "Return aggregate indexing statistics: total artifacts, how many are indexed vs " + - "pending vs failed, total chunks, and total bytes. Use after javaducker_index_directory " + - "to monitor bulk ingestion progress.", - "{}"), - (ex, a) -> call(JavaDuckerMcpServer::stats)) - .tool( - tool("javaducker_summarize", - "Get a structural summary of an indexed file: class names, method names, imports, " + - "line count. 
One-call overview without reading the full text.", - schema(props( - "artifact_id", str("Artifact ID to summarize")), - "artifact_id")), - (ex, a) -> call(() -> { - String artifactId = (String) a.get("artifact_id"); - Map summary = summarize(artifactId); - if (STALENESS_CHECK_ENABLED) { - try { - Map status = getArtifactStatus(artifactId); - String path = (String) status.get("original_client_path"); - if (path != null && !path.isBlank()) { - Map staleness = httpPost("/stale", Map.of("file_paths", List.of(path))); - List staleList = (List) staleness.get("stale"); - if (staleList != null && !staleList.isEmpty()) { - summary.put("_warning", "⚠️ This file has changed since indexing — summary may be outdated."); - } - } - } catch (Exception ignored) { } - } - return summary; - })) - .tool( - tool("javaducker_map", - "Get a project map showing directory structure, file counts, largest files, and " + - "recently indexed files. Use for codebase orientation.", - "{}"), - (ex, a) -> call(JavaDuckerMcpServer::projectMap)) - .tool( - tool("javaducker_stale", - "Check which indexed files are stale (modified on disk since last indexing). " + - "Accepts file_paths (list of absolute paths) or git_diff_ref (e.g. HEAD~3) to auto-detect changed files.", - schema(props( - "file_paths", str("JSON array of absolute file paths to check (optional if git_diff_ref given)"), - "git_diff_ref", str("Git ref for diff, e.g. HEAD~3 or main (optional if file_paths given)")))), - (ex, a) -> call(() -> checkStale( - (String) a.getOrDefault("file_paths", ""), - (String) a.getOrDefault("git_diff_ref", "")))) - .tool( - tool("javaducker_dependencies", - "Get the import/dependency list for an indexed file. 
Shows what this file imports " + - "and which indexed artifacts those imports resolve to.", - schema(props( - "artifact_id", str("Artifact ID to get dependencies for")), - "artifact_id")), - (ex, a) -> call(() -> dependencies((String) a.get("artifact_id")))) - .tool( - tool("javaducker_dependents", - "Find which indexed files import/depend on this file. Useful for impact analysis.", - schema(props( - "artifact_id", str("Artifact ID to find dependents of")), - "artifact_id")), - (ex, a) -> call(() -> dependents((String) a.get("artifact_id")))) - .tool( - tool("javaducker_watch", - "Start or stop auto-indexing a directory. When watching, file changes are " + - "automatically detected and re-indexed. Use action=start with a directory, or action=stop.", - schema(props( - "action", str("start or stop"), - "directory", str("Absolute path to watch (required for start)"), - "extensions", str("Comma-separated extensions, e.g. .java,.xml,.md (optional)")), - "action")), - (ex, a) -> call(() -> watch( - (String) a.get("action"), - (String) a.getOrDefault("directory", ""), - (String) a.getOrDefault("extensions", "")))) - // ── Explain tool ───────────────────────────────────────────── - .tool(tool("javaducker_explain", - "Get everything JavaDucker knows about a file: summary, dependencies, dependents, tags, " + - "classification, related plans, blame highlights, and co-change partners. One call for full context.", - schema(props("file_path", str("Absolute path to the file to explain")), "file_path")), - (ex, a) -> call(() -> httpPost("/explain", Map.of("filePath", a.get("file_path"))))) - // ── Git Blame tool ─────────────────────────────────────────── - .tool(tool("javaducker_blame", - "Show who last changed each line of a file, with commit info. Groups consecutive lines by same commit. 
Optionally narrow to a line range.", - schema(props( - "file_path", str("Absolute path to the file"), - "start_line", intParam("Start line number (optional)"), - "end_line", intParam("End line number (optional)")), - "file_path")), - (ex, a) -> call(() -> { - Map body = new LinkedHashMap<>(); - body.put("filePath", a.get("file_path")); - if (a.containsKey("start_line")) body.put("startLine", ((Number) a.get("start_line")).intValue()); - if (a.containsKey("end_line")) body.put("endLine", ((Number) a.get("end_line")).intValue()); - return httpPost("/blame", body); - })) - // ── Co-Change / Related Files tool ───────────────────────── - .tool(tool("javaducker_related", - "Find files commonly edited together with this file, based on git co-change history. " + - "Helps identify related files you might need to update.", - schema(props( - "file_path", str("Absolute path to the file"), - "max_results", intParam("Max results (default 10)")), - "file_path")), - (ex, a) -> call(() -> httpPost("/related", Map.of( - "filePath", a.get("file_path"), - "maxResults", ((Number) a.getOrDefault("max_results", 10)).intValue())))) - // ── Content Intelligence: write tools ──────────────────────── - .tool(tool("javaducker_classify", - "Classify an artifact by doc type (ADR, DESIGN_DOC, PLAN, MEETING_NOTES, THREAD, SCRATCH, CODE, REFERENCE, TICKET).", - schema(props("artifact_id", str("Artifact ID"), "doc_type", str("Document type"), - "confidence", intParam("Confidence 0-1 (default 1)"), "method", str("Classification method (default llm)")), - "artifact_id", "doc_type")), - (ex, a) -> call(() -> httpPost("/classify", Map.of( - "artifactId", a.get("artifact_id"), "docType", a.get("doc_type"), - "confidence", a.getOrDefault("confidence", 1.0), "method", a.getOrDefault("method", "llm"))))) - .tool(tool("javaducker_tag", - "Add tags to an artifact. 
Replaces existing tags.", - schema(props("artifact_id", str("Artifact ID"), "tags", str("JSON array of {tag, tag_type, source} objects")), - "artifact_id", "tags")), - (ex, a) -> call(() -> { - List> tags = MAPPER.readValue((String) a.get("tags"), new TypeReference<>() {}); - return httpPost("/tag", Map.of("artifactId", a.get("artifact_id"), "tags", tags)); - })) - .tool(tool("javaducker_extract_points", - "Write salient points for an artifact: DECISION, IDEA, QUESTION, ACTION, RISK, INSIGHT, CONSTRAINT, STATUS.", - schema(props("artifact_id", str("Artifact ID"), "points", str("JSON array of {point_type, point_text} objects")), - "artifact_id", "points")), - (ex, a) -> call(() -> { - List> points = MAPPER.readValue((String) a.get("points"), new TypeReference<>() {}); - return httpPost("/salient-points", Map.of("artifactId", a.get("artifact_id"), "points", points)); - })) - .tool(tool("javaducker_set_freshness", - "Mark an artifact as current, stale, or superseded.", - schema(props("artifact_id", str("Artifact ID"), "freshness", str("current, stale, or superseded"), - "superseded_by", str("Artifact ID that supersedes this one (optional)")), - "artifact_id", "freshness")), - (ex, a) -> call(() -> httpPost("/freshness", Map.of( - "artifactId", a.get("artifact_id"), "freshness", a.get("freshness"), - "supersededBy", a.getOrDefault("superseded_by", ""))))) - .tool(tool("javaducker_synthesize", - "Write a synthesis record and prune full text/embeddings. 
Only works on stale/superseded artifacts.", - schema(props("artifact_id", str("Artifact ID"), "summary_text", str("Compact summary"), - "tags", str("Comma-separated tags"), "key_points", str("Key points"), - "outcome", str("Outcome/resolution"), "original_file_path", str("Path to original file on disk")), - "artifact_id", "summary_text")), - (ex, a) -> call(() -> httpPost("/synthesize", Map.of( - "artifactId", a.get("artifact_id"), "summaryText", a.get("summary_text"), - "tags", a.getOrDefault("tags", ""), "keyPoints", a.getOrDefault("key_points", ""), - "outcome", a.getOrDefault("outcome", ""), "originalFilePath", a.getOrDefault("original_file_path", ""))))) - .tool(tool("javaducker_link_concepts", - "Create cross-document concept links.", - schema(props("links", str("JSON array of {concept, artifact_a, artifact_b, strength} objects")), - "links")), - (ex, a) -> call(() -> { - List> links = MAPPER.readValue((String) a.get("links"), new TypeReference<>() {}); - return httpPost("/link-concepts", Map.of("links", links)); - })) - .tool(tool("javaducker_enrich_queue", - "List artifacts queued for enrichment (INDEXED but not yet ENRICHED).", - schema(props("limit", intParam("Max results (default 50)")))), - (ex, a) -> call(() -> httpGet("/enrich-queue?limit=" + ((Number) a.getOrDefault("limit", 50)).intValue()))) - .tool(tool("javaducker_mark_enriched", - "Mark an artifact as ENRICHED after post-processing is complete.", - schema(props("artifact_id", str("Artifact ID")), "artifact_id")), - (ex, a) -> call(() -> httpPost("/mark-enriched", Map.of("artifactId", a.get("artifact_id"))))) - // ── Content Intelligence: read tools ───────────────────────── - .tool(tool("javaducker_latest", - "Get the most recent, non-superseded artifact on a topic — the 'current truth'.", - schema(props("topic", str("Topic to search for")), "topic")), - (ex, a) -> call(() -> httpGet("/latest?topic=" + encode((String) a.get("topic"))))) - .tool(tool("javaducker_find_by_type", - "Find artifacts 
by document type (ADR, PLAN, DESIGN_DOC, etc.).", - schema(props("doc_type", str("Document type to search for")), "doc_type")), - (ex, a) -> call(() -> httpGet("/find-by-type?docType=" + encode((String) a.get("doc_type"))))) - .tool(tool("javaducker_find_by_tag", - "Find artifacts matching a tag.", - schema(props("tag", str("Tag to search for")), "tag")), - (ex, a) -> call(() -> httpGet("/find-by-tag?tag=" + encode((String) a.get("tag"))))) - .tool(tool("javaducker_find_points", - "Search salient points by type (DECISION, RISK, ACTION, etc.) across all documents.", - schema(props("point_type", str("Point type: DECISION, IDEA, QUESTION, ACTION, RISK, INSIGHT, CONSTRAINT, STATUS"), - "tag", str("Optional tag filter")), "point_type")), - (ex, a) -> call(() -> httpGet("/find-points?pointType=" + encode((String) a.get("point_type")) - + (a.containsKey("tag") ? "&tag=" + encode((String) a.get("tag")) : "")))) - .tool(tool("javaducker_concepts", - "List all concepts across the corpus with mention counts and doc counts.", - "{}"), - (ex, a) -> call(() -> httpGet("/concepts"))) - .tool(tool("javaducker_concept_timeline", - "Show the evolution of a concept: all related docs ordered by time with freshness status.", - schema(props("concept", str("Concept name")), "concept")), - (ex, a) -> call(() -> httpGet("/concept-timeline/" + encode((String) a.get("concept"))))) - .tool(tool("javaducker_stale_content", - "List artifacts flagged as stale or superseded, with what replaced them.", - "{}"), - (ex, a) -> call(() -> httpGet("/stale-content"))) - .tool(tool("javaducker_synthesis", - "Retrieve synthesis records for pruned artifacts (summary + file path). 
Provide artifact_id for a specific record or keyword to search.", - schema(props("artifact_id", str("Artifact ID (optional)"), - "keyword", str("Search keyword (optional)")))), - (ex, a) -> call(() -> { - if (a.containsKey("artifact_id")) return httpGet("/synthesis/" + a.get("artifact_id")); - if (a.containsKey("keyword")) return httpGet("/synthesis/search?keyword=" + encode((String) a.get("keyword"))); - return Map.of("error", "Provide artifact_id or keyword"); - })) - .tool(tool("javaducker_concept_health", - "Health report for all concepts: active/stale doc counts, trend (active/fading/cold).", - "{}"), - (ex, a) -> call(() -> httpGet("/concept-health"))) - // ── Index Health tool ──────────────────────────────────────── - .tool(tool("javaducker_index_health", - "Check index health: how many files are current vs stale. Returns actionable recommendation. " + - "No parameters required — scans all indexed files.", - "{}"), - (ex, a) -> call(JavaDuckerMcpServer::indexHealth)) - // ── Reladomo tools ─────────────────────────────────────────── - .tool(tool("javaducker_reladomo_relationships", - "Get a Reladomo object's attributes, relationships, and metadata in one call.", - schema(props("object_name", str("Reladomo object name, e.g. 
Order")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/relationships/" + a.get("object_name")))) - .tool(tool("javaducker_reladomo_graph", - "Traverse the Reladomo relationship graph from a root object up to N hops.", - schema(props("object_name", str("Root object name"), "depth", intParam("Max depth (default 3)")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/graph/" + a.get("object_name") + "?depth=" + ((Number) a.getOrDefault("depth", 3)).intValue()))) - .tool(tool("javaducker_reladomo_path", - "Find the shortest relationship path between two Reladomo objects.", - schema(props("from_object", str("Source object"), "to_object", str("Target object")), "from_object", "to_object")), - (ex, a) -> call(() -> httpGet("/reladomo/path?from=" + a.get("from_object") + "&to=" + a.get("to_object")))) - .tool(tool("javaducker_reladomo_schema", - "Derive SQL DDL from a Reladomo object: column types, PK, temporal columns, indices.", - schema(props("object_name", str("Reladomo object name")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/schema/" + a.get("object_name")))) - .tool(tool("javaducker_reladomo_object_files", - "List all files for a Reladomo object grouped by type (generated, hand-written, xml, config).", - schema(props("object_name", str("Reladomo object name")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/files/" + a.get("object_name")))) - .tool(tool("javaducker_reladomo_finders", - "Show Finder query patterns for a Reladomo object, ranked by frequency with locations.", - schema(props("object_name", str("Reladomo object name")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/finders/" + a.get("object_name")))) - .tool(tool("javaducker_reladomo_deepfetch", - "Show deep fetch profiles — which relationships are eagerly loaded together.", - schema(props("object_name", str("Reladomo object name")), "object_name")), - (ex, a) -> call(() -> httpGet("/reladomo/deepfetch/" + a.get("object_name")))) - 
.tool(tool("javaducker_reladomo_temporal", - "Temporal classification of all Reladomo objects with column info and query patterns.", "{}"), - (ex, a) -> call(() -> httpGet("/reladomo/temporal"))) - .tool(tool("javaducker_reladomo_config", - "Runtime config for a Reladomo object: DB connection, cache strategy. Omit name for full topology.", - schema(props("object_name", str("Object name (optional)")))), - (ex, a) -> call(() -> httpGet("/reladomo/config" + (a.containsKey("object_name") ? "?objectName=" + a.get("object_name") : "")))) - // ── Session Transcript tools ───────────────────────────────── - .tool(tool("javaducker_index_sessions", - "Index Claude Code session transcripts from a project directory. Makes past conversations searchable.", - schema(props( - "project_path", str("Path to project sessions directory (e.g. ~/.claude/projects//)"), - "max_sessions", intParam("Max sessions to index (default: all)"), - "incremental", str("true to skip unchanged files (default: false)")), - "project_path")), - (ex, a) -> call(() -> { - Map body = new LinkedHashMap<>(); - body.put("projectPath", a.get("project_path")); - if (a.containsKey("max_sessions")) body.put("maxSessions", ((Number) a.get("max_sessions")).intValue()); - if ("true".equals(a.get("incremental"))) body.put("incremental", true); - return httpPost("/index-sessions", body); - })) - .tool(tool("javaducker_search_sessions", - "Search past Claude Code conversations. Returns matching excerpts with session ID and role.", - schema(props( - "phrase", str("Search phrase"), - "max_results", intParam("Max results (default 20)")), - "phrase")), - (ex, a) -> call(() -> httpPost("/search-sessions", Map.of( - "phrase", a.get("phrase"), - "max_results", ((Number) a.getOrDefault("max_results", 20)).intValue())))) - .tool(tool("javaducker_session_context", - "Get full historical context for a topic: session excerpts + related artifacts. 
One call for complete history.", - schema(props("topic", str("Topic or query to search for")), "topic")), - (ex, a) -> call(() -> sessionContext((String) a.get("topic")))) - // ── Session Decision tools ────────────────────────────────── - .tool(tool("javaducker_extract_decisions", - "Store decisions extracted from a session. Claude calls this after reading a session to record key decisions.", - schema(props( - "session_id", str("Session ID"), - "decisions", str("JSON array of {text, context?, tags?} objects")), - "session_id", "decisions")), - (ex, a) -> call(() -> { - List> decisions = MAPPER.readValue((String) a.get("decisions"), new TypeReference<>() {}); - return httpPost("/extract-session-decisions", Map.of("sessionId", a.get("session_id"), "decisions", decisions)); - })) - .tool(tool("javaducker_recent_decisions", - "List recent decisions from past sessions, optionally filtered by tag.", - schema(props( - "max_sessions", intParam("Max sessions to look back (default 5)"), - "tag", str("Optional tag filter")))), - (ex, a) -> call(() -> httpGet("/session-decisions?maxSessions=" + - ((Number) a.getOrDefault("max_sessions", 5)).intValue() + - (a.containsKey("tag") ? 
"&tag=" + encode((String) a.get("tag")) : "")))) - .build(); - } - - // ── Tool implementations ────────────────────────────────────────────────── - - static Map sessionContext(String topic) throws Exception { - Map sessionHits = httpPost("/search-sessions", Map.of("phrase", topic, "max_results", 5)); - Map artifactHits = search(topic, "hybrid", 5); - Map result = new LinkedHashMap<>(); - result.put("topic", topic); - result.put("session_excerpts", sessionHits.get("results")); - result.put("related_artifacts", artifactHits.get("results")); - return result; - } - - static Map health() throws Exception { - return httpGet("/health"); - } - - static Map indexFile(String filePath) throws Exception { - return httpUpload(Path.of(filePath)); - } - - static final Set EXCLUDED_DIRS = Set.of( - "node_modules", ".git", ".svn", ".hg", - "target", "build", "dist", "out", ".gradle", - "__pycache__", ".pytest_cache", ".mypy_cache", - "vendor", ".idea", ".vscode", "coverage", - "temp", "test-corpus" - ); - - static Map indexDirectory(String directory, String extensions) throws Exception { - Path root = Path.of(directory); - Set exts = Set.of((extensions.isBlank() - ? ".java,.xml,.md,.yml,.json,.txt,.pdf,.docx,.pptx,.xlsx,.doc,.ppt,.xls,.odt,.odp,.ods,.html,.htm,.epub,.rtf,.eml" : extensions) - .toLowerCase().split(",")); - - List> uploaded = new ArrayList<>(); - int[] skipped = {0}, failed = {0}; - - try (Stream walk = Files.walk(root).filter(p -> - !p.equals(root) && - (Files.isRegularFile(p) || EXCLUDED_DIRS.stream().noneMatch( - ex -> p.getFileName() != null && p.getFileName().toString().equals(ex))))) { - for (Path file : walk.filter(Files::isRegularFile).toList()) { - boolean inExcluded = false; - for (Path part : file) { - if (EXCLUDED_DIRS.contains(part.toString())) { inExcluded = true; break; } - } - if (inExcluded) { skipped[0]++; continue; } - - String name = file.getFileName().toString().toLowerCase(); - String ext = name.contains(".") ? 
name.substring(name.lastIndexOf('.')) : ""; - if (!exts.contains(ext)) { skipped[0]++; continue; } - try { - Map r = httpUpload(file); - uploaded.add(Map.of("file", file.toString(), "artifact_id", (String) r.get("artifact_id"))); - } catch (Exception e) { failed[0]++; } - } - } - return Map.of( - "uploaded", uploaded, - "summary", Map.of("uploaded", uploaded.size(), "skipped", skipped[0], "failed", failed[0])); - } - - static Map search(String phrase, String mode, int maxResults) throws Exception { - return httpPost("/search", Map.of("phrase", phrase, "mode", mode, "max_results", maxResults)); - } - - static Map getFileText(String artifactId) throws Exception { - Map r = httpGet("/text/" + artifactId); - if (r == null) throw new RuntimeException("Artifact not found: " + artifactId); - return r; - } - - static Map getArtifactStatus(String artifactId) throws Exception { - Map r = httpGet("/status/" + artifactId); - if (r == null) throw new RuntimeException("Artifact not found: " + artifactId); - return r; - } - - static Map waitForIndexed(String artifactId, int timeoutSeconds) throws Exception { - long deadline = System.currentTimeMillis() + (long) timeoutSeconds * 1000; - long start = System.currentTimeMillis(); - while (System.currentTimeMillis() < deadline) { - Map r = getArtifactStatus(artifactId); - String status = (String) r.get("status"); - if ("INDEXED".equals(status) || "FAILED".equals(status)) { - return Map.of( - "artifact_id", artifactId, - "final_status", status, - "elapsed_seconds", (System.currentTimeMillis() - start) / 1000.0); - } - Thread.sleep(3000); - } - throw new RuntimeException( - "Artifact " + artifactId + " did not reach INDEXED within " + timeoutSeconds + "s"); - } - - static Map stats() throws Exception { - return httpGet("/stats"); - } - - static Map summarize(String artifactId) throws Exception { - Map r = httpGet("/summary/" + artifactId); - if (r == null) throw new RuntimeException("Artifact not found or no summary available: " + 
artifactId); - return r; - } - - static Map projectMap() throws Exception { - return httpGet("/map"); - } - - @SuppressWarnings("unchecked") - static Map checkStale(String filePathsJson, String gitDiffRef) throws Exception { - List paths = new ArrayList<>(); - - // If git_diff_ref is given, run git diff to get file paths - if (gitDiffRef != null && !gitDiffRef.isBlank()) { - ProcessBuilder pb = new ProcessBuilder("git", "diff", "--name-only", gitDiffRef); - pb.directory(Path.of(PROJECT_ROOT).toFile()); - pb.redirectErrorStream(true); - Process proc = pb.start(); - String output = new String(proc.getInputStream().readAllBytes()).trim(); - proc.waitFor(); - if (!output.isEmpty()) { - Path root = Path.of(PROJECT_ROOT).toAbsolutePath(); - for (String line : output.split("\n")) { - paths.add(root.resolve(line.trim()).toString()); - } - } - } - - // If file_paths is given, parse it - if (filePathsJson != null && !filePathsJson.isBlank()) { - try { - List parsed = MAPPER.readValue(filePathsJson, List.class); - paths.addAll(parsed); - } catch (Exception e) { - // Try as comma-separated - for (String p : filePathsJson.split(",")) { - if (!p.isBlank()) paths.add(p.trim()); - } - } - } - - if (paths.isEmpty()) { - throw new RuntimeException("Provide file_paths or git_diff_ref"); - } - - return httpPost("/stale", Map.of("file_paths", paths)); - } - - static Map watch(String action, String directory, String extensions) throws Exception { - if ("stop".equalsIgnoreCase(action)) { - return httpPost("/watch/stop", Map.of()); - } - if ("start".equalsIgnoreCase(action)) { - Map body = new LinkedHashMap<>(); - body.put("directory", directory); - if (extensions != null && !extensions.isBlank()) body.put("extensions", extensions); - return httpPost("/watch/start", body); - } - if ("status".equalsIgnoreCase(action)) { - return httpGet("/watch/status"); - } - throw new RuntimeException("Unknown action: " + action + ". 
Use start, stop, or status."); - } - - static Map dependencies(String artifactId) throws Exception { - Map r = httpGet("/dependencies/" + artifactId); - if (r == null) throw new RuntimeException("Artifact not found: " + artifactId); - return r; - } - - static Map dependents(String artifactId) throws Exception { - Map r = httpGet("/dependents/" + artifactId); - if (r == null) throw new RuntimeException("Artifact not found: " + artifactId); - return r; - } - - @SuppressWarnings("unchecked") - static Map indexHealth() throws Exception { - Map summary = httpGet("/stale/summary"); - int staleCount = ((Number) summary.getOrDefault("stale_count", 0)).intValue(); - double stalePercent = ((Number) summary.getOrDefault("stale_percentage", 0.0)).doubleValue(); - long total = ((Number) summary.getOrDefault("total_checked", 0)).longValue(); - - String recommendation; - if (staleCount == 0) { - recommendation = "All " + total + " indexed files are current. No action needed."; - } else if (stalePercent > 10) { - recommendation = "More than 10% of indexed files are stale (" + staleCount + "/" + total - + "). Consider running a full re-index with javaducker_index_directory."; - } else { - List> staleFiles = (List>) summary.get("stale"); - List paths = staleFiles != null - ? staleFiles.stream().limit(5) - .map(f -> (String) f.get("original_client_path")) - .filter(Objects::nonNull).toList() - : List.of(); - recommendation = staleCount + " file(s) are stale. Re-index them with javaducker_index_file: " + paths; - } - summary.put("recommendation", recommendation); - summary.put("health_status", stalePercent > 10 ? 
"degraded" : "healthy"); - return summary; - } - - // ── HTTP helpers ────────────────────────────────────────────────────────── - - static Map httpGet(String path) throws Exception { - var req = HttpRequest.newBuilder() - .uri(URI.create(BASE_URL + path)) - .GET().build(); - var resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); - if (resp.statusCode() == 404) return null; - if (resp.statusCode() >= 400) - throw new RuntimeException("HTTP " + resp.statusCode() + ": " + resp.body()); - return MAPPER.readValue(resp.body(), new TypeReference<>() {}); - } - - static Map httpPost(String path, Object body) throws Exception { - String json = MAPPER.writeValueAsString(body); - var req = HttpRequest.newBuilder() - .uri(URI.create(BASE_URL + path)) - .header("Content-Type", "application/json") - .POST(HttpRequest.BodyPublishers.ofString(json)) - .build(); - var resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); - if (resp.statusCode() >= 400) - throw new RuntimeException("HTTP " + resp.statusCode() + ": " + resp.body()); - return MAPPER.readValue(resp.body(), new TypeReference<>() {}); - } - - static Map httpUpload(Path path) throws Exception { - byte[] content = Files.readAllBytes(path); - String mediaType = Files.probeContentType(path); - if (mediaType == null) mediaType = "application/octet-stream"; - String fileName = path.getFileName().toString(); - String boundary = "----JavaDuckerBoundary" + System.currentTimeMillis(); - - var baos = new ByteArrayOutputStream(); - baos.write(("--" + boundary + "\r\n").getBytes()); - baos.write(("Content-Disposition: form-data; name=\"file\"; filename=\"" + fileName + "\"\r\n").getBytes()); - baos.write(("Content-Type: " + mediaType + "\r\n\r\n").getBytes()); - baos.write(content); - baos.write("\r\n".getBytes()); - baos.write(("--" + boundary + "\r\n").getBytes()); - baos.write("Content-Disposition: form-data; name=\"originalClientPath\"\r\n\r\n".getBytes()); - 
baos.write(path.toAbsolutePath().toString().getBytes()); - baos.write("\r\n".getBytes()); - baos.write(("--" + boundary + "--\r\n").getBytes()); - - var req = HttpRequest.newBuilder() - .uri(URI.create(BASE_URL + "/upload")) - .header("Content-Type", "multipart/form-data; boundary=" + boundary) - .POST(HttpRequest.BodyPublishers.ofByteArray(baos.toByteArray())) - .build(); - var resp = HTTP.send(req, HttpResponse.BodyHandlers.ofString()); - if (resp.statusCode() >= 400) - throw new RuntimeException("HTTP " + resp.statusCode() + ": " + resp.body()); - return MAPPER.readValue(resp.body(), new TypeReference<>() {}); - } - - // ── Server lifecycle ────────────────────────────────────────────────────── - - static final boolean WINDOWS = - System.getProperty("os.name", "").toLowerCase().contains("win"); - - static void ensureServerRunning() throws Exception { - if (isHealthy()) return; - System.err.println("[javaducker-mcp] Starting JavaDucker server..."); - Path script = Path.of(PROJECT_ROOT) - .resolve(WINDOWS ? "run-server.cmd" : "run-server.sh"); - ProcessBuilder pb = WINDOWS - ? new ProcessBuilder("cmd", "/c", script.toString()) - : new ProcessBuilder(script.toString()); - pb.redirectOutput(ProcessBuilder.Redirect.DISCARD) - .redirectError(ProcessBuilder.Redirect.DISCARD) - .start(); - long deadline = System.currentTimeMillis() + 60_000; - while (System.currentTimeMillis() < deadline) { - Thread.sleep(2000); - if (isHealthy()) { - System.err.println("[javaducker-mcp] Server ready."); - return; - } - } - throw new RuntimeException( - "JavaDucker server did not start within 60s. 
Build first: mvn package -DskipTests"); - } - - static boolean isHealthy() { - try (var s = new Socket(HOST, PORT)) { return true; } catch (Exception e) { return false; } - } - - // ── MCP helpers ─────────────────────────────────────────────────────────── - - static McpSchema.Tool tool(String name, String description, String schemaJson) { - return new McpSchema.Tool(name, description, schemaJson); - } - - static McpSchema.CallToolResult call(ThrowingSupplier fn) { - try { - String json = MAPPER.writeValueAsString(fn.get()); - return new McpSchema.CallToolResult(List.of(new McpSchema.TextContent(json)), false); - } catch (Exception e) { - return new McpSchema.CallToolResult(List.of(new McpSchema.TextContent("Error: " + e.getMessage())), true); - } - } - - @FunctionalInterface - interface ThrowingSupplier { Object get() throws Exception; } - - static Map str(String description) { - return Map.of("type", "string", "description", description); - } - - static Map intParam(String description) { - return Map.of("type", "integer", "description", description); - } - - @SuppressWarnings("unchecked") - static Map props(Object... pairs) { - Map m = new LinkedHashMap<>(); - for (int i = 0; i < pairs.length; i += 2) m.put((String) pairs[i], pairs[i + 1]); - return m; - } - - static String encode(String s) { - return URLEncoder.encode(s, StandardCharsets.UTF_8); - } - - static String schema(Map properties, String... required) { - try { - Map s = new LinkedHashMap<>(); - s.put("type", "object"); - s.put("properties", properties); - if (required.length > 0) s.put("required", List.of(required)); - return MAPPER.writeValueAsString(s); - } catch (Exception e) { throw new RuntimeException(e); } - } -} diff --git a/README.md b/README.md index e98bf27..4a4e361 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Code and content intelligence server for Claude Code. 
Indexes source files, docu - **Java 21** + **Spring Boot 3.2** - **REST API** (Spring MVC) — 59 endpoints - **DuckDB** file-based persistence (JDBC) — 22 tables -- **MCP Server** (JBang + MCP SDK) — 49 tools for Claude Code +- **MCP Server** (Spring AI) — 49 tools for Claude Code via stdio or SSE - **picocli** CLI client - **Apache PDFBox** for PDF extraction - **Apache POI** for Office documents (DOCX, XLSX, PPTX, DOC, XLS, PPT) @@ -168,13 +168,13 @@ Point the CLI at the correct port for your instance: ## MCP Server (Claude Code Integration) -JavaDucker ships a JBang-based MCP server (`JavaDuckerMcpServer.java`) that exposes 49 tools for Claude Code. +JavaDucker uses Spring AI's built-in MCP server with stdio transport. 49 tools are exposed as `@Tool`-annotated Spring beans. ### Setup -1. Start the JavaDucker server: +1. Build the project: ```bash - ./run-server.sh # or run-server.cmd on Windows + mvn package -DskipTests ``` 2. Register the MCP server in your Claude Code config (`.claude/settings.json` or `claude_desktop_config.json`): @@ -182,19 +182,14 @@ JavaDucker ships a JBang-based MCP server (`JavaDuckerMcpServer.java`) that expo { "mcpServers": { "javaducker": { - "command": "/path/to/code-helper/run-mcp.sh" + "command": "java", + "args": ["-jar", "/path/to/code-helper/target/javaducker-1.0.0.jar", "--spring.profiles.active=mcp"] } } } ``` -3. Environment variables (all optional): - ``` - JAVADUCKER_HOST=localhost (default: localhost) - HTTP_PORT=8080 (default: 8080) - PROJECT_ROOT=. (default: .) 
- JAVADUCKER_STALENESS_CHECK=true (default: true, set false to disable) - ``` + Or use the run script: `"command": "/path/to/code-helper/scripts/local/run-mcp.sh"` ### Multiple MCP Instances (per-project) @@ -333,14 +328,12 @@ javaducker_reladomo_schema object_name=OrderItem ## Architecture ``` -Claude Code ←─── MCP (stdio) ───→ JavaDuckerMcpServer.java (JBang) - │ - HTTP REST +Claude Code ←─── MCP (stdio) ───→ Spring Boot + Spring AI MCP │ - ▼ ┌──────────────────────┐ │ Spring Boot Server │ │ │ + │ MCP Tools (49) │─── @Tool beans │ RestController │─── 59 endpoints │ Services (14) │ │ Ingestion Pipeline │ diff --git a/VERSION b/VERSION index 0ea3a94..d15723f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0 +0.3.2 diff --git a/drom-plans/lightrag-parity.md b/drom-plans/lightrag-parity.md new file mode 100644 index 0000000..dfcf67d --- /dev/null +++ b/drom-plans/lightrag-parity.md @@ -0,0 +1,432 @@ +--- +title: LightRAG Feature Parity — Knowledge Graph + Semantic Tags +status: completed +created: 2026-04-02 +updated: 2026-04-02 +current_chapter: 10 +--- + +# LightRAG Feature Parity — Knowledge Graph + Semantic Tags + +## Problem + +code-helper currently provides chunk-level vector search (TF-IDF + HNSW) and basic concept tracking, but this is **naive RAG** — it misses structural relationships between concepts across files. LightRAG (github.com/HKUDS/LightRAG) demonstrates that graph-augmented RAG dramatically improves retrieval by: + +1. **Building a knowledge graph** from extracted entities and relationships +2. **Multi-level retrieval** — entity-local, relationship-global, and hybrid modes +3. **Entity merging** — deduplicating and consolidating entity descriptions across documents +4. 
**Community detection** — grouping related entities for high-level summaries + +Additionally, Claude needs **functional semantic tags** (4-10 per file) that capture what a file *does* and *means* beyond its filename and imports — e.g., "error-handling", "authentication", "rate-limiting", "database-migration", "event-sourcing". + +## Gap Analysis + +| Capability | LightRAG | code-helper | Gap | +|---|---|---|---| +| Chunk-level vector search | Yes (naive mode) | Yes (exact/semantic/hybrid) | None | +| Entity extraction (LLM) | Yes, with gleaning | No (regex imports only) | **Full gap** | +| Relationship extraction | Yes, typed + described | concept_links (shallow) | **Major gap** | +| Knowledge graph storage | Yes (Neo4j, NetworkX, PG) | No dedicated graph tables | **Full gap** | +| Entity merging/dedup | Yes (LLM summarization) | No | **Full gap** | +| Local retrieval (entity-centric) | Yes | No | **Full gap** | +| Global retrieval (relationship) | Yes | No | **Full gap** | +| Hybrid graph+vector retrieval | Yes (mix mode) | No | **Full gap** | +| Community detection | Yes | No | **Full gap** | +| Source provenance per entity | Yes (source_ids) | No | **Full gap** | +| Semantic tags (4-10 per file) | No | artifact_tags (manual) | **Partial** | +| Incremental graph updates | Yes | N/A | **Full gap** | +| Citation in search results | Yes (include_references) | No | **Full gap** | + +## Architecture + +``` +Current: Claude → search → chunk embeddings → cosine similarity → results + +Target: Claude → search → ┬─ chunk embeddings (naive) + ├─ entity graph (local) ← entity embeddings + ├─ relationship graph (global) ← rel embeddings + └─ combined (hybrid/mix) + + Claude → tag_synthesis → index file → extract entities → build graph + → generate 4-10 semantic tags + → link entities via relationships +``` + +All LLM-powered extraction happens via Claude Code calling MCP tools — no embedded LLM. The pipeline is: +1. File indexed (existing) → chunks + embeddings created +2. 
Claude calls `javaducker_extract_entities` → entities + relationships extracted and stored +3. Claude calls `javaducker_synthesize_tags` → 4-10 semantic tags generated and stored +4. Graph is queryable immediately via new retrieval modes + +--- + +## Chapter 1: Secondary Semantic Tags Table & Synthesis +**Status:** completed +**Depends on:** none + +New table `artifact_semantic_tags` with richer schema than existing `artifact_tags`. Each file gets 4-10 tags capturing functional/semantic meaning. Tags are synthesized by Claude Code calling an MCP tool that stores the result. + +- [ ] Add `artifact_semantic_tags` table to `SchemaBootstrap.java`: + ```sql + CREATE TABLE IF NOT EXISTS artifact_semantic_tags ( + artifact_id VARCHAR NOT NULL, + tag VARCHAR NOT NULL, + category VARCHAR NOT NULL, -- functional, architectural, domain, pattern, concern + confidence FLOAT DEFAULT 1.0, + rationale VARCHAR, -- why this tag was assigned (for Claude to explain) + source VARCHAR DEFAULT 'llm', -- llm, manual, rule + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (artifact_id, tag) + ) + ``` +- [ ] Add index `idx_semantic_tags_tag` on `artifact_semantic_tags(tag)` +- [ ] Add index `idx_semantic_tags_category` on `artifact_semantic_tags(category)` +- [ ] Add `SemanticTagService.java` in `server/mcp/` (or appropriate service package): + - `writeTags(artifactId, List tags)` — DELETE existing + INSERT new (4-10 tags) + - `findByTag(tag)` — find all artifacts with a given semantic tag + - `findByCategory(category)` — find all artifacts in a tag category + - `searchByTags(List tags)` — find artifacts matching any/all of a tag set + - `getTagCloud()` — all tags with artifact counts, grouped by category + - `getSuggestedTags(artifactId)` — return existing tags for similar files (by embedding similarity) as suggestions +- [ ] Add `javaducker_synthesize_tags` MCP tool — accepts `artifactId` + `tags[]` (each with tag, category, confidence, rationale). 
Calls `SemanticTagService.writeTags()`. Returns stored tags +- [ ] Add `javaducker_search_by_tags` MCP tool — accepts `tags[]`, optional `matchMode` (any/all), optional `category` filter. Returns matching artifacts with scores +- [ ] Add `javaducker_tag_cloud` MCP tool — returns all semantic tags grouped by category with counts +- [ ] Add `javaducker_suggest_tags` MCP tool — given an `artifactId`, finds similar files and returns their tags as suggestions for the current file +- [ ] Add REST endpoints: `POST /api/semantic-tags`, `GET /api/semantic-tags/search`, `GET /api/semantic-tags/cloud`, `GET /api/semantic-tags/suggest/{id}` +- [ ] Write tests: `SemanticTagServiceTest.java` — write/read/search/cloud/suggest operations, verify 4-10 tag constraint enforcement +- [ ] Write tests: MCP tool integration tests for all 4 new tools + +**Tag categories and examples:** +- **functional**: error-handling, authentication, authorization, validation, caching, logging, retry-logic, rate-limiting, pagination, serialization +- **architectural**: controller, service, repository, middleware, event-handler, scheduler, interceptor, filter, adapter, facade +- **domain**: user-management, payment-processing, order-fulfillment, notification, reporting, onboarding, billing, inventory +- **pattern**: factory, builder, observer, strategy, decorator, singleton, template-method, chain-of-responsibility, event-sourcing, CQRS +- **concern**: performance-critical, security-sensitive, backwards-compatible, deprecated, experimental, tech-debt, high-complexity + +--- + +## Chapter 2: Knowledge Graph Schema & Storage +**Status:** completed +**Depends on:** none (parallel with Chapter 1) + +Create the entity-relationship graph tables in DuckDB. This is the foundation for graph-based retrieval. 
+ +- [ ] Add `entities` table to `SchemaBootstrap.java`: + ```sql + CREATE TABLE IF NOT EXISTS entities ( + entity_id VARCHAR PRIMARY KEY, + entity_name VARCHAR NOT NULL, + entity_type VARCHAR NOT NULL, -- class, interface, method, function, module, concept, pattern, service, config, enum, annotation, exception, table, endpoint + description VARCHAR, + summary VARCHAR, -- LLM-merged summary when entity appears in multiple files + source_artifact_ids VARCHAR, -- JSON array of artifact_ids where entity was found + source_chunk_ids VARCHAR, -- JSON array of chunk_ids for provenance + mention_count INTEGER DEFAULT 1, + embedding DOUBLE[256], -- entity description embedding for local retrieval + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ``` +- [ ] Add `entity_relationships` table: + ```sql + CREATE TABLE IF NOT EXISTS entity_relationships ( + relationship_id VARCHAR PRIMARY KEY, + source_entity_id VARCHAR NOT NULL, + target_entity_id VARCHAR NOT NULL, + relationship_type VARCHAR NOT NULL, -- uses, extends, implements, calls, depends-on, configures, tests, creates, contains, references + description VARCHAR, + weight FLOAT DEFAULT 1.0, + source_artifact_ids VARCHAR, -- JSON array: which files contain this relationship + source_chunk_ids VARCHAR, + embedding DOUBLE[256], -- relationship description embedding for global retrieval + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + UNIQUE (source_entity_id, target_entity_id, relationship_type) + ) + ``` +- [ ] Add `entity_communities` table (for future community detection): + ```sql + CREATE TABLE IF NOT EXISTS entity_communities ( + community_id VARCHAR PRIMARY KEY, + community_name VARCHAR, + summary VARCHAR, -- LLM-generated community summary + entity_ids VARCHAR, -- JSON array of entity_ids in this community + level INTEGER DEFAULT 0, -- hierarchy level (0 = leaf community) + parent_community_id VARCHAR, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + 
``` +- [ ] Add indices: `idx_entities_name` on entities(entity_name), `idx_entities_type` on entities(entity_type), `idx_rel_source` on entity_relationships(source_entity_id), `idx_rel_target` on entity_relationships(target_entity_id), `idx_rel_type` on entity_relationships(relationship_type), `idx_community_level` on entity_communities(level) +- [ ] Write `SchemaBootstrapTest` additions — verify all 3 new tables created with correct columns, verify indices exist +- [ ] Add `KnowledgeGraphService.java`: + - `upsertEntity(entity)` — insert or merge entity (update description/summary, append source_ids, increment mention_count) + - `upsertRelationship(rel)` — insert or update relationship (merge descriptions, update weight) + - `getEntity(entityId)` — by ID + - `findEntitiesByName(name)` — fuzzy match on entity_name + - `findEntitiesByType(type)` — filter by entity_type + - `getRelationships(entityId)` — all relationships for an entity (both directions) + - `getNeighborhood(entityId, depth)` — BFS traversal N hops from entity + - `getPath(fromEntityId, toEntityId)` — shortest path between entities + - `getEntityCount()`, `getRelationshipCount()` — stats + - `deleteEntitiesForArtifact(artifactId)` — remove entities only sourced from this artifact +- [ ] Write tests: `KnowledgeGraphServiceTest.java` — upsert, merge, traversal, path, deletion + +--- + +## Chapter 3: Entity & Relationship Extraction (LLM-powered via MCP) +**Status:** completed +**Depends on:** Chapter 2 + +MCP tools that Claude Code calls to extract entities and relationships from indexed files. Claude reads the file text, identifies entities/relationships, and writes them via these tools. This is the equivalent of LightRAG's extraction pipeline but using Claude as the LLM. 
+ +- [ ] Add `javaducker_extract_entities` MCP tool — accepts: + - `artifactId` — the file to extract from + - `entities[]` — each with: name, type, description + - `relationships[]` — each with: sourceName, targetName, type, description + - Implementation: resolve entity names to IDs (create if new, merge if existing), upsert all entities and relationships, compute embeddings for descriptions, update source_artifact_ids/source_chunk_ids +- [ ] Add `javaducker_get_entities` MCP tool — accepts optional `artifactId`, `entityType`, `namePattern`. Returns entities with their relationships +- [ ] Add `javaducker_merge_entities` MCP tool — accepts `sourceEntityId`, `targetEntityId`. Merges source into target: combine descriptions, union source_ids, sum mention_counts, update all relationships pointing to source to point to target, delete source +- [ ] Add `javaducker_delete_entities` MCP tool — accepts `artifactId`. Removes all entities and relationships that are solely sourced from this artifact (decrement mention_count, remove from source_ids, delete if mention_count reaches 0) +- [ ] Add `javaducker_graph_stats` MCP tool — returns entity count, relationship count, top entity types, most connected entities, orphan entities +- [ ] Add REST endpoints: `POST /api/entities/extract`, `GET /api/entities`, `POST /api/entities/merge`, `DELETE /api/entities/by-artifact/{id}`, `GET /api/graph/stats` +- [ ] Add entity extraction prompt template in `EntityExtractionPrompt.java` — a reusable prompt template that Claude can use to self-prompt for extraction: + - Input: file text, file type, existing entity names (for merge candidates) + - Output: structured JSON with entities[] and relationships[] + - Include code-specific entity types: class, interface, method, function, module, endpoint, table, config-key, event, exception + - Include gleaning instruction: "re-read the text and check for missed entities" +- [ ] Write tests: entity extraction tool tests, merge tests, deletion 
cascade tests +- [ ] Write tests: verify embeddings are computed for entity/relationship descriptions + +**Entity type taxonomy for code:** +- `class`, `interface`, `enum`, `annotation` — type declarations +- `method`, `function` — callable units +- `module`, `package` — organizational units +- `endpoint` — REST/API endpoints (e.g., "POST /api/search") +- `table` — database tables referenced in code +- `config-key` — configuration properties (e.g., "spring.datasource.url") +- `event` — events published/consumed +- `exception` — custom exception types +- `concept` — abstract concepts (e.g., "caching", "retry logic") +- `service` — logical services (e.g., "search service", "ingestion pipeline") +- `pattern` — design patterns detected (e.g., "builder pattern", "factory") + +--- + +## Chapter 4: Graph-Based Retrieval Modes +**Status:** completed +**Depends on:** Chapter 2, Chapter 3 + +Add LightRAG-style retrieval modes that use the knowledge graph alongside vector search. Extend `SearchService` with new modes. + +- [ ] Add `GraphSearchService.java`: + - `localSearch(query, topK)` — **entity-centric retrieval**: + 1. Embed the query + 2. Find top-K entities by cosine similarity on entity.embedding + 3. For each entity, gather: entity description, connected relationships, source chunks + 4. Assemble context with source provenance + 5. Return ranked results with entity references + - `globalSearch(query, topK)` — **relationship-centric retrieval**: + 1. Embed the query + 2. Find top-K relationships by cosine similarity on relationship.embedding + 3. For each relationship, gather: source entity, target entity, relationship description, community context + 4. Assemble higher-level context about how concepts relate + 5. 
Return ranked results with relationship references + - `hybridGraphSearch(query, topK)` — combine local + global results + - `mixSearch(query, topK)` — combine graph search (hybrid) + chunk vector search (existing), deduplicate, rerank +- [ ] Extend `SearchService.java` to support new search modes: + - Add `mode` parameter options: `exact`, `semantic`, `hybrid` (existing), `local`, `global`, `graph_hybrid`, `mix` + - `mix` becomes the recommended default for Claude when graph is populated + - Fall back to existing `hybrid` mode when graph has no entities +- [ ] Update `javaducker_search` MCP tool to accept new mode values +- [ ] Add `javaducker_graph_search` MCP tool — dedicated tool for graph-only search with more options: + - `query` — search text + - `mode` — local/global/hybrid + - `topK` — max results + - `entityTypes[]` — optional filter by entity type + - `includeProvenance` — include source file references +- [ ] Add provenance/citation to search results — each result includes `sourceFiles[]` with file paths and line ranges +- [ ] Write tests: `GraphSearchServiceTest.java` — local, global, hybrid, mix modes with fixture data +- [ ] Write tests: verify fallback to chunk search when graph is empty +- [ ] Write tests: verify provenance/citation data in results + +--- + +## Chapter 5: Entity Merging & Deduplication +**Status:** completed +**Depends on:** Chapter 3 + +When the same logical entity appears in multiple files (e.g., "SearchService" referenced in tests, controllers, and config), merge descriptions into a consolidated summary. This is equivalent to LightRAG's map-reduce entity summarization. 
+ +- [ ] Add merge detection to `KnowledgeGraphService`: + - `findDuplicateCandidates()` — find entities with similar names (Levenshtein distance < 3, or same name different case) + - `findMergeCandidates(entityId)` — find entities with high embedding similarity to a given entity (cosine > 0.85) + - Score candidates by: name similarity + embedding similarity + shared relationships +- [ ] Add `javaducker_merge_candidates` MCP tool — returns pairs of entities that may be duplicates, with confidence scores. Claude reviews and confirms merges +- [ ] Add merge summarization prompt template in `EntityMergePrompt.java`: + - Input: entity A description, entity B description, shared relationships + - Output: merged description that preserves key information from both +- [ ] Add auto-merge for exact name matches (same entity_name, same entity_type) — happens automatically during `upsertEntity` +- [ ] Add `javaducker_confirm_merge` MCP tool — Claude provides merged description after reviewing candidates, tool executes the merge +- [ ] Write tests: duplicate detection, merge execution, relationship rewiring, mention count accumulation +- [ ] Write tests: auto-merge on exact match during upsert + +--- + +## Chapter 6: Community Detection +**Status:** completed +**Depends on:** Chapter 4, Chapter 5 + +Group related entities into communities for global-level retrieval. Uses a simple modularity-based approach (no external library needed — Louvain-like algorithm on the entity relationship graph). + +- [ ] Add `CommunityDetectionService.java`: + - `detectCommunities()` — run community detection on the entity relationship graph: + 1. Build adjacency list from entity_relationships + 2. Apply label propagation algorithm (simpler than Louvain, sufficient for code graphs) + 3. Each community gets a generated name based on its most prominent entities + 4. 
Store communities in `entity_communities` table + - `summarizeCommunity(communityId)` — returns the entities and their descriptions for Claude to generate a summary + - `getCommunities()` — list all communities with stats + - `getCommunity(communityId)` — community details with member entities + - `rebuildCommunities()` — full re-detection +- [ ] Add `javaducker_detect_communities` MCP tool — triggers community detection, returns community list +- [ ] Add `javaducker_summarize_community` MCP tool — accepts communityId + summary text (Claude generates the summary after reading community members). Stores summary in entity_communities +- [ ] Add `javaducker_communities` MCP tool — list communities with member counts +- [ ] Update `globalSearch` in `GraphSearchService` to use community summaries when available — search community summary embeddings first, then drill into member entity relationships +- [ ] Write tests: community detection on fixture graph, community CRUD, search integration +- [ ] Write tests: verify community summaries improve global search relevance + +--- + +## Chapter 7: Incremental Graph Updates +**Status:** completed +**Depends on:** Chapter 3, Chapter 5 + +When a file is re-indexed (content changed), the knowledge graph must be updated incrementally — remove stale entities/relationships, extract new ones, merge updated descriptions. This parallels LightRAG's incremental insert capability. + +- [ ] Add `GraphUpdateService.java`: + - `onArtifactReindexed(artifactId)` — called after a file is re-indexed: + 1. Decrement mention_count for all entities sourced from this artifact + 2. Remove artifact from source_artifact_ids arrays + 3. Delete entities with mention_count = 0 + 4. Delete relationships with empty source_artifact_ids + 5. 
Mark affected communities as stale + - `onArtifactDeleted(artifactId)` — same as reindexed but without re-extraction +- [ ] Hook into `UploadService` re-index flow — after chunks/embeddings are updated, call `onArtifactReindexed` +- [ ] Add `javaducker_reindex_graph` MCP tool — given an `artifactId`, Claude can trigger entity re-extraction after viewing the updated file. Calls `onArtifactReindexed` + prompts Claude to call `extract_entities` again +- [ ] Add `javaducker_graph_stale` MCP tool — list entities/relationships that reference artifacts which have been re-indexed since last extraction +- [ ] Write tests: incremental update scenarios (file changed, file deleted, entity shared across files) +- [ ] Write tests: verify entity survives deletion of one source if it has other sources +- [ ] Write tests: verify community staleness detection + +--- + +## Chapter 8: Enrichment Pipeline Orchestration +**Status:** completed +**Depends on:** Chapter 1, Chapter 3, Chapter 6 + +Create an MCP tool that gives Claude a structured enrichment pipeline to run on newly indexed files. Claude calls this tool to get the next batch of files needing enrichment, then processes each one. + +- [ ] Add `javaducker_enrichment_pipeline` MCP tool — returns a structured work plan: + ```json + { + "pending_files": [...], + "steps_per_file": [ + "1. Read file text via javaducker_get_file_text", + "2. Call javaducker_synthesize_tags with 4-10 semantic tags", + "3. Call javaducker_extract_entities with entities and relationships", + "4. Call javaducker_classify if not yet classified", + "5. 
Call javaducker_mark_enriched when done" + ], + "batch_size": 10, + "graph_stats": { "entities": N, "relationships": N, "communities": N } + } + ``` +- [ ] Add `javaducker_enrichment_status` MCP tool — returns progress: total files, enriched count, pending count, graph stats, community count +- [ ] Add `javaducker_rebuild_graph` MCP tool — nuclear option: clear all entities/relationships/communities, return list of all indexed artifacts for full re-extraction +- [ ] After enrichment of a batch, auto-trigger community re-detection if entity count changed by >10% +- [ ] Write tests: pipeline tool returns correct work plan, status reports accurate counts +- [ ] Write tests: rebuild clears graph cleanly + +--- + +## Chapter 9: Search UX Improvements +**Status:** completed +**Depends on:** Chapter 4, Chapter 1 + +Make the graph-augmented search and semantic tags useful in practice for Claude Code. + +- [ ] Update `javaducker_search` tool description to explain when to use each mode: + - `exact` — known string/identifier lookup + - `semantic` — conceptual similarity (existing TF-IDF) + - `hybrid` — default, combines exact + semantic + - `local` — find specific entities/classes/methods (graph) + - `global` — understand how concepts relate across the codebase (graph) + - `mix` — best overall: graph + vector combined (recommended when graph is populated) +- [ ] Add semantic tag search to `javaducker_search` — when mode is `hybrid` or `mix`, also match against semantic tags and boost results that match tag queries +- [ ] Add `javaducker_find_related` MCP tool — given an `artifactId`, find related files via: + 1. Shared semantic tags (same tags → high relevance) + 2. Entity co-occurrence (share entities → medium relevance) + 3. Relationship paths (connected via graph → lower relevance) + 4. 
Existing co-change data (git history) + - Return unified ranked list with relationship explanation per result +- [ ] Add `javaducker_explain` enhancement — include semantic tags and entity participation in the explain output +- [ ] Write tests: tag-boosted search, find_related ranking, explain output includes graph data + +--- + +## Chapter 10: Coverage & Integration Tests (Closed Loop) +**Status:** completed +**Depends on:** all previous chapters + +Ensure all new functionality has tests and overall coverage stays at or above ~70%. **This chapter runs as a closed-loop** per `workflows/closed-loop.md`. + +### Closed-loop protocol +- **Pass condition:** instruction coverage >= 70%, all tests green, zero regressions +- **Max iterations:** 5 +- **Capture per iteration:** total tests, pass/fail counts, instruction coverage %, branch coverage %, files with lowest coverage + +### Steps + +- [ ] Run `mvn test` — establish baseline: total test count, pass/fail +- [ ] Run `mvn jacoco:report` — capture baseline instruction coverage % +- [ ] Identify new classes with < 70% coverage (SemanticTagService, KnowledgeGraphService, GraphSearchService, CommunityDetectionService, GraphUpdateService, enrichment tools) +- [ ] **Iteration loop** — for each under-covered class: + - [ ] Add targeted tests to raise its coverage + - [ ] Re-run `mvn test` — verify no regressions (test count must not decrease, failures must be 0) + - [ ] Re-run `mvn jacoco:report` — check coverage delta + - [ ] If coverage regressed vs previous iteration → revert, try different approach + - [ ] Log iteration to context/MEMORY.md: iteration #, coverage %, tests added, classes covered +- [ ] Add integration tests: + - [ ] `GraphSearchServiceTest.java` — end-to-end: index files → extract entities → search via all graph modes + - [ ] `SemanticTagIntegrationTest.java` — full pipeline: index → synthesize tags → search by tags + - [ ] `CommunityDetectionIntegrationTest.java` — detect communities → global search + - [ 
] `IncrementalGraphUpdateTest.java` — re-index changed file → verify graph updated + - [ ] `EnrichmentPipelineTest.java` — pipeline tool output, status, rebuild +- [ ] Final verification: all tests green, coverage >= 70%, no regressions from baseline 740+ tests +- [ ] Log final summary to context/MEMORY.md + +--- + +## Implementation Notes + +### What Claude Code does vs what the server does +- **Server (Java)**: stores entities/relationships/tags, computes embeddings, runs graph traversal, executes search queries +- **Claude Code (LLM)**: reads file text, identifies entities and relationships, generates semantic tags, writes merged entity descriptions, generates community summaries +- This split mirrors LightRAG's architecture but uses Claude Code as the LLM instead of an embedded model + +### DuckDB considerations +- Entity embeddings use same DOUBLE[256] format as chunk_embeddings +- HNSW index can be extended to entity/relationship embeddings +- JSON arrays for source_ids stored as VARCHAR (DuckDB has json_extract functions if needed) +- DELETE+INSERT pattern for upserts (known DuckDB ART index constraint) + +### Semantic tag synthesis prompt (for Claude to use) +When Claude calls `javaducker_synthesize_tags`, it should: +1. Read the file text and summary +2. Consider: what does this file DO? (functional), how is it structured? (architectural), what domain does it serve? (domain), what patterns does it use? (pattern), what cross-cutting concerns? (concern) +3. Generate 4-10 tags covering at least 3 categories +4. 
Include rationale for each tag (helps future Claude understand tag assignment) + +### Migration from existing concept tables +- `artifact_concepts` and `concept_links` tables remain — they serve a different (simpler) purpose +- New `entities` and `entity_relationships` tables are the knowledge graph +- Over time, concept data can be migrated to entities if desired diff --git a/drom-plans/spring-ai-mcp.md b/drom-plans/spring-ai-mcp.md new file mode 100644 index 0000000..b6b1699 --- /dev/null +++ b/drom-plans/spring-ai-mcp.md @@ -0,0 +1,233 @@ +--- +title: Migrate MCP Server from JBang to Spring AI +status: completed +created: 2026-03-29 +updated: 2026-03-29 +current_chapter: 7 +--- + +# Migrate MCP Server from JBang to Spring AI + +## Problem + +The current MCP server (`JavaDuckerMcpServer.java`) is a standalone JBang script that: +- Uses outdated MCP SDK 0.8.1 (latest is 1.1.1) — Claude Code can't see tools +- Proxies all calls via HTTP to the Spring Boot REST API (unnecessary round-trip) +- Must start and manage the Spring Boot server as a subprocess +- Has fragile stdout/stderr handling that can corrupt JSON-RPC stdio messages +- References moved scripts (`run-server.sh`) that no longer exist at expected paths + +## Solution + +Replace the JBang script with Spring AI's built-in MCP server support. Tools become Spring beans that call services directly — no HTTP proxy, no subprocess management, no SDK version drift. 
## Architecture + +``` +Before: Claude Code → stdio → JBang MCP (0.8.1) → HTTP → Spring Boot REST → Services +After: Claude Code → stdio → Spring Boot + Spring AI MCP → Services (direct) +``` + +## Key Decisions + +- **Spring Boot 3.4+** required for Spring AI 1.0 compatibility (currently 3.2.5) +- **`spring-ai-mcp-server-spring-boot-starter`** provides stdio + SSE transport auto-config +- **`@McpTool`/`@McpToolParam` annotations** on methods in `@Component` classes (Spring AI 1.1+) +- **`spring-ai-starter-mcp-server-webmvc`** allows SSE alongside existing REST + optional stdio +- **Two Spring profiles**: `mcp` (stdio, no web server) and `server` (REST API, web) +- Tools call injected services directly — no HTTP client, no REST controller in the loop +- The REST API continues to work independently for non-MCP clients + +## Tool Inventory (49 tools from JBang → Spring AI) + +### Group 1: Core (8 tools) — UploadService, ArtifactService, SearchService, StatsService +- `javaducker_health`, `javaducker_stats` +- `javaducker_index_file`, `javaducker_index_directory` +- `javaducker_search` +- `javaducker_get_file_text`, `javaducker_get_artifact_status`, `javaducker_wait_for_indexed` + +### Group 2: Analysis (9 tools) — ExplainService, GitBlameService, CoChangeService, DependencyService, ProjectMapService, StalenessService +- `javaducker_explain`, `javaducker_blame`, `javaducker_related` +- `javaducker_dependencies`, `javaducker_dependents` +- `javaducker_map`, `javaducker_stale`, `javaducker_index_health`, `javaducker_summarize` + +### Group 3: Watch (1 tool) — FileWatcher +- `javaducker_watch` + +### Group 4: Content Intelligence Write (8 tools) — ContentIntelligenceService +- `javaducker_classify`, `javaducker_tag`, `javaducker_extract_points` +- `javaducker_set_freshness`, `javaducker_synthesize`, `javaducker_link_concepts` +- `javaducker_enrich_queue`, `javaducker_mark_enriched` + +### Group 5: Content Intelligence Read (9 tools) — ContentIntelligenceService +- 
`javaducker_latest`, `javaducker_find_by_type`, `javaducker_find_by_tag`, `javaducker_find_points` +- `javaducker_concepts`, `javaducker_concept_timeline`, `javaducker_concept_health` +- `javaducker_stale_content`, `javaducker_synthesis` + +### Group 6: Reladomo (9 tools) — ReladomoQueryService +- `javaducker_reladomo_relationships`, `javaducker_reladomo_graph`, `javaducker_reladomo_path` +- `javaducker_reladomo_schema`, `javaducker_reladomo_object_files` +- `javaducker_reladomo_finders`, `javaducker_reladomo_deepfetch` +- `javaducker_reladomo_temporal`, `javaducker_reladomo_config` + +### Group 7: Session Transcripts (5 tools) — SessionIngestionService +- `javaducker_index_sessions`, `javaducker_search_sessions`, `javaducker_session_context` +- `javaducker_extract_decisions`, `javaducker_recent_decisions` + +--- + +## Chapter 1: Spring Boot Upgrade and Spring AI Dependencies + +**Status:** completed + +Upgrade Spring Boot to 3.4+ and add Spring AI MCP server starter. + +- [ ] Update `pom.xml`: Spring Boot parent `3.2.5` → `3.4.4` (or latest 3.4.x) +- [ ] Add Spring AI BOM to `` (version 1.1.4) +- [ ] Add dependency: `spring-ai-starter-mcp-server-webmvc` +- [ ] Verify `mvn compile` succeeds with no breaking changes from Boot upgrade +- [ ] Run `mvn test` — fix any compilation or deprecation issues from Spring Boot 3.4 +- [ ] **TEST**: Verify all existing tests still pass (baseline: 585 tests, 0 failures) +- [ ] **TEST**: Verify existing REST API still works: `mvn spring-boot:run` + `curl localhost:8080/api/health` + +## Chapter 2: MCP Tool Beans — Core Group + +**Status:** completed + +Create tool provider class for the 8 core tools (health, index, search, stats, text, status, wait). 
+ +- [ ] Create `src/main/java/com/javaducker/server/mcp/CoreTools.java` as `@Component` +- [ ] Inject `UploadService`, `ArtifactService`, `SearchService`, `StatsService`, `AppConfig` +- [ ] Implement `@Tool` methods: `health`, `indexFile`, `indexDirectory`, `search`, `getFileText`, `getArtifactStatus`, `waitForIndexed`, `stats` +- [ ] Preserve tool names (e.g., `@Tool(name = "javaducker_search")`) and descriptions from JBang +- [ ] Preserve the staleness warning enrichment on search results +- [ ] Add `@ToolParam` annotations with descriptions matching JBang schema +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/CoreToolsTest.java` +- [ ] **TEST**: Test `health` returns status map with expected keys +- [ ] **TEST**: Test `indexFile` delegates to UploadService and returns artifact_id +- [ ] **TEST**: Test `search` delegates to SearchService with correct mode/limit params +- [ ] **TEST**: Test `search` appends staleness warning when present +- [ ] **TEST**: Test `getFileText` and `getArtifactStatus` delegate correctly +- [ ] **TEST**: Test `waitForIndexed` polls and returns on INDEXED status +- [ ] **TEST**: Test `waitForIndexed` returns error on FAILED status +- [ ] **TEST**: Run `mvn test` — all pass including new tests + +## Chapter 3: MCP Tool Beans — Analysis & Watch Groups + +**Status:** completed + +Create tool providers for analysis (9 tools) and watch (1 tool). 
+ +- [ ] Create `src/main/java/com/javaducker/server/mcp/AnalysisTools.java` as `@Component` +- [ ] Inject `ExplainService`, `GitBlameService`, `CoChangeService`, `DependencyService`, `ProjectMapService`, `StalenessService`, `ArtifactService` +- [ ] Implement `@Tool` methods: `explain`, `blame`, `related`, `dependencies`, `dependents`, `map`, `stale`, `indexHealth`, `summarize` +- [ ] Create `src/main/java/com/javaducker/server/mcp/WatchTools.java` as `@Component` +- [ ] Inject `FileWatcher` (or the service that wraps it) +- [ ] Implement `@Tool` method: `watch` (start/stop actions) +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/AnalysisToolsTest.java` +- [ ] **TEST**: Test `explain` delegates to ExplainService with file path +- [ ] **TEST**: Test `blame` passes optional start_line/end_line params +- [ ] **TEST**: Test `related` passes file path and max_results to CoChangeService +- [ ] **TEST**: Test `dependencies` and `dependents` delegate by artifact_id +- [ ] **TEST**: Test `stale` handles both file_paths and git_diff_ref params +- [ ] **TEST**: Test `indexHealth` returns recommendation and health_status fields +- [ ] **TEST**: Test `summarize` appends staleness warning when file changed on disk +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/WatchToolsTest.java` +- [ ] **TEST**: Test `watch` start action passes directory and extensions +- [ ] **TEST**: Test `watch` stop action works without directory param +- [ ] **TEST**: Run `mvn test` — all pass + +## Chapter 4: MCP Tool Beans — Content Intelligence Groups + +**Status:** completed + +Create tool provider for content intelligence write (8) and read (8) tools. 
+ +- [ ] Create `src/main/java/com/javaducker/server/mcp/ContentIntelligenceTools.java` as `@Component` +- [ ] Inject `ContentIntelligenceService` +- [ ] Implement write `@Tool` methods: `classify`, `tag`, `extractPoints`, `setFreshness`, `synthesize`, `linkConcepts`, `enrichQueue`, `markEnriched` +- [ ] Implement read `@Tool` methods: `latest`, `findByType`, `findByTag`, `findPoints`, `concepts`, `conceptTimeline`, `conceptHealth`, `staleContent`, `synthesis` +- [ ] Handle JSON array string parameters (tags, points, links) — parse with ObjectMapper +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/ContentIntelligenceToolsTest.java` +- [ ] **TEST**: Test `classify` delegates with artifactId, docType, confidence, method +- [ ] **TEST**: Test `tag` parses JSON array string and delegates tag list +- [ ] **TEST**: Test `extractPoints` parses JSON array string and delegates points list +- [ ] **TEST**: Test `setFreshness` delegates with freshness enum and optional superseded_by +- [ ] **TEST**: Test `synthesize` delegates all fields including optional ones +- [ ] **TEST**: Test `linkConcepts` parses JSON array of link objects +- [ ] **TEST**: Test `enrichQueue` passes limit param with default fallback +- [ ] **TEST**: Test `latest` delegates topic to service +- [ ] **TEST**: Test `findByType`, `findByTag`, `findPoints` delegate correctly +- [ ] **TEST**: Test `synthesis` routes to by-id or by-keyword based on params +- [ ] **TEST**: Test malformed JSON input returns error, not exception +- [ ] **TEST**: Run `mvn test` — all pass + +## Chapter 5: MCP Tool Beans — Reladomo & Session Groups + +**Status:** completed + +Create tool providers for Reladomo (9 tools) and session transcript (5 tools). 
+ +- [ ] Create `src/main/java/com/javaducker/server/mcp/ReladomoTools.java` as `@Component` +- [ ] Inject `ReladomoQueryService`, `ReladomoService` +- [ ] Implement `@Tool` methods: `relationships`, `graph`, `path`, `schema`, `objectFiles`, `finders`, `deepfetch`, `temporal`, `config` +- [ ] Create `src/main/java/com/javaducker/server/mcp/SessionTools.java` as `@Component` +- [ ] Inject `SessionIngestionService` +- [ ] Implement `@Tool` methods: `indexSessions`, `searchSessions`, `sessionContext`, `extractDecisions`, `recentDecisions` +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/ReladomoToolsTest.java` +- [ ] **TEST**: Test `relationships` delegates object_name to ReladomoQueryService +- [ ] **TEST**: Test `graph` passes depth param with default of 3 +- [ ] **TEST**: Test `path` passes from_object and to_object params +- [ ] **TEST**: Test `config` handles optional object_name (present vs absent) +- [ ] **TEST**: Test all 9 Reladomo tools delegate to correct service method +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/SessionToolsTest.java` +- [ ] **TEST**: Test `indexSessions` passes projectPath, maxSessions, incremental +- [ ] **TEST**: Test `searchSessions` passes phrase and max_results with defaults +- [ ] **TEST**: Test `sessionContext` delegates topic to service +- [ ] **TEST**: Test `extractDecisions` parses JSON decisions array +- [ ] **TEST**: Test `recentDecisions` passes maxSessions and optional tag filter +- [ ] **TEST**: Run `mvn test` — all pass + +## Chapter 6: Transport Configuration and Profiles + +**Status:** completed + +Configure Spring profiles for stdio (MCP) vs web (REST API) operation. 
+ +- [ ] Add to `application.yml` under `spring.ai.mcp.server`: `name: javaducker`, `version: 1.0.0` +- [ ] Create `application-mcp.yml`: `spring.main.web-application-type: none`, stdio transport enabled +- [ ] Create `application-server.yml` (or keep default): web enabled, MCP disabled +- [ ] Update run-mcp script instructions to use `--spring.profiles.active=mcp` +- [ ] Update run-server script instructions to use `--spring.profiles.active=server` +- [ ] Ensure no `System.out` calls in any tool or service code (would corrupt stdio) +- [ ] **TEST**: Create `src/test/java/com/javaducker/server/mcp/McpProfileTest.java` +- [ ] **TEST**: Test MCP profile loads with `web-application-type: none` (no port binding) +- [ ] **TEST**: Test server profile loads with web enabled and MCP beans excluded +- [ ] **TEST**: Test default profile (no profile) loads REST API normally +- [ ] **TEST**: Grep all src/main/java for `System.out` — assert zero occurrences (except main methods) +- [ ] **TEST**: Run `mvn test` — all pass including profile tests + +## Chapter 7: Integration Testing and JBang Retirement + +**Status:** completed + +End-to-end validation, tool parity check, and cleanup. 
+ +- [ ] **TEST**: Create `src/test/java/com/javaducker/integration/McpToolRegistrationTest.java` +- [ ] **TEST**: Load Spring context with MCP profile, inject `ToolCallbackProvider` beans +- [ ] **TEST**: Assert exactly 40 tools registered (match count from JBang) +- [ ] **TEST**: Assert every tool name from JBang inventory is present (hardcoded list of all 40 names) +- [ ] **TEST**: Assert every tool has a non-empty description +- [ ] **TEST**: Create `src/test/java/com/javaducker/integration/McpToolCallTest.java` +- [ ] **TEST**: Call `javaducker_health` tool via ToolCallback, verify response contains status +- [ ] **TEST**: Call `javaducker_stats` tool, verify response contains expected keys +- [ ] **TEST**: Call `javaducker_search` tool with phrase param, verify response structure +- [ ] **TEST**: Call a tool with missing required param, verify error response (not exception) +- [ ] **TEST**: Run full `mvn test` — all existing (65+) and new tests pass +- [ ] **TEST**: Record final test count and verify coverage >= 75% +- [ ] Move `JavaDuckerMcpServer.java` to `script-instructions/jbang-mcp-server-legacy.md` (preserve for reference) +- [ ] Remove JBang from run-mcp script instructions (update `script-instructions/run-scripts.md`) +- [ ] Update `script-instructions/run-scripts.md` with new run-mcp command: `java -jar target/javaducker-1.0.0.jar --spring.profiles.active=mcp` +- [ ] Update README: remove JBang mention, document `--spring.profiles.active=mcp` usage +- [ ] Update `start-here.md`: remove JBang prerequisite, simplify setup +- [ ] Verify Claude Code can see all tools when connected via stdio diff --git a/pom.xml b/pom.xml index 91cadeb..1fd2716 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ org.springframework.boot spring-boot-starter-parent - 3.2.5 + 3.4.4 com.javaducker @@ -23,6 +23,18 @@ 3.0.1 + + + + org.springframework.ai + spring-ai-bom + 1.1.4 + pom + import + + + + @@ -35,6 +47,12 @@ spring-boot-starter-web + + + org.springframework.ai + 
spring-ai-starter-mcp-server-webmvc + + org.duckdb diff --git a/script-instructions/run-scripts.md b/script-instructions/run-scripts.md index 2f4b925..607e29b 100644 --- a/script-instructions/run-scripts.md +++ b/script-instructions/run-scripts.md @@ -73,21 +73,21 @@ java -cp target/javaducker-1.0.0.jar \ ## run-mcp.sh +Uses Spring AI MCP server with stdio transport (no JBang needed). + ```bash #!/bin/bash cd "$(dirname "$0")/.." DB="${DB:-data/javaducker.duckdb}" -HTTP_PORT="${HTTP_PORT:-8080}" INTAKE_DIR="${INTAKE_DIR:-temp/intake}" mvn -q package -DskipTests 1>&2 java -jar target/javaducker-1.0.0.jar \ + --spring.profiles.active=mcp \ --javaducker.db-path="$DB" \ - --server.port="$HTTP_PORT" \ - --javaducker.intake-dir="$INTAKE_DIR" \ - --spring.main.web-application-type=none "$@" + --javaducker.intake-dir="$INTAKE_DIR" "$@" ``` --- @@ -105,14 +105,12 @@ echo Building project... 1>&2 call mvn -q package -DskipTests 1>&2 if "%DB%"=="" set DB=data\javaducker.duckdb -if "%HTTP_PORT%"=="" set HTTP_PORT=8080 if "%INTAKE_DIR%"=="" set INTAKE_DIR=temp\intake java -jar target\javaducker-1.0.0.jar ^ + --spring.profiles.active=mcp ^ --javaducker.db-path="%DB%" ^ - --server.port=%HTTP_PORT% ^ - --javaducker.intake-dir="%INTAKE_DIR%" ^ - --spring.main.web-application-type=none %* + --javaducker.intake-dir="%INTAKE_DIR%" %* ``` --- diff --git a/src/main/java/com/javaducker/server/db/SchemaBootstrap.java b/src/main/java/com/javaducker/server/db/SchemaBootstrap.java index f7928ab..67d7ee5 100644 --- a/src/main/java/com/javaducker/server/db/SchemaBootstrap.java +++ b/src/main/java/com/javaducker/server/db/SchemaBootstrap.java @@ -427,6 +427,100 @@ ON session_transcripts (role) ON session_decisions (tags) """); + // Semantic tags for LightRAG parity + stmt.execute(""" + CREATE TABLE IF NOT EXISTS artifact_semantic_tags ( + artifact_id VARCHAR NOT NULL, + tag VARCHAR NOT NULL, + category VARCHAR NOT NULL, + confidence FLOAT DEFAULT 1.0, + rationale VARCHAR, + source VARCHAR 
DEFAULT 'llm', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (artifact_id, tag) + ) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_semantic_tags_tag + ON artifact_semantic_tags (tag) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_semantic_tags_category + ON artifact_semantic_tags (category) + """); + + // Knowledge Graph: entities + stmt.execute(""" + CREATE TABLE IF NOT EXISTS entities ( + entity_id VARCHAR PRIMARY KEY, + entity_name VARCHAR NOT NULL, + entity_type VARCHAR NOT NULL, + description VARCHAR, + summary VARCHAR, + source_artifact_ids VARCHAR, + source_chunk_ids VARCHAR, + mention_count INTEGER DEFAULT 1, + embedding DOUBLE[], + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """); + + // Knowledge Graph: entity relationships + stmt.execute(""" + CREATE TABLE IF NOT EXISTS entity_relationships ( + relationship_id VARCHAR PRIMARY KEY, + source_entity_id VARCHAR NOT NULL, + target_entity_id VARCHAR NOT NULL, + relationship_type VARCHAR NOT NULL, + description VARCHAR, + weight FLOAT DEFAULT 1.0, + source_artifact_ids VARCHAR, + source_chunk_ids VARCHAR, + embedding DOUBLE[], + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """); + + // Knowledge Graph: entity communities + stmt.execute(""" + CREATE TABLE IF NOT EXISTS entity_communities ( + community_id VARCHAR PRIMARY KEY, + community_name VARCHAR, + summary VARCHAR, + entity_ids VARCHAR, + level INTEGER DEFAULT 0, + parent_community_id VARCHAR, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """); + + // Knowledge Graph: indices + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_entities_name + ON entities (entity_name) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_entities_type + ON entities (entity_type) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_rel_source + ON entity_relationships (source_entity_id) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS 
idx_rel_target + ON entity_relationships (target_entity_id) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_rel_type + ON entity_relationships (relationship_type) + """); + stmt.execute(""" + CREATE INDEX IF NOT EXISTS idx_community_level + ON entity_communities (level) + """); + log.info("Database schema created/verified"); } } diff --git a/src/main/java/com/javaducker/server/mcp/AnalysisTools.java b/src/main/java/com/javaducker/server/mcp/AnalysisTools.java new file mode 100644 index 0000000..81459f0 --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/AnalysisTools.java @@ -0,0 +1,462 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.*; +import com.javaducker.server.service.GitBlameService.BlameEntry; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStreamReader; +import java.util.*; +import java.util.stream.Collectors; + +@Component +public class AnalysisTools { + + private static final Logger log = LoggerFactory.getLogger(AnalysisTools.class); + private static final ObjectMapper objectMapper = new ObjectMapper(); + + private final ExplainService explainService; + private final GitBlameService gitBlameService; + private final CoChangeService coChangeService; + private final DependencyService dependencyService; + private final ProjectMapService projectMapService; + private final StalenessService stalenessService; + private final ArtifactService artifactService; + private final SemanticTagService semanticTagService; + private final KnowledgeGraphService knowledgeGraphService; + + public AnalysisTools(ExplainService explainService, + GitBlameService gitBlameService, + 
CoChangeService coChangeService, + DependencyService dependencyService, + ProjectMapService projectMapService, + StalenessService stalenessService, + ArtifactService artifactService, + SemanticTagService semanticTagService, + KnowledgeGraphService knowledgeGraphService) { + this.explainService = explainService; + this.gitBlameService = gitBlameService; + this.coChangeService = coChangeService; + this.dependencyService = dependencyService; + this.projectMapService = projectMapService; + this.stalenessService = stalenessService; + this.artifactService = artifactService; + this.semanticTagService = semanticTagService; + this.knowledgeGraphService = knowledgeGraphService; + } + + @Tool(name = "javaducker_explain", + description = "Aggregate everything JavaDucker knows about a file: summary, dependencies, classification, tags, and more") + public Map explain( + @ToolParam(description = "File path relative to PROJECT_ROOT", required = true) String file_path) { + try { + Map result = explainService.explainByPath(file_path); + return result != null ? result : Map.of("error", "File not found in index: " + file_path); + } catch (Exception e) { + log.error("explain failed for: {}", file_path, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_blame", + description = "Run git blame on a file, optionally for a specific line range. 
Returns commit metadata per line group.") + public Map blame( + @ToolParam(description = "File path relative to PROJECT_ROOT", required = true) String file_path, + @ToolParam(description = "Start line number (optional, requires end_line)", required = false) Integer start_line, + @ToolParam(description = "End line number (optional, requires start_line)", required = false) Integer end_line) { + try { + List entries; + if (start_line != null && end_line != null) { + entries = gitBlameService.blameForLines(file_path, start_line, end_line); + } else { + entries = gitBlameService.blame(file_path); + } + + List> converted = entries.stream() + .map(AnalysisTools::blameEntryToMap) + .collect(Collectors.toList()); + + Map result = new LinkedHashMap<>(); + result.put("file_path", file_path); + result.put("entry_count", converted.size()); + result.put("entries", converted); + return result; + } catch (Exception e) { + log.error("blame failed for: {}", file_path, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_related", + description = "Find files that frequently change together with the given file (co-change analysis)") + public Map related( + @ToolParam(description = "File path relative to PROJECT_ROOT", required = true) String file_path, + @ToolParam(description = "Maximum number of related files to return (default: 10)", required = false) Integer max_results) { + try { + int effectiveMax = (max_results == null || max_results <= 0) ? 
10 : max_results; + List> related = coChangeService.getRelatedFiles(file_path, effectiveMax); + + Map result = new LinkedHashMap<>(); + result.put("file_path", file_path); + result.put("count", related.size()); + result.put("related_files", related); + return result; + } catch (Exception e) { + log.error("related failed for: {}", file_path, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_dependencies", + description = "List artifacts that the given artifact depends on") + public Map dependencies( + @ToolParam(description = "The artifact ID to query dependencies for", required = true) String artifact_id) { + try { + List> deps = dependencyService.getDependencies(artifact_id); + + Map result = new LinkedHashMap<>(); + result.put("artifact_id", artifact_id); + result.put("count", deps.size()); + result.put("dependencies", deps); + return result; + } catch (Exception e) { + log.error("dependencies failed for: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_dependents", + description = "List artifacts that depend on the given artifact") + public Map dependents( + @ToolParam(description = "The artifact ID to query dependents for", required = true) String artifact_id) { + try { + List> deps = dependencyService.getDependents(artifact_id); + + Map result = new LinkedHashMap<>(); + result.put("artifact_id", artifact_id); + result.put("count", deps.size()); + result.put("dependents", deps); + return result; + } catch (Exception e) { + log.error("dependents failed for: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_map", + description = "Return a high-level map of all indexed artifacts and their relationships") + public Map map() { + try { + return projectMapService.getProjectMap(); + } catch (Exception e) { + log.error("map failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_stale", + description 
= "Check which files have changed on disk since they were last indexed") + public Map stale( + @ToolParam(description = "JSON array of file paths to check, e.g. [\"src/Foo.java\",\"src/Bar.java\"]", required = false) String file_paths, + @ToolParam(description = "Git ref to diff against (e.g. HEAD~3, main). Files from git diff --name-only will be checked.", required = false) String git_diff_ref) { + try { + List paths = resolveFilePaths(file_paths, git_diff_ref); + if (paths.isEmpty()) { + return Map.of("error", "Provide file_paths (JSON array) or git_diff_ref to identify files to check"); + } + return stalenessService.checkStaleness(paths); + } catch (Exception e) { + log.error("stale check failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_index_health", + description = "Check overall index freshness: how many indexed artifacts are stale vs current") + public Map indexHealth() { + try { + Map result = stalenessService.checkAll(); + + int staleCount = result.containsKey("stale_count") + ? ((Number) result.get("stale_count")).intValue() : 0; + long totalChecked = result.containsKey("total_checked") + ? ((Number) result.get("total_checked")).longValue() : 0; + double stalePercentage = result.containsKey("stale_percentage") + ? ((Number) result.get("stale_percentage")).doubleValue() : 0.0; + + String healthStatus; + String recommendation; + if (staleCount == 0) { + healthStatus = "healthy"; + recommendation = "All indexed files are up to date."; + } else if (stalePercentage <= 20.0) { + healthStatus = "degraded"; + recommendation = staleCount + " of " + totalChecked + + " files are stale. Consider re-indexing the stale files."; + } else { + healthStatus = "unhealthy"; + recommendation = staleCount + " of " + totalChecked + + " files are stale (" + String.format("%.0f%%", stalePercentage) + + "). 
A full re-index is recommended."; + } + result.put("health_status", healthStatus); + result.put("recommendation", recommendation); + return result; + } catch (Exception e) { + log.error("index health check failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_summarize", + description = "Get the summary of an indexed artifact, with a staleness warning if the file changed on disk") + public Map summarize( + @ToolParam(description = "The artifact ID to summarize", required = true) String artifact_id) { + try { + Map summary = artifactService.getSummary(artifact_id); + if (summary == null) { + return Map.of("error", "No summary found for artifact: " + artifact_id); + } + + Map result = new LinkedHashMap<>(summary); + + // Check staleness via artifact status path + try { + Map status = artifactService.getStatus(artifact_id); + if (status != null) { + String clientPath = status.get("original_client_path"); + if (clientPath == null) { + clientPath = status.get("file_name"); + } + if (clientPath != null) { + Map staleness = stalenessService.checkStaleness(List.of(clientPath)); + Object staleList = staleness.get("stale"); + if (staleList instanceof List list && !list.isEmpty()) { + result.put("staleness_warning", + "This file has changed on disk since it was last indexed. Re-index for accurate results."); + } + } + } + } catch (Exception e) { + log.warn("Staleness check failed for summarize, skipping warning", e); + } + + return result; + } catch (Exception e) { + log.error("summarize failed for: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_find_related", + description = "Find files related to a given artifact via: shared semantic tags, " + + "entity co-occurrence in the knowledge graph, git co-change history. 
" + + "Returns a unified ranked list with relationship explanations") + public Map findRelated( + @ToolParam(description = "Artifact ID to find related files for", required = true) String artifact_id, + @ToolParam(description = "Max results (default 10)", required = false) Integer max_results) { + try { + int effectiveMax = (max_results == null || max_results <= 0) ? 10 : max_results; + Map> merged = new LinkedHashMap<>(); + + // 1. Shared semantic tags + try { + List> myTags = semanticTagService.getTagsForArtifact(artifact_id); + List tagNames = myTags.stream() + .map(t -> (String) t.get("tag")) + .filter(java.util.Objects::nonNull) + .toList(); + if (!tagNames.isEmpty()) { + List> tagMatches = semanticTagService.searchByTags(tagNames, false); + for (Map match : tagMatches) { + String matchId = (String) match.get("artifact_id"); + if (matchId.equals(artifact_id)) continue; + int sharedCount = ((Number) match.get("match_count")).intValue(); + Map entry = merged.computeIfAbsent(matchId, k -> newRelatedEntry(matchId, match)); + double current = ((Number) entry.get("score")).doubleValue(); + entry.put("score", current + sharedCount); + addReason(entry, "shares " + sharedCount + " semantic tag" + (sharedCount > 1 ? "s" : "")); + } + } + } catch (Exception e) { + log.warn("findRelated: semantic tag lookup failed for {}: {}", artifact_id, e.getMessage()); + } + + // 2. 
Shared entities from knowledge graph + try { + List> myEntities = knowledgeGraphService.getEntitiesForArtifact(artifact_id); + for (Map entity : myEntities) { + String entityId = (String) entity.get("entity_id"); + String entityName = (String) entity.get("entity_name"); + // Find other artifacts sharing this entity by checking source_artifact_ids + Map fullEntity = knowledgeGraphService.getEntity(entityId); + if (fullEntity == null) continue; + String sourceIds = (String) fullEntity.get("source_artifact_ids"); + if (sourceIds == null) continue; + for (String otherId : parseJsonArray(sourceIds)) { + if (otherId.equals(artifact_id)) continue; + Map entry = merged.computeIfAbsent(otherId, k -> { + Map e2 = new LinkedHashMap<>(); + e2.put("artifact_id", otherId); + e2.put("score", 0.0); + e2.put("reasons", new ArrayList()); + return e2; + }); + double current = ((Number) entry.get("score")).doubleValue(); + entry.put("score", current + 1); + addReason(entry, "shares entity " + entityName); + } + } + } catch (Exception e) { + log.warn("findRelated: entity lookup failed for {}: {}", artifact_id, e.getMessage()); + } + + // 3. Co-change history + try { + Map status = artifactService.getStatus(artifact_id); + String filePath = status != null ? 
status.get("original_client_path") : null; + if (filePath == null && status != null) filePath = status.get("file_name"); + if (filePath != null) { + List> coChanges = coChangeService.getRelatedFiles(filePath, effectiveMax); + for (Map cc : coChanges) { + int count = ((Number) cc.get("co_change_count")).intValue(); + String relatedFile = (String) cc.get("related_file"); + // Use file path as key since we don't have artifact_id for co-changes + Map entry = merged.computeIfAbsent("cochange:" + relatedFile, k -> { + Map e2 = new LinkedHashMap<>(); + e2.put("artifact_id", relatedFile); + e2.put("file_name", relatedFile); + e2.put("score", 0.0); + e2.put("reasons", new ArrayList()); + return e2; + }); + double current = ((Number) entry.get("score")).doubleValue(); + entry.put("score", current + count); + addReason(entry, "co-changed " + count + " time" + (count > 1 ? "s" : "")); + } + } + } catch (Exception e) { + log.warn("findRelated: co-change lookup failed for {}: {}", artifact_id, e.getMessage()); + } + + // Sort by score descending and limit + List> ranked = new ArrayList<>(merged.values()); + ranked.sort((a, b) -> Double.compare( + ((Number) b.get("score")).doubleValue(), + ((Number) a.get("score")).doubleValue())); + if (ranked.size() > effectiveMax) { + ranked = ranked.subList(0, effectiveMax); + } + + Map result = new LinkedHashMap<>(); + result.put("artifact_id", artifact_id); + result.put("count", ranked.size()); + result.put("related", ranked); + return result; + } catch (Exception e) { + log.error("findRelated failed for: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + // ── Private helpers ───────────────────────────────────────────────── + + private Map newRelatedEntry(String artifactId, Map match) { + Map entry = new LinkedHashMap<>(); + entry.put("artifact_id", artifactId); + if (match.containsKey("file_name")) { + entry.put("file_name", match.get("file_name")); + } + entry.put("score", 0.0); + entry.put("reasons", new 
ArrayList()); + return entry; + } + + @SuppressWarnings("unchecked") + private void addReason(Map entry, String reason) { + List reasons = (List) entry.get("reasons"); + if (!reasons.contains(reason)) { + reasons.add(reason); + } + } + + static List parseJsonArray(String jsonArray) { + if (jsonArray == null || jsonArray.isBlank()) return List.of(); + String stripped = jsonArray.trim(); + if (stripped.equals("[]")) return List.of(); + stripped = stripped.substring(1, stripped.length() - 1); + List result = new ArrayList<>(); + for (String token : stripped.split(",")) { + String val = token.trim().replace("\"", ""); + if (!val.isEmpty()) result.add(val); + } + return result; + } + + static Map blameEntryToMap(BlameEntry entry) { + Map map = new LinkedHashMap<>(); + map.put("lineStart", entry.lineStart()); + map.put("lineEnd", entry.lineEnd()); + map.put("commitHash", entry.commitHash()); + map.put("author", entry.author()); + map.put("authorDate", entry.authorDate() != null ? entry.authorDate().toString() : null); + map.put("commitMessage", entry.commitMessage()); + map.put("content", entry.content()); + return map; + } + + List resolveFilePaths(String filePathsJson, String gitDiffRef) { + List paths = new ArrayList<>(); + + // Parse JSON array if provided + if (filePathsJson != null && !filePathsJson.isBlank()) { + try { + List parsed = objectMapper.readValue(filePathsJson, new TypeReference<>() {}); + paths.addAll(parsed); + } catch (Exception e) { + log.warn("Failed to parse file_paths JSON: {}", e.getMessage()); + } + } + + // Resolve git diff if provided + if (gitDiffRef != null && !gitDiffRef.isBlank()) { + try { + List gitFiles = runGitDiff(gitDiffRef); + paths.addAll(gitFiles); + } catch (Exception e) { + log.warn("Failed to run git diff for ref {}: {}", gitDiffRef, e.getMessage()); + } + } + + return paths; + } + + List runGitDiff(String ref) throws Exception { + String root = System.getenv("PROJECT_ROOT"); + File projectRoot = new File(root != null ? 
root : ".").getAbsoluteFile(); + + ProcessBuilder pb = new ProcessBuilder("git", "diff", "--name-only", ref); + pb.directory(projectRoot); + pb.redirectErrorStream(true); + Process process = pb.start(); + + List files; + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + files = reader.lines() + .filter(line -> !line.isBlank()) + .collect(Collectors.toList()); + } + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException("git diff failed (exit " + exitCode + ")"); + } + return files; + } +} diff --git a/src/main/java/com/javaducker/server/mcp/ContentIntelligenceTools.java b/src/main/java/com/javaducker/server/mcp/ContentIntelligenceTools.java new file mode 100644 index 0000000..c3cc3fd --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/ContentIntelligenceTools.java @@ -0,0 +1,271 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.ContentIntelligenceService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.Map; + +@Component +public class ContentIntelligenceTools { + + private static final Logger log = LoggerFactory.getLogger(ContentIntelligenceTools.class); + + private final ContentIntelligenceService service; + private final ObjectMapper objectMapper; + + public ContentIntelligenceTools(ContentIntelligenceService service, ObjectMapper objectMapper) { + this.service = service; + this.objectMapper = objectMapper; + } + + // ── Write tools ──────────────────────────────────────────────────────────── + + @Tool(name = "javaducker_classify", + description = "Classify an artifact by document type (e.g. 
code, config, doc, test)") + public Map classify( + @ToolParam(description = "Artifact ID to classify", required = true) String artifact_id, + @ToolParam(description = "Document type (e.g. code, config, doc, test)", required = true) String doc_type, + @ToolParam(description = "Confidence score 0.0-1.0", required = false) Double confidence, + @ToolParam(description = "Classification method (e.g. llm, rule)", required = false) String method) { + try { + double conf = confidence != null ? confidence : 1.0; + String meth = method != null ? method : "llm"; + return service.classify(artifact_id, doc_type, conf, meth); + } catch (Exception e) { + log.error("classify failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_tag", + description = "Add tags to an artifact. Tags is a JSON array of {tag, tag_type, source} objects") + public Map tag( + @ToolParam(description = "Artifact ID to tag", required = true) String artifact_id, + @ToolParam(description = "JSON array of tag objects [{tag, tag_type, source}]", required = true) String tags) { + try { + List> tagList = objectMapper.readValue(tags, new TypeReference<>() {}); + return service.tag(artifact_id, tagList); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (Exception e) { + log.error("tag failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_extract_points", + description = "Extract key points from an artifact. 
Points is a JSON array of {point_type, point_text}") + public Map extractPoints( + @ToolParam(description = "Artifact ID", required = true) String artifact_id, + @ToolParam(description = "JSON array of point objects [{point_type, point_text}]", required = true) String points) { + try { + List> pointList = objectMapper.readValue(points, new TypeReference<>() {}); + return service.extractPoints(artifact_id, pointList); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (Exception e) { + log.error("extractPoints failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_set_freshness", + description = "Set freshness status for an artifact: current, stale, or superseded") + public Map setFreshness( + @ToolParam(description = "Artifact ID", required = true) String artifact_id, + @ToolParam(description = "Freshness: current, stale, or superseded", required = true) String freshness, + @ToolParam(description = "Artifact ID that supersedes this one (optional)", required = false) String superseded_by) { + try { + return service.setFreshness(artifact_id, freshness, superseded_by); + } catch (Exception e) { + log.error("setFreshness failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_synthesize", + description = "Store a synthesis (summary, tags, key points, outcome) for an artifact") + public Map synthesize( + @ToolParam(description = "Artifact ID", required = true) String artifact_id, + @ToolParam(description = "Summary text", required = true) String summary_text, + @ToolParam(description = "Comma-separated tags", required = false) String tags, + @ToolParam(description = "Key points text", required = false) String key_points, + @ToolParam(description = "Outcome or conclusion", required = false) String outcome, + @ToolParam(description = "Original file path", required = false) 
String original_file_path) { + try { + return service.synthesize(artifact_id, summary_text, tags, key_points, outcome, original_file_path); + } catch (Exception e) { + log.error("synthesize failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_link_concepts", + description = "Link concepts across artifacts. Links is a JSON array of {concept, artifact_a, artifact_b, strength}") + public Map linkConcepts( + @ToolParam(description = "JSON array of link objects [{concept, artifact_a, artifact_b, strength}]", required = true) String links) { + try { + List> linkList = objectMapper.readValue(links, new TypeReference<>() {}); + return service.linkConcepts(linkList); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (Exception e) { + log.error("linkConcepts failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_enrich_queue", + description = "Get the queue of artifacts awaiting enrichment") + public Map enrichQueue( + @ToolParam(description = "Max items to return (default 50)", required = false) Integer limit) { + try { + int lim = limit != null ? 
limit : 50; + List> queue = service.getEnrichQueue(lim); + return Map.of("results", queue, "count", queue.size()); + } catch (Exception e) { + log.error("enrichQueue failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_mark_enriched", + description = "Mark an artifact as enriched (remove from enrich queue)") + public Map markEnriched( + @ToolParam(description = "Artifact ID to mark as enriched", required = true) String artifact_id) { + try { + return service.markEnriched(artifact_id); + } catch (Exception e) { + log.error("markEnriched failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + // ── Read tools ───────────────────────────────────────────────────────────── + + @Tool(name = "javaducker_latest", + description = "Get the latest artifact for a given topic") + public Map latest( + @ToolParam(description = "Topic to search for", required = true) String topic) { + try { + return service.getLatest(topic); + } catch (Exception e) { + log.error("latest failed for topic {}", topic, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_find_by_type", + description = "Find all artifacts of a given document type") + public Map findByType( + @ToolParam(description = "Document type to filter by", required = true) String doc_type) { + try { + List> results = service.findByType(doc_type); + return Map.of("results", results, "count", results.size()); + } catch (Exception e) { + log.error("findByType failed for {}", doc_type, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_find_by_tag", + description = "Find all artifacts with a given tag") + public Map findByTag( + @ToolParam(description = "Tag to search for", required = true) String tag) { + try { + List> results = service.findByTag(tag); + return Map.of("results", results, "count", results.size()); + } catch (Exception e) { + log.error("findByTag failed for {}", tag, e); + return 
Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_find_points", + description = "Find extracted points by type, optionally filtered by tag") + public Map findPoints( + @ToolParam(description = "Point type (e.g. decision, action, question)", required = true) String point_type, + @ToolParam(description = "Optional tag to filter by", required = false) String tag) { + try { + List> results = service.findPoints(point_type, tag); + return Map.of("results", results, "count", results.size()); + } catch (Exception e) { + log.error("findPoints failed for type={} tag={}", point_type, tag, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_concepts", + description = "List all known concepts and their linked artifacts") + public Map concepts() { + try { + List> results = service.listConcepts(); + return Map.of("results", results, "count", results.size()); + } catch (Exception e) { + log.error("concepts failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_concept_timeline", + description = "Get the timeline of artifacts for a specific concept") + public Map conceptTimeline( + @ToolParam(description = "Concept name", required = true) String concept) { + try { + return service.getConceptTimeline(concept); + } catch (Exception e) { + log.error("conceptTimeline failed for {}", concept, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_concept_health", + description = "Get health metrics for all concepts (coverage, staleness, etc.)") + public Map conceptHealth() { + try { + return service.getConceptHealth(); + } catch (Exception e) { + log.error("conceptHealth failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_stale_content", + description = "List all stale or superseded content") + public Map staleContent() { + try { + List> results = service.getStaleContent(); + return Map.of("results", results, "count", 
results.size()); + } catch (Exception e) { + log.error("staleContent failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_synthesis", + description = "Get synthesis for an artifact by ID, or search syntheses by keyword") + public Map synthesis( + @ToolParam(description = "Artifact ID to get synthesis for", required = false) String artifact_id, + @ToolParam(description = "Keyword to search syntheses", required = false) String keyword) { + try { + if (artifact_id != null && !artifact_id.isBlank()) { + return service.getSynthesis(artifact_id); + } + if (keyword != null && !keyword.isBlank()) { + List> results = service.searchSynthesis(keyword); + return Map.of("results", results, "count", results.size()); + } + return Map.of("error", "Either artifact_id or keyword must be provided"); + } catch (Exception e) { + log.error("synthesis failed artifact_id={} keyword={}", artifact_id, keyword, e); + return Map.of("error", e.getMessage()); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/CoreTools.java b/src/main/java/com/javaducker/server/mcp/CoreTools.java new file mode 100644 index 0000000..1e1e291 --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/CoreTools.java @@ -0,0 +1,282 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.service.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.nio.file.*; +import java.nio.file.attribute.BasicFileAttributes; +import java.sql.SQLException; +import java.util.*; +import java.util.stream.Collectors; + +@Component +public class CoreTools { + + private static final Logger log = LoggerFactory.getLogger(CoreTools.class); + + private final UploadService uploadService; + private final ArtifactService artifactService; + private final SearchService 
searchService; + private final StatsService statsService; + private final StalenessService stalenessService; + private final GraphSearchService graphSearchService; + + public CoreTools(UploadService uploadService, + ArtifactService artifactService, + SearchService searchService, + StatsService statsService, + StalenessService stalenessService, + GraphSearchService graphSearchService) { + this.uploadService = uploadService; + this.artifactService = artifactService; + this.searchService = searchService; + this.statsService = statsService; + this.stalenessService = stalenessService; + this.graphSearchService = graphSearchService; + } + + @Tool(name = "javaducker_health", description = "Check JavaDucker server health and return basic stats") + public Map health() { + try { + Map stats = statsService.getStats(); + Map result = new LinkedHashMap<>(); + result.put("status", "ok"); + result.putAll(stats); + return result; + } catch (Exception e) { + log.error("Health check failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_index_file", description = "Index a single file from disk into JavaDucker") + public Map indexFile( + @ToolParam(description = "Absolute path to the file to index", required = true) String file_path) { + try { + Path path = Path.of(file_path); + if (!Files.exists(path)) { + return Map.of("error", "File not found: " + file_path); + } + if (!Files.isRegularFile(path)) { + return Map.of("error", "Not a regular file: " + file_path); + } + + String fileName = path.getFileName().toString(); + String mediaType = Files.probeContentType(path); + if (mediaType == null) { + mediaType = "application/octet-stream"; + } + byte[] content = Files.readAllBytes(path); + long size = content.length; + + String artifactId = uploadService.upload(fileName, file_path, mediaType, size, content); + return Map.of("artifact_id", artifactId, "file_name", fileName, "size_bytes", size); + } catch (Exception e) { + log.error("Failed to index 
file: {}", file_path, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_index_directory", + description = "Index all matching files in a directory into JavaDucker") + public Map indexDirectory( + @ToolParam(description = "Absolute path to the directory to index", required = true) String directory, + @ToolParam(description = "Comma-separated file extensions to include (e.g. java,xml,md). If omitted, all files are indexed.", required = false) String extensions) { + try { + Path dirPath = Path.of(directory); + if (!Files.isDirectory(dirPath)) { + return Map.of("error", "Not a directory: " + directory); + } + + Set extFilter = parseExtensions(extensions); + List indexed = new ArrayList<>(); + List errors = new ArrayList<>(); + + Files.walkFileTree(dirPath, new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { + if (!Files.isRegularFile(file)) return FileVisitResult.CONTINUE; + if (!extFilter.isEmpty() && !matchesExtension(file, extFilter)) { + return FileVisitResult.CONTINUE; + } + try { + String fileName = file.getFileName().toString(); + String mediaType = Files.probeContentType(file); + if (mediaType == null) mediaType = "application/octet-stream"; + byte[] content = Files.readAllBytes(file); + uploadService.upload(fileName, file.toAbsolutePath().toString(), + mediaType, content.length, content); + indexed.add(file.toAbsolutePath().toString()); + } catch (Exception e) { + errors.add(file.toAbsolutePath() + ": " + e.getMessage()); + } + return FileVisitResult.CONTINUE; + } + }); + + Map result = new LinkedHashMap<>(); + result.put("indexed_count", indexed.size()); + result.put("error_count", errors.size()); + result.put("directory", directory); + if (!errors.isEmpty()) { + result.put("errors", errors); + } + return result; + } catch (Exception e) { + log.error("Failed to index directory: {}", directory, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = 
"javaducker_search", + description = "Search indexed content. Modes: exact (string match), semantic (vector similarity), " + + "hybrid (default: 0.3 exact + 0.7 semantic), local (entity-centric graph search), " + + "global (relationship-centric graph search), graph_hybrid (local+global combined), " + + "mix (graph+vector combined — recommended when graph is populated)") + public Map search( + @ToolParam(description = "Search phrase or query", required = true) String phrase, + @ToolParam(description = "Search mode: exact, semantic, hybrid, local, global, graph_hybrid, or mix (default: hybrid)", required = false) String mode, + @ToolParam(description = "Maximum number of results (default: 20)", required = false) Integer max_results) { + try { + String effectiveMode = (mode == null || mode.isBlank()) ? "hybrid" : mode.toLowerCase(); + int effectiveMax = (max_results == null || max_results <= 0) ? 20 : max_results; + + List> results = switch (effectiveMode) { + case "exact" -> searchService.exactSearch(phrase, effectiveMax); + case "semantic" -> searchService.semanticSearch(phrase, effectiveMax); + case "hybrid" -> searchService.hybridSearch(phrase, effectiveMax); + case "local" -> graphSearchService.localSearch(phrase, effectiveMax); + case "global" -> graphSearchService.globalSearch(phrase, effectiveMax); + case "graph_hybrid" -> graphSearchService.hybridGraphSearch(phrase, effectiveMax); + case "mix" -> graphSearchService.mixSearch(phrase, effectiveMax); + default -> throw new IllegalArgumentException( + "Unknown search mode: " + effectiveMode + + ". 
Use exact, semantic, hybrid, local, global, graph_hybrid, or mix."); + }; + + Map response = new LinkedHashMap<>(); + response.put("count", results.size()); + response.put("mode", effectiveMode); + response.put("results", results); + + addStalenessWarning(results, response); + + return response; + } catch (Exception e) { + log.error("Search failed for phrase: {}", phrase, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_get_file_text", + description = "Retrieve the extracted text content of an indexed artifact") + public Map getFileText( + @ToolParam(description = "The artifact ID to retrieve text for", required = true) String artifact_id) { + try { + Map text = artifactService.getText(artifact_id); + return new LinkedHashMap<>(text); + } catch (Exception e) { + log.error("Failed to get text for artifact: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_get_artifact_status", + description = "Get the current status of an indexed artifact") + public Map getArtifactStatus( + @ToolParam(description = "The artifact ID to check", required = true) String artifact_id) { + try { + Map status = artifactService.getStatus(artifact_id); + return new LinkedHashMap<>(status); + } catch (Exception e) { + log.error("Failed to get status for artifact: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_wait_for_indexed", + description = "Poll an artifact until it reaches INDEXED or FAILED status, or timeout") + public Map waitForIndexed( + @ToolParam(description = "The artifact ID to wait for", required = true) String artifact_id, + @ToolParam(description = "Timeout in seconds (default: 120)", required = false) Integer timeout_seconds) { + try { + int timeout = (timeout_seconds == null || timeout_seconds <= 0) ? 
120 : timeout_seconds; + long deadline = System.currentTimeMillis() + (timeout * 1000L); + + while (System.currentTimeMillis() < deadline) { + Map status = artifactService.getStatus(artifact_id); + String currentStatus = status.getOrDefault("status", "UNKNOWN"); + + if ("INDEXED".equalsIgnoreCase(currentStatus) || "FAILED".equalsIgnoreCase(currentStatus)) { + return new LinkedHashMap<>(status); + } + + Thread.sleep(2000); + } + + Map result = new LinkedHashMap<>(); + result.put("error", "Timeout after " + timeout + " seconds waiting for artifact " + artifact_id); + result.put("artifact_id", artifact_id); + return result; + } catch (Exception e) { + log.error("Wait for indexed failed for artifact: {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_stats", description = "Get JavaDucker indexing statistics") + public Map stats() { + try { + return statsService.getStats(); + } catch (Exception e) { + log.error("Failed to get stats", e); + return Map.of("error", e.getMessage()); + } + } + + // ── Private helpers ───────────────────────────────────────────────── + + private Set parseExtensions(String extensions) { + if (extensions == null || extensions.isBlank()) { + return Set.of(); + } + return Arrays.stream(extensions.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(s -> s.startsWith(".") ? s : "." 
+ s) + .map(String::toLowerCase) + .collect(Collectors.toSet()); + } + + private boolean matchesExtension(Path file, Set extFilter) { + String name = file.getFileName().toString().toLowerCase(); + return extFilter.stream().anyMatch(name::endsWith); + } + + @SuppressWarnings("unchecked") + private void addStalenessWarning(List> results, Map response) { + try { + List paths = results.stream() + .map(r -> (String) r.get("original_client_path")) + .filter(Objects::nonNull) + .distinct() + .toList(); + + if (!paths.isEmpty()) { + Map staleness = stalenessService.checkStaleness(paths); + Object staleList = staleness.get("stale"); + if (staleList instanceof List list && !list.isEmpty()) { + response.put("staleness_warning", "Some results may be stale (files changed since indexing)"); + response.put("stale_files", staleList); + } + } + } catch (Exception e) { + log.warn("Staleness check failed, skipping warning", e); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/EnrichmentTools.java b/src/main/java/com/javaducker/server/mcp/EnrichmentTools.java new file mode 100644 index 0000000..cdfb0ac --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/EnrichmentTools.java @@ -0,0 +1,157 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.service.CommunityDetectionService; +import com.javaducker.server.service.KnowledgeGraphService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.sql.*; +import java.util.*; + +@Component +public class EnrichmentTools { + + private static final Logger log = LoggerFactory.getLogger(EnrichmentTools.class); + + private final DuckDBDataSource dataSource; + private final KnowledgeGraphService knowledgeGraphService; + private final CommunityDetectionService 
communityDetectionService; + + public EnrichmentTools(DuckDBDataSource dataSource, + KnowledgeGraphService knowledgeGraphService, + CommunityDetectionService communityDetectionService) { + this.dataSource = dataSource; + this.knowledgeGraphService = knowledgeGraphService; + this.communityDetectionService = communityDetectionService; + } + + @Tool(name = "javaducker_enrichment_pipeline", + description = "Get a structured enrichment work plan. Returns pending files and the steps " + + "Claude should follow for each file: read text, synthesize tags, extract entities, " + + "classify, and mark enriched") + public Map enrichmentPipeline( + @ToolParam(description = "Batch size (default 10)", required = false) Integer batch_size) { + try { + int size = batch_size != null && batch_size > 0 ? Math.min(batch_size, 50) : 10; + Connection conn = dataSource.getConnection(); + + List> pending = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT artifact_id, file_name, status FROM artifacts " + + "WHERE COALESCE(enrichment_status, 'pending') = 'pending' " + + "AND status = 'INDEXED' ORDER BY created_at DESC LIMIT ?")) { + ps.setInt(1, size); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + pending.add(Map.of( + "artifact_id", rs.getString("artifact_id"), + "file_name", rs.getString("file_name"))); + } + } + } + + Map graphStats = knowledgeGraphService.getStats(); + var communities = communityDetectionService.getCommunities(); + + List steps = List.of( + "1. Read file text via javaducker_get_file_text", + "2. Call javaducker_synthesize_tags with 4-10 semantic tags", + "3. Call javaducker_extract_entities with entities and relationships", + "4. Call javaducker_classify if not yet classified", + "5. 
Call javaducker_mark_enriched when done" + ); + + Map result = new LinkedHashMap<>(); + result.put("pending_files", pending); + result.put("pending_count", pending.size()); + result.put("steps_per_file", steps); + result.put("batch_size", size); + result.put("graph_stats", graphStats); + result.put("community_count", communities.size()); + return result; + } catch (Exception e) { + log.error("enrichmentPipeline failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_enrichment_status", + description = "Get enrichment progress: total files, enriched count, pending count, graph stats") + public Map enrichmentStatus() { + try { + Connection conn = dataSource.getConnection(); + Map result = new LinkedHashMap<>(); + + try (Statement stmt = conn.createStatement()) { + try (ResultSet rs = stmt.executeQuery( + "SELECT COUNT(*) AS total, " + + "SUM(CASE WHEN COALESCE(enrichment_status, 'pending') = 'enriched' THEN 1 ELSE 0 END) AS enriched, " + + "SUM(CASE WHEN COALESCE(enrichment_status, 'pending') = 'pending' THEN 1 ELSE 0 END) AS pending " + + "FROM artifacts WHERE status = 'INDEXED'")) { + rs.next(); + result.put("total_indexed", rs.getLong("total")); + result.put("enriched", rs.getLong("enriched")); + result.put("pending", rs.getLong("pending")); + } + } + + result.put("graph_stats", knowledgeGraphService.getStats()); + result.put("community_count", communityDetectionService.getCommunities().size()); + return result; + } catch (Exception e) { + log.error("enrichmentStatus failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_rebuild_graph", + description = "Nuclear option: clear all entities, relationships, and communities. 
" + + "Returns list of all indexed artifacts for full re-extraction") + public Map rebuildGraph() { + try { + Connection conn = dataSource.getConnection(); + + int deletedEntities, deletedRels, deletedCommunities; + try (Statement stmt = conn.createStatement()) { + try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM entities")) { + rs.next(); deletedEntities = rs.getInt(1); + } + try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM entity_relationships")) { + rs.next(); deletedRels = rs.getInt(1); + } + try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM entity_communities")) { + rs.next(); deletedCommunities = rs.getInt(1); + } + stmt.execute("DELETE FROM entity_relationships"); + stmt.execute("DELETE FROM entities"); + stmt.execute("DELETE FROM entity_communities"); + } + + List> artifacts = new ArrayList<>(); + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery( + "SELECT artifact_id, file_name FROM artifacts WHERE status = 'INDEXED'")) { + while (rs.next()) { + artifacts.add(Map.of( + "artifact_id", rs.getString("artifact_id"), + "file_name", rs.getString("file_name"))); + } + } + + Map result = new LinkedHashMap<>(); + result.put("deleted_entities", deletedEntities); + result.put("deleted_relationships", deletedRels); + result.put("deleted_communities", deletedCommunities); + result.put("artifacts_to_reprocess", artifacts); + result.put("artifact_count", artifacts.size()); + return result; + } catch (Exception e) { + log.error("rebuildGraph failed", e); + return Map.of("error", e.getMessage()); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/KnowledgeGraphTools.java b/src/main/java/com/javaducker/server/mcp/KnowledgeGraphTools.java new file mode 100644 index 0000000..d228aca --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/KnowledgeGraphTools.java @@ -0,0 +1,342 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.core.type.TypeReference; +import 
com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.CommunityDetectionService; +import com.javaducker.server.service.GraphSearchService; +import com.javaducker.server.service.GraphUpdateService; +import com.javaducker.server.service.KnowledgeGraphService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +@Component +public class KnowledgeGraphTools { + + private static final Logger log = LoggerFactory.getLogger(KnowledgeGraphTools.class); + + private final KnowledgeGraphService service; + private final GraphSearchService graphSearchService; + private final GraphUpdateService graphUpdateService; + private final CommunityDetectionService communityDetectionService; + private final ObjectMapper objectMapper; + + public KnowledgeGraphTools(KnowledgeGraphService service, + GraphSearchService graphSearchService, + GraphUpdateService graphUpdateService, + CommunityDetectionService communityDetectionService, + ObjectMapper objectMapper) { + this.service = service; + this.graphSearchService = graphSearchService; + this.graphUpdateService = graphUpdateService; + this.communityDetectionService = communityDetectionService; + this.objectMapper = objectMapper; + } + + @Tool(name = "javaducker_extract_entities", + description = "Extract entities and relationships from an indexed artifact. " + + "Entities: [{name, type, description}]. Types: class, interface, method, function, module, " + + "endpoint, table, config-key, event, exception, concept, service, pattern, enum, annotation. " + + "Relationships: [{sourceName, targetName, type, description}]. 
" + + "Rel types: uses, extends, implements, calls, depends-on, configures, tests, creates, contains, references") + public Map extractEntities( + @ToolParam(description = "Artifact ID the entities were extracted from", required = true) String artifact_id, + @ToolParam(description = "JSON array of entity objects [{name, type, description}]", required = true) String entities, + @ToolParam(description = "JSON array of relationship objects [{sourceName, targetName, type, description}]", required = false) String relationships) { + try { + List> entityList = objectMapper.readValue(entities, new TypeReference<>() {}); + if (entityList.isEmpty()) { + return Map.of("error", "At least one entity required"); + } + + int created = 0, merged = 0; + for (Map e : entityList) { + String name = e.get("name"); + String type = e.get("type"); + String desc = e.getOrDefault("description", null); + if (name == null || type == null) continue; + Map result = service.upsertEntity(name, type, desc, artifact_id, null); + if ("created".equals(result.get("action"))) created++; + else merged++; + } + + int relCreated = 0, relMerged = 0; + if (relationships != null && !relationships.isBlank()) { + List> relList = objectMapper.readValue(relationships, new TypeReference<>() {}); + for (Map r : relList) { + String sourceName = r.get("sourceName"); + String targetName = r.get("targetName"); + String relType = r.get("type"); + String desc = r.getOrDefault("description", null); + if (sourceName == null || targetName == null || relType == null) continue; + + // Resolve entity IDs by name lookup + String sourceId = resolveEntityId(sourceName); + String targetId = resolveEntityId(targetName); + if (sourceId == null || targetId == null) continue; + + Map result = service.upsertRelationship( + sourceId, targetId, relType, desc, artifact_id, null, 1.0); + if ("created".equals(result.get("action"))) relCreated++; + else relMerged++; + } + } + + return Map.of( + "artifact_id", artifact_id, + 
"entities_created", created, + "entities_merged", merged, + "relationships_created", relCreated, + "relationships_merged", relMerged); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (Exception e) { + log.error("extractEntities failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_get_entities", + description = "Get entities from the knowledge graph. Filter by artifact, type, or name pattern") + public Map getEntities( + @ToolParam(description = "Filter by entity type (e.g. class, method, service)", required = false) String entity_type, + @ToolParam(description = "Filter by name pattern (case-insensitive substring match)", required = false) String name_pattern) { + try { + List> results; + if (name_pattern != null && !name_pattern.isBlank()) { + results = service.findEntitiesByName(name_pattern); + if (entity_type != null && !entity_type.isBlank()) { + results = results.stream() + .filter(e -> entity_type.equals(e.get("entity_type"))) + .toList(); + } + } else if (entity_type != null && !entity_type.isBlank()) { + results = service.findEntitiesByType(entity_type); + } else { + results = service.findEntitiesByName(""); + } + return Map.of("entities", results, "count", results.size()); + } catch (Exception e) { + log.error("getEntities failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_merge_entities", + description = "Merge two entities into one. The source entity is absorbed into the target. 
" + + "All relationships are rewired, mention counts are combined, source is deleted") + public Map mergeEntities( + @ToolParam(description = "Entity ID to merge FROM (will be deleted)", required = true) String source_entity_id, + @ToolParam(description = "Entity ID to merge INTO (will be kept)", required = true) String target_entity_id, + @ToolParam(description = "Merged description combining both entities", required = false) String merged_description) { + try { + return service.mergeEntities(source_entity_id, target_entity_id, merged_description); + } catch (Exception e) { + log.error("mergeEntities failed {} -> {}", source_entity_id, target_entity_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_delete_entities", + description = "Remove all entities and relationships sourced solely from a given artifact. " + + "Entities shared with other artifacts survive with decremented mention count") + public Map deleteEntities( + @ToolParam(description = "Artifact ID whose entities should be removed", required = true) String artifact_id) { + try { + return service.deleteEntitiesForArtifact(artifact_id); + } catch (Exception e) { + log.error("deleteEntities failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_graph_stats", + description = "Get knowledge graph statistics: entity count, relationship count, top types, most connected entities") + public Map graphStats() { + try { + return service.getStats(); + } catch (Exception e) { + log.error("graphStats failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_graph_neighborhood", + description = "Get the neighborhood of an entity: all connected entities within N hops") + public Map graphNeighborhood( + @ToolParam(description = "Entity ID to explore from", required = true) String entity_id, + @ToolParam(description = "Number of hops (default 2, max 5)", required = false) Integer depth) { + try { 
+ int d = depth != null ? Math.min(depth, 5) : 2; + return service.getNeighborhood(entity_id, d); + } catch (Exception e) { + log.error("graphNeighborhood failed for {}", entity_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_graph_path", + description = "Find the shortest path between two entities in the knowledge graph") + public Map graphPath( + @ToolParam(description = "Starting entity ID", required = true) String from_entity_id, + @ToolParam(description = "Target entity ID", required = true) String to_entity_id) { + try { + return service.getPath(from_entity_id, to_entity_id); + } catch (Exception e) { + log.error("graphPath failed {} -> {}", from_entity_id, to_entity_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_graph_search", + description = "Search the knowledge graph. Modes: local (entity-centric), global (relationship-centric), " + + "graph_hybrid (combined local+global), mix (graph+vector combined - recommended)") + public Map graphSearch( + @ToolParam(description = "Search query", required = true) String query, + @ToolParam(description = "Search mode: local, global, graph_hybrid, mix", required = false) String mode, + @ToolParam(description = "Max results (default 10)", required = false) Integer top_k, + @ToolParam(description = "Filter by entity types (comma-separated)", required = false) String entity_types) { + try { + String effectiveMode = (mode == null || mode.isBlank()) ? "mix" : mode.toLowerCase(); + int effectiveTopK = (top_k == null || top_k <= 0) ? 
10 : top_k; + + List> results = switch (effectiveMode) { + case "local" -> graphSearchService.localSearch(query, effectiveTopK); + case "global" -> graphSearchService.globalSearch(query, effectiveTopK); + case "graph_hybrid" -> graphSearchService.hybridGraphSearch(query, effectiveTopK); + case "mix" -> graphSearchService.mixSearch(query, effectiveTopK); + default -> throw new IllegalArgumentException( + "Unknown mode: " + effectiveMode + ". Use local, global, graph_hybrid, or mix."); + }; + + // Filter by entity types if specified (applies to local/graph_hybrid modes) + if (entity_types != null && !entity_types.isBlank()) { + Set types = Set.of(entity_types.split(",")); + results = results.stream() + .filter(r -> r.get("entity_type") == null || types.contains(r.get("entity_type"))) + .toList(); + } + + return Map.of("mode", effectiveMode, "results", results, "count", results.size()); + } catch (Exception e) { + log.error("graphSearch failed for query: {}", query, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_merge_candidates", + description = "Find entities that may be duplicates based on name similarity and embedding similarity. 
" + + "Returns pairs with confidence scores for Claude to review and confirm merges") + public Map mergeCandidates( + @ToolParam(description = "Optional entity ID to find merge candidates for", required = false) String entity_id) { + try { + if (entity_id != null && !entity_id.isBlank()) { + List> candidates = service.findMergeCandidates(entity_id); + return Map.of("entity_id", entity_id, "candidates", candidates, "count", candidates.size()); + } else { + List> candidates = service.findDuplicateCandidates(); + return Map.of("candidates", candidates, "count", candidates.size()); + } + } catch (Exception e) { + log.error("mergeCandidates failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_confirm_merge", + description = "Confirm and execute an entity merge after reviewing candidates. " + + "Provide the merged description combining key information from both entities") + public Map confirmMerge( + @ToolParam(description = "Entity ID to merge FROM (will be deleted)", required = true) String source_entity_id, + @ToolParam(description = "Entity ID to merge INTO (will be kept)", required = true) String target_entity_id, + @ToolParam(description = "Merged description combining info from both entities", required = true) String merged_description) { + try { + return service.mergeEntities(source_entity_id, target_entity_id, merged_description); + } catch (Exception e) { + log.error("confirmMerge failed {} -> {}", source_entity_id, target_entity_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reindex_graph", + description = "Clean graph data for an artifact that was re-indexed. Removes entities/relationships " + + "sourced solely from this artifact. Shared entities survive with decremented counts. 
" + + "Call javaducker_extract_entities afterward to re-extract from the updated file") + public Map reindexGraph( + @ToolParam(description = "Artifact ID that was re-indexed", required = true) String artifact_id) { + try { + return graphUpdateService.onArtifactReindexed(artifact_id); + } catch (Exception e) { + log.error("reindexGraph failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_graph_stale", + description = "Find entities and relationships that may be stale because their source artifacts " + + "have been re-indexed since the entities were extracted") + public Map graphStale() { + try { + var stale = graphUpdateService.findStaleGraphEntries(); + return Map.of("stale_entries", stale, "count", stale.size()); + } catch (Exception e) { + log.error("graphStale failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_detect_communities", + description = "Run community detection on the knowledge graph. Groups related entities " + + "into communities using label propagation") + public Map detectCommunities() { + try { + return communityDetectionService.detectCommunities(); + } catch (Exception e) { + log.error("detectCommunities failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_summarize_community", + description = "Store a summary for a community. 
Claude should generate the summary " + + "after reviewing community members") + public Map summarizeCommunity( + @ToolParam(description = "Community ID", required = true) String community_id, + @ToolParam(description = "Community summary text", required = true) String summary) { + try { + return communityDetectionService.summarizeCommunity(community_id, summary); + } catch (Exception e) { + log.error("summarizeCommunity failed for {}", community_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_communities", + description = "List all detected communities with member counts") + public Map listCommunities() { + try { + var communities = communityDetectionService.getCommunities(); + return Map.of("communities", communities, "count", communities.size()); + } catch (Exception e) { + log.error("listCommunities failed", e); + return Map.of("error", e.getMessage()); + } + } + + private String resolveEntityId(String entityName) throws Exception { + List> matches = service.findEntitiesByName(entityName); + // Prefer exact name match + for (Map m : matches) { + if (entityName.equals(m.get("entity_name"))) { + return (String) m.get("entity_id"); + } + } + return matches.isEmpty() ? 
null : (String) matches.get(0).get("entity_id"); + } +} diff --git a/src/main/java/com/javaducker/server/mcp/ReladomoTools.java b/src/main/java/com/javaducker/server/mcp/ReladomoTools.java new file mode 100644 index 0000000..61ecc1b --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/ReladomoTools.java @@ -0,0 +1,132 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.service.ReladomoQueryService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.util.Map; + +@Component +public class ReladomoTools { + + private static final Logger log = LoggerFactory.getLogger(ReladomoTools.class); + + private final ReladomoQueryService reladomoQueryService; + + public ReladomoTools(ReladomoQueryService reladomoQueryService) { + this.reladomoQueryService = reladomoQueryService; + } + + @Tool(name = "javaducker_reladomo_relationships", + description = "Get relationships for a Reladomo object including related objects and cardinality") + public Map relationships( + @ToolParam(description = "Reladomo object name", required = true) String object_name) { + try { + return reladomoQueryService.getRelationships(object_name); + } catch (Exception e) { + log.error("Failed to get relationships for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_graph", + description = "Get the relationship graph for a Reladomo object up to a specified depth") + public Map graph( + @ToolParam(description = "Reladomo object name", required = true) String object_name, + @ToolParam(description = "Maximum traversal depth (default: 3)", required = false) Integer depth) { + try { + int effectiveDepth = (depth == null || depth <= 0) ? 
3 : depth; + return reladomoQueryService.getGraph(object_name, effectiveDepth); + } catch (Exception e) { + log.error("Failed to get graph for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_path", + description = "Find the relationship path between two Reladomo objects") + public Map path( + @ToolParam(description = "Source Reladomo object name", required = true) String from_object, + @ToolParam(description = "Target Reladomo object name", required = true) String to_object) { + try { + return reladomoQueryService.getPath(from_object, to_object); + } catch (Exception e) { + log.error("Failed to find path from {} to {}", from_object, to_object, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_schema", + description = "Get the database schema (columns, types, keys) for a Reladomo object") + public Map schema( + @ToolParam(description = "Reladomo object name", required = true) String object_name) { + try { + return reladomoQueryService.getSchema(object_name); + } catch (Exception e) { + log.error("Failed to get schema for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_object_files", + description = "Get the source files associated with a Reladomo object (XML config, generated classes)") + public Map objectFiles( + @ToolParam(description = "Reladomo object name", required = true) String object_name) { + try { + return reladomoQueryService.getObjectFiles(object_name); + } catch (Exception e) { + log.error("Failed to get object files for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_finders", + description = "Get common Finder patterns and usage examples for a Reladomo object") + public Map finders( + @ToolParam(description = "Reladomo object name", required = true) String object_name) { + try { + return 
reladomoQueryService.getFinderPatterns(object_name); + } catch (Exception e) { + log.error("Failed to get finder patterns for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_deepfetch", + description = "Get deep-fetch profiles for a Reladomo object to optimize batch loading") + public Map deepFetch( + @ToolParam(description = "Reladomo object name", required = true) String object_name) { + try { + return reladomoQueryService.getDeepFetchProfiles(object_name); + } catch (Exception e) { + log.error("Failed to get deep-fetch profiles for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_temporal", + description = "Get temporal configuration info for all indexed Reladomo objects") + public Map temporal() { + try { + return reladomoQueryService.getTemporalInfo(); + } catch (Exception e) { + log.error("Failed to get temporal info", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_reladomo_config", + description = "Get Reladomo runtime configuration for a specific object or all objects") + public Map config( + @ToolParam(description = "Reladomo object name (omit for all objects)", required = false) String object_name) { + try { + return reladomoQueryService.getConfig(object_name); + } catch (Exception e) { + log.error("Failed to get config for: {}", object_name, e); + return Map.of("error", e.getMessage()); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/SemanticTagTools.java b/src/main/java/com/javaducker/server/mcp/SemanticTagTools.java new file mode 100644 index 0000000..f0648be --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/SemanticTagTools.java @@ -0,0 +1,104 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.SemanticTagService; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.Map; + +@Component +public class SemanticTagTools { + + private static final Logger log = LoggerFactory.getLogger(SemanticTagTools.class); + + private final SemanticTagService service; + private final ObjectMapper objectMapper; + + public SemanticTagTools(SemanticTagService service, ObjectMapper objectMapper) { + this.service = service; + this.objectMapper = objectMapper; + } + + @Tool(name = "javaducker_synthesize_tags", + description = "Store 4-10 semantic tags for an artifact. Categories: functional, architectural, domain, pattern, concern. Each tag needs: tag, category, confidence (0-1), rationale") + public Map synthesizeTags( + @ToolParam(description = "Artifact ID", required = true) String artifact_id, + @ToolParam(description = "JSON array of tag objects [{tag, category, confidence, rationale}]", required = true) String tags) { + try { + List> tagList = objectMapper.readValue(tags, new TypeReference<>() {}); + return service.writeTags(artifact_id, tagList); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (IllegalArgumentException e) { + return Map.of("error", e.getMessage()); + } catch (Exception e) { + log.error("synthesizeTags failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_search_by_tags", + description = "Search artifacts by semantic tags. 
Returns files matching the given tags") + public Map searchByTags( + @ToolParam(description = "JSON array of tag strings", required = true) String tags, + @ToolParam(description = "Match mode: 'any' or 'all'", required = false) String match_mode, + @ToolParam(description = "Filter by category", required = false) String category) { + try { + List tagList = objectMapper.readValue(tags, new TypeReference<>() {}); + boolean matchAll = "all".equalsIgnoreCase(match_mode); + List> results; + + if (category != null && !category.isBlank()) { + // Filter by category: get artifacts matching tags, then filter + results = service.searchByTags(tagList, matchAll); + // Post-filter: only keep artifacts that have at least one tag in the given category + List> byCategory = service.findByCategory(category); + var categoryArtifacts = new java.util.HashSet(); + for (Map row : byCategory) { + categoryArtifacts.add((String) row.get("artifact_id")); + } + results = results.stream() + .filter(r -> categoryArtifacts.contains(r.get("artifact_id"))) + .toList(); + } else { + results = service.searchByTags(tagList, matchAll); + } + return Map.of("results", results, "count", results.size()); + } catch (com.fasterxml.jackson.core.JsonProcessingException e) { + return Map.of("error", "Invalid JSON: " + e.getMessage()); + } catch (Exception e) { + log.error("searchByTags failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_tag_cloud", + description = "Get all semantic tags grouped by category with artifact counts") + public Map tagCloud() { + try { + return service.getTagCloud(); + } catch (Exception e) { + log.error("tagCloud failed", e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_suggest_tags", + description = "Suggest semantic tags for an artifact based on similar files") + public Map suggestTags( + @ToolParam(description = "Artifact ID", required = true) String artifact_id) { + try { + List> suggestions = 
service.suggestTags(artifact_id); + return Map.of("artifact_id", artifact_id, "suggestions", suggestions, "count", suggestions.size()); + } catch (Exception e) { + log.error("suggestTags failed for {}", artifact_id, e); + return Map.of("error", e.getMessage()); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/SessionTools.java b/src/main/java/com/javaducker/server/mcp/SessionTools.java new file mode 100644 index 0000000..c1f1a94 --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/SessionTools.java @@ -0,0 +1,129 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.ContentIntelligenceService; +import com.javaducker.server.service.SearchService; +import com.javaducker.server.service.SessionIngestionService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +@Component +public class SessionTools { + + private static final Logger log = LoggerFactory.getLogger(SessionTools.class); + + private final SessionIngestionService sessionIngestionService; + private final SearchService searchService; + private final ContentIntelligenceService contentIntelligenceService; + private final ObjectMapper objectMapper; + + public SessionTools(SessionIngestionService sessionIngestionService, + SearchService searchService, + ContentIntelligenceService contentIntelligenceService, + ObjectMapper objectMapper) { + this.sessionIngestionService = sessionIngestionService; + this.searchService = searchService; + this.contentIntelligenceService = contentIntelligenceService; + this.objectMapper = objectMapper; + } + + @Tool(name = "javaducker_index_sessions", + description = "Index Claude 
session transcripts from a project directory into JavaDucker") + public Map indexSessions( + @ToolParam(description = "Absolute path to the project root containing .claude/ sessions", required = true) String project_path, + @ToolParam(description = "Maximum number of sessions to index (default: all)", required = false) Integer max_sessions, + @ToolParam(description = "Use incremental indexing to skip already-indexed sessions (default: false)", required = false) String incremental) { + try { + int effectiveMax = (max_sessions == null || max_sessions <= 0) ? Integer.MAX_VALUE : max_sessions; + boolean isIncremental = "true".equalsIgnoreCase(incremental); + + if (isIncremental) { + return sessionIngestionService.indexSessionsIncremental(project_path, effectiveMax); + } else { + return sessionIngestionService.indexSessions(project_path, effectiveMax); + } + } catch (Exception e) { + log.error("Failed to index sessions from: {}", project_path, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_search_sessions", + description = "Search indexed session transcripts by phrase") + public Map searchSessions( + @ToolParam(description = "Search phrase", required = true) String phrase, + @ToolParam(description = "Maximum number of results (default: 20)", required = false) Integer max_results) { + try { + int effectiveMax = (max_results == null || max_results <= 0) ? 
20 : max_results; + List> results = sessionIngestionService.searchSessions(phrase, effectiveMax); + + Map response = new LinkedHashMap<>(); + response.put("results", results); + response.put("count", results.size()); + return response; + } catch (Exception e) { + log.error("Failed to search sessions for: {}", phrase, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_session_context", + description = "Get combined session and semantic search context for a topic") + public Map sessionContext( + @ToolParam(description = "Topic to search for across sessions and indexed code", required = true) String topic) { + try { + List> sessionResults = sessionIngestionService.searchSessions(topic, 10); + List> semanticResults = searchService.semanticSearch(topic, 5); + + Map response = new LinkedHashMap<>(); + response.put("session_results", sessionResults); + response.put("semantic_results", semanticResults); + return response; + } catch (Exception e) { + log.error("Failed to get session context for: {}", topic, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_extract_decisions", + description = "Store architectural decisions extracted from a session transcript") + public Map extractDecisions( + @ToolParam(description = "Session ID the decisions were extracted from", required = true) String session_id, + @ToolParam(description = "JSON array of decisions, each with text, context, and tags fields", required = true) String decisions) { + try { + List> decisionList = objectMapper.readValue( + decisions, new TypeReference<>() {}); + return sessionIngestionService.storeDecisions(session_id, decisionList); + } catch (Exception e) { + log.error("Failed to extract decisions for session: {}", session_id, e); + return Map.of("error", e.getMessage()); + } + } + + @Tool(name = "javaducker_recent_decisions", + description = "Get recent architectural decisions, optionally filtered by tag") + public Map recentDecisions( + 
@ToolParam(description = "Maximum number of sessions to scan (default: 5)", required = false) Integer max_sessions, + @ToolParam(description = "Filter decisions by tag", required = false) String tag) { + try { + int effectiveMax = (max_sessions == null || max_sessions <= 0) ? 5 : max_sessions; + List> results = sessionIngestionService.getRecentDecisions(effectiveMax, tag); + + Map response = new LinkedHashMap<>(); + response.put("results", results); + response.put("count", results.size()); + return response; + } catch (Exception e) { + log.error("Failed to get recent decisions", e); + return Map.of("error", e.getMessage()); + } + } +} diff --git a/src/main/java/com/javaducker/server/mcp/WatchTools.java b/src/main/java/com/javaducker/server/mcp/WatchTools.java new file mode 100644 index 0000000..a959a7a --- /dev/null +++ b/src/main/java/com/javaducker/server/mcp/WatchTools.java @@ -0,0 +1,90 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.ingestion.FileWatcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.tool.annotation.Tool; +import org.springframework.ai.tool.annotation.ToolParam; +import org.springframework.stereotype.Component; + +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +@Component +public class WatchTools { + + private static final Logger log = LoggerFactory.getLogger(WatchTools.class); + + private final FileWatcher fileWatcher; + + public WatchTools(FileWatcher fileWatcher) { + this.fileWatcher = fileWatcher; + } + + @Tool(name = "javaducker_watch", + description = "Control the file watcher: start watching a directory, stop watching, or check status") + public Map watch( + @ToolParam(description = "Action to perform: start, stop, or status", required = true) String action, + @ToolParam(description = "Absolute path to the directory to watch (required for start)", required = false) String directory, + @ToolParam(description = "Comma-separated file 
extensions to watch, e.g. .java,.xml,.md (optional, defaults to all files)", required = false) String extensions) { + try { + return switch (action.toLowerCase()) { + case "start" -> startWatching(directory, extensions); + case "stop" -> stopWatching(); + case "status" -> getStatus(); + default -> Map.of("error", "Unknown action: " + action + ". Use start, stop, or status."); + }; + } catch (Exception e) { + log.error("watch {} failed", action, e); + return Map.of("error", e.getMessage()); + } + } + + private Map startWatching(String directory, String extensions) throws Exception { + if (directory == null || directory.isBlank()) { + return Map.of("error", "directory is required for the start action"); + } + + Set extSet = parseExtensions(extensions); + Path dirPath = Path.of(directory); + + fileWatcher.startWatching(dirPath, extSet); + + Map result = new LinkedHashMap<>(); + result.put("action", "start"); + result.put("directory", directory); + result.put("extensions", extSet); + result.put("watching", true); + return result; + } + + private Map stopWatching() { + fileWatcher.stopWatching(); + + Map result = new LinkedHashMap<>(); + result.put("action", "stop"); + result.put("watching", false); + return result; + } + + private Map getStatus() { + Map result = new LinkedHashMap<>(); + result.put("action", "status"); + result.put("watching", fileWatcher.isWatching()); + Path dir = fileWatcher.getWatchedDirectory(); + result.put("directory", dir != null ? dir.toString() : null); + return result; + } + + private Set parseExtensions(String extensions) { + if (extensions == null || extensions.isBlank()) { + return Set.of(); + } + return Arrays.stream(extensions.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(s -> s.startsWith(".") ? s : "." 
+ s) + .collect(Collectors.toSet()); + } +} diff --git a/src/main/java/com/javaducker/server/rest/JavaDuckerRestController.java b/src/main/java/com/javaducker/server/rest/JavaDuckerRestController.java index 5c6a31c..2157a6e 100644 --- a/src/main/java/com/javaducker/server/rest/JavaDuckerRestController.java +++ b/src/main/java/com/javaducker/server/rest/JavaDuckerRestController.java @@ -28,6 +28,8 @@ public class JavaDuckerRestController { private final CoChangeService coChangeService; private final ExplainService explainService; private final SessionIngestionService sessionIngestionService; + private final SemanticTagService semanticTagService; + private final KnowledgeGraphService knowledgeGraphService; public JavaDuckerRestController(UploadService uploadService, ArtifactService artifactService, SearchService searchService, StatsService statsService, @@ -38,7 +40,9 @@ public JavaDuckerRestController(UploadService uploadService, ArtifactService art GitBlameService gitBlameService, CoChangeService coChangeService, ExplainService explainService, - SessionIngestionService sessionIngestionService) { + SessionIngestionService sessionIngestionService, + SemanticTagService semanticTagService, + KnowledgeGraphService knowledgeGraphService) { this.uploadService = uploadService; this.artifactService = artifactService; this.searchService = searchService; @@ -53,6 +57,8 @@ public JavaDuckerRestController(UploadService uploadService, ArtifactService art this.coChangeService = coChangeService; this.explainService = explainService; this.sessionIngestionService = sessionIngestionService; + this.semanticTagService = semanticTagService; + this.knowledgeGraphService = knowledgeGraphService; } @GetMapping("/health") @@ -586,4 +592,99 @@ public ResponseEntity> reladomoConfig( @RequestParam(required = false) String objectName) throws Exception { return ResponseEntity.ok(reladomoQueryService.getConfig(objectName)); } + + // ── Semantic Tags endpoints 
────────────────────────────────────────── + + @SuppressWarnings("unchecked") + @PostMapping("/semantic-tags") + public Map writeSemanticTags(@RequestBody Map body) throws Exception { + String artifactId = (String) body.get("artifactId"); + List> tags = (List>) body.get("tags"); + return semanticTagService.writeTags(artifactId, tags); + } + + @GetMapping("/semantic-tags/search") + public List> searchSemanticTags( + @RequestParam String tags, + @RequestParam(defaultValue = "any") String matchMode, + @RequestParam(required = false) String category) throws Exception { + List tagList = Arrays.asList(tags.split(",")); + boolean matchAll = "all".equalsIgnoreCase(matchMode); + List> results = semanticTagService.searchByTags(tagList, matchAll); + if (category != null && !category.isBlank()) { + List> byCategory = semanticTagService.findByCategory(category); + var categoryArtifacts = new HashSet(); + for (Map row : byCategory) { + categoryArtifacts.add((String) row.get("artifact_id")); + } + results = results.stream() + .filter(r -> categoryArtifacts.contains(r.get("artifact_id"))) + .toList(); + } + return results; + } + + @GetMapping("/semantic-tags/cloud") + public Map semanticTagCloud() throws Exception { + return semanticTagService.getTagCloud(); + } + + @GetMapping("/semantic-tags/suggest/{artifactId}") + public List> suggestSemanticTags(@PathVariable String artifactId) throws Exception { + return semanticTagService.suggestTags(artifactId); + } + + // ── Knowledge Graph ────────────────────────────────────────────────────── + + @PostMapping("/entities/extract") + public Map extractEntities(@RequestBody Map body) throws Exception { + String artifactId = (String) body.get("artifactId"); + @SuppressWarnings("unchecked") + List> entities = (List>) body.get("entities"); + @SuppressWarnings("unchecked") + List> relationships = (List>) body.getOrDefault("relationships", List.of()); + int created = 0, merged = 0; + for (Map e : entities) { + Map r = 
knowledgeGraphService.upsertEntity(e.get("name"), e.get("type"), + e.getOrDefault("description", null), artifactId, null); + if ("created".equals(r.get("action"))) created++; else merged++; + } + int relCreated = 0, relMerged = 0; + for (Map r : relationships) { + Map res = knowledgeGraphService.upsertRelationship( + r.get("sourceEntityId"), r.get("targetEntityId"), r.get("type"), + r.getOrDefault("description", null), artifactId, null, 1.0); + if ("created".equals(res.get("action"))) relCreated++; else relMerged++; + } + return Map.of("entities_created", created, "entities_merged", merged, + "relationships_created", relCreated, "relationships_merged", relMerged); + } + + @GetMapping("/entities") + public Map getEntities( + @RequestParam(required = false) String type, + @RequestParam(required = false) String name) throws Exception { + List> results; + if (name != null) results = knowledgeGraphService.findEntitiesByName(name); + else if (type != null) results = knowledgeGraphService.findEntitiesByType(type); + else results = knowledgeGraphService.findEntitiesByName(""); + return Map.of("entities", results, "count", results.size()); + } + + @PostMapping("/entities/merge") + public Map mergeEntities(@RequestBody Map body) throws Exception { + return knowledgeGraphService.mergeEntities( + body.get("sourceEntityId"), body.get("targetEntityId"), + body.getOrDefault("mergedDescription", null)); + } + + @DeleteMapping("/entities/by-artifact/{artifactId}") + public Map deleteEntitiesByArtifact(@PathVariable String artifactId) throws Exception { + return knowledgeGraphService.deleteEntitiesForArtifact(artifactId); + } + + @GetMapping("/graph/stats") + public Map graphStats() throws Exception { + return knowledgeGraphService.getStats(); + } } diff --git a/src/main/java/com/javaducker/server/service/CommunityDetectionService.java b/src/main/java/com/javaducker/server/service/CommunityDetectionService.java new file mode 100644 index 0000000..658246c --- /dev/null +++ 
package com.javaducker.server.service;

import com.javaducker.server.db.DuckDBDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.sql.*;
import java.util.*;

/**
 * Detects communities of related entities in the knowledge graph using label
 * propagation, and manages the entity_communities table (community CRUD,
 * summaries, rebuilds).
 */
@Service
public class CommunityDetectionService {

    private static final Logger log = LoggerFactory.getLogger(CommunityDetectionService.class);

    /** Hard cap on label-propagation rounds even if labels have not converged. */
    private static final int MAX_ITERATIONS = 20;

    private final DuckDBDataSource dataSource;

    public CommunityDetectionService(DuckDBDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Detects communities using label propagation:
     * 1. Build an undirected adjacency list from entity_relationships
     * 2. Initialize each node with its own label
     * 3. Iterate: each node adopts the most frequent label among its neighbors
     * 4. Converge when labels stop changing (or after MAX_ITERATIONS)
     * 5. Group nodes by label; keep only communities with >= 2 members
     * 6. Replace the entity_communities table contents
     * 7. Name each community after its most-mentioned entity
     *
     * NOTE(review): node order is shuffled each round (standard for label
     * propagation), so community assignments are not deterministic across runs.
     *
     * @return stats: communities_detected, iterations, total_entities
     */
    public Map<String, Object> detectCommunities() throws SQLException {
        Connection conn = dataSource.getConnection();

        // 1. Load all entities: label (initially own id), name, mention count.
        Map<String, String> labels = new LinkedHashMap<>();        // entityId -> label
        Map<String, String> entityNames = new LinkedHashMap<>();   // entityId -> name
        Map<String, Integer> mentionCounts = new LinkedHashMap<>();// entityId -> mention_count
        try (Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT entity_id, entity_name, mention_count FROM entities")) {
            while (rs.next()) {
                String id = rs.getString("entity_id");
                labels.put(id, id); // initialize label = own id
                entityNames.put(id, rs.getString("entity_name"));
                mentionCounts.put(id, rs.getInt("mention_count"));
            }
        }

        if (labels.isEmpty()) {
            return Map.of("communities_detected", 0, "message", "No entities found");
        }

        // 2. Build undirected adjacency list from entity_relationships.
        Map<String, List<String>> adj = new LinkedHashMap<>();
        for (String id : labels.keySet()) {
            adj.put(id, new ArrayList<>());
        }
        try (Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT source_entity_id, target_entity_id FROM entity_relationships")) {
            while (rs.next()) {
                String src = rs.getString("source_entity_id");
                String tgt = rs.getString("target_entity_id");
                // Guard against dangling edges referencing unknown entities.
                if (adj.containsKey(src)) adj.get(src).add(tgt);
                if (adj.containsKey(tgt)) adj.get(tgt).add(src);
            }
        }

        // 3. Label propagation iterations.
        int iterations = 0;
        for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
            boolean changed = false;
            List<String> nodeIds = new ArrayList<>(labels.keySet());
            Collections.shuffle(nodeIds); // random visit order avoids oscillation bias
            for (String nodeId : nodeIds) {
                List<String> neighbors = adj.getOrDefault(nodeId, List.of());
                if (neighbors.isEmpty()) continue;
                // Count labels among neighbors; adopt the most frequent one.
                Map<String, Integer> labelCounts = new HashMap<>();
                for (String n : neighbors) {
                    String nLabel = labels.get(n);
                    if (nLabel != null) {
                        labelCounts.merge(nLabel, 1, Integer::sum);
                    }
                }
                if (labelCounts.isEmpty()) continue;
                String bestLabel = Collections.max(labelCounts.entrySet(),
                        Map.Entry.comparingByValue()).getKey();
                if (!bestLabel.equals(labels.get(nodeId))) {
                    labels.put(nodeId, bestLabel);
                    changed = true;
                }
            }
            iterations++;
            if (!changed) break; // converged
        }

        // 4. Group entities by final label -> candidate communities.
        Map<String, List<String>> communityMap = new LinkedHashMap<>();
        labels.forEach((entityId, label) ->
                communityMap.computeIfAbsent(label, k -> new ArrayList<>()).add(entityId));

        // 5. Keep only communities with at least 2 members.
        communityMap.entrySet().removeIf(e -> e.getValue().size() < 2);

        // 6. Replace entity_communities contents.
        try (Statement stmt = conn.createStatement()) {
            stmt.execute("DELETE FROM entity_communities");
        }

        int communityCount = 0;
        for (Map.Entry<String, List<String>> entry : communityMap.entrySet()) {
            List<String> memberIds = entry.getValue();
            communityCount++;
            String communityId = "community-" + communityCount;

            // 7. Name the community after its most-mentioned member.
            String communityName = memberIds.stream()
                    .max(Comparator.comparingInt(id -> mentionCounts.getOrDefault(id, 0)))
                    .map(entityNames::get)
                    .orElse("Community " + communityCount);

            // Build a JSON array of member entity IDs (ids are slugs; no escaping needed
            // beyond quoting — TODO confirm ids never contain '"' or ',').
            StringBuilder entityIdsJson = new StringBuilder("[");
            for (int i = 0; i < memberIds.size(); i++) {
                if (i > 0) entityIdsJson.append(",");
                entityIdsJson.append("\"").append(memberIds.get(i)).append("\"");
            }
            entityIdsJson.append("]");

            try (PreparedStatement ps = conn.prepareStatement(
                    "INSERT INTO entity_communities (community_id, community_name, entity_ids, "
                    + "level, created_at) VALUES (?, ?, ?, 0, CURRENT_TIMESTAMP)")) {
                ps.setString(1, communityId);
                ps.setString(2, communityName);
                ps.setString(3, entityIdsJson.toString());
                ps.executeUpdate();
            }
        }

        log.info("Detected {} communities in {} iterations", communityCount, iterations);
        return Map.of(
                "communities_detected", communityCount,
                "iterations", iterations,
                "total_entities", labels.size());
    }

    /**
     * Returns a community by ID, with a "members" list of entity detail rows,
     * or null when the community does not exist.
     */
    public Map<String, Object> getCommunity(String communityId) throws SQLException {
        Connection conn = dataSource.getConnection();
        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT * FROM entity_communities WHERE community_id = ?")) {
            ps.setString(1, communityId);
            try (ResultSet rs = ps.executeQuery()) {
                if (rs.next()) {
                    Map<String, Object> community = rowToMap(rs);
                    // Parse entity_ids and fetch member entity details.
                    String entityIdsStr = (String) community.get("entity_ids");
                    if (entityIdsStr != null && !entityIdsStr.isBlank()) {
                        List<Map<String, Object>> members = new ArrayList<>();
                        List<String> ids = parseJsonArray(entityIdsStr);
                        // Prepare once, bind per member (was re-prepared inside the loop).
                        try (PreparedStatement eps = conn.prepareStatement(
                                "SELECT entity_id, entity_name, entity_type, description, "
                                + "mention_count FROM entities WHERE entity_id = ?")) {
                            for (String entityId : ids) {
                                eps.setString(1, entityId);
                                try (ResultSet ers = eps.executeQuery()) {
                                    if (ers.next()) members.add(rowToMap(ers));
                                }
                            }
                        }
                        community.put("members", members);
                    }
                    return community;
                }
            }
        }
        return null;
    }

    /**
     * Lists all communities with a computed member_count per row.
     */
    public List<Map<String, Object>> getCommunities() throws SQLException {
        Connection conn = dataSource.getConnection();
        List<Map<String, Object>> results = new ArrayList<>();
        try (Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT * FROM entity_communities ORDER BY level, community_name")) {
            while (rs.next()) {
                Map<String, Object> community = rowToMap(rs);
                String entityIdsStr = (String) community.get("entity_ids");
                int memberCount = entityIdsStr != null ? parseJsonArray(entityIdsStr).size() : 0;
                community.put("member_count", memberCount);
                results.add(community);
            }
        }
        return results;
    }

    /**
     * Stores/updates a community summary (Claude generates the text).
     * Implemented as DELETE + re-INSERT because in-place UPDATE is avoided here.
     *
     * @return {community_id, summary_stored: true} or {error} when not found
     */
    public Map<String, Object> summarizeCommunity(String communityId, String summary)
            throws SQLException {
        Connection conn = dataSource.getConnection();

        // Read the existing community row.
        Map<String, Object> existing = null;
        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT * FROM entity_communities WHERE community_id = ?")) {
            ps.setString(1, communityId);
            try (ResultSet rs = ps.executeQuery()) {
                if (rs.next()) existing = rowToMap(rs);
            }
        }
        if (existing == null) {
            return Map.of("error", "Community not found: " + communityId);
        }

        // Bind the id as a parameter instead of string-concatenating SQL
        // (consistent with the rest of the class; removes a latent injection risk).
        try (PreparedStatement ps = conn.prepareStatement(
                "DELETE FROM entity_communities WHERE community_id = ?")) {
            ps.setString(1, communityId);
            ps.executeUpdate();
        }
        try (PreparedStatement ps = conn.prepareStatement(
                "INSERT INTO entity_communities (community_id, community_name, summary, "
                + "entity_ids, level, parent_community_id, created_at) "
                + "VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)")) {
            ps.setString(1, communityId);
            ps.setString(2, (String) existing.get("community_name"));
            ps.setString(3, summary);
            ps.setString(4, (String) existing.get("entity_ids"));
            ps.setInt(5, existing.get("level") != null
                    ? ((Number) existing.get("level")).intValue() : 0);
            ps.setString(6, (String) existing.get("parent_community_id"));
            ps.executeUpdate();
        }

        return Map.of("community_id", communityId, "summary_stored", true);
    }

    /**
     * Clears all communities and runs a full re-detection.
     */
    public Map<String, Object> rebuildCommunities() throws SQLException {
        Connection conn = dataSource.getConnection();
        try (Statement stmt = conn.createStatement()) {
            stmt.execute("DELETE FROM entity_communities");
        }
        return detectCommunities();
    }

    // ── Private helpers ──────────────────────────────────────────────────────

    /** Converts the current ResultSet row into an ordered column->value map (lowercase keys). */
    private Map<String, Object> rowToMap(ResultSet rs) throws SQLException {
        Map<String, Object> map = new LinkedHashMap<>();
        ResultSetMetaData meta = rs.getMetaData();
        for (int i = 1; i <= meta.getColumnCount(); i++) {
            String col = meta.getColumnName(i).toLowerCase();
            map.put(col, rs.getObject(i));
        }
        return map;
    }

    /**
     * Naive JSON string-array parser for values this class writes itself
     * (["a","b"]). Assumes no commas or escaped quotes inside values —
     * do not use on arbitrary JSON.
     */
    static List<String> parseJsonArray(String json) {
        List<String> result = new ArrayList<>();
        if (json == null || json.isBlank() || json.equals("[]")) return result;
        // Strip brackets, then split on commas.
        String inner = json.substring(1, json.length() - 1);
        for (String token : inner.split(",")) {
            String val = token.trim().replace("\"", "");
            if (!val.isEmpty()) result.add(val);
        }
        return result;
    }
}
+ SemanticTagService semanticTagService, + KnowledgeGraphService knowledgeGraphService, @Autowired(required = false) @SuppressWarnings("unused") Object gitBlameServicePlaceholder, @Autowired(required = false) @SuppressWarnings("unused") @@ -43,6 +47,8 @@ public ExplainService(ArtifactService artifactService, this.dependencyService = dependencyService; this.contentIntelligenceService = contentIntelligenceService; this.dataSource = dataSource; + this.semanticTagService = semanticTagService; + this.knowledgeGraphService = knowledgeGraphService; // Will be replaced with real types when GitBlameService / CoChangeService exist this.gitBlameService = null; this.coChangeService = null; @@ -92,6 +98,18 @@ public Map explain(String artifactId) { addSection(result, "related_artifacts", () -> limitList(contentIntelligenceService.getRelatedByConcept(artifactId), 5)); + // 8b. semantic_tags + addSection(result, "semantic_tags", () -> { + List> tags = semanticTagService.getTagsForArtifact(artifactId); + return tags.isEmpty() ? null : tags; + }); + + // 8c. graph_entities + addSection(result, "graph_entities", () -> { + List> entities = knowledgeGraphService.getEntitiesForArtifact(artifactId); + return entities.isEmpty() ? null : entities; + }); + // 9. 
package com.javaducker.server.service;

import com.javaducker.server.db.DuckDBDataSource;
import com.javaducker.server.ingestion.EmbeddingService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;

/**
 * Graph-augmented retrieval with four modes:
 * LOCAL (entity-centric), GLOBAL (relationship-centric),
 * HYBRID (local 0.6 + global 0.4 fusion), and MIX (graph 0.5 + chunk vectors 0.5).
 */
@Service
public class GraphSearchService {

    private static final Logger log = LoggerFactory.getLogger(GraphSearchService.class);

    /** Rows below this cosine similarity are not considered hits. */
    private static final double MIN_SIMILARITY = 0.01;
    /** Fusion weights for hybrid (entity vs relationship) and mix (graph vs chunk). */
    private static final double LOCAL_WEIGHT = 0.6;
    private static final double GLOBAL_WEIGHT = 0.4;
    private static final double MIX_WEIGHT = 0.5;

    private final DuckDBDataSource dataSource;
    private final EmbeddingService embeddingService;
    private final KnowledgeGraphService knowledgeGraphService;

    public GraphSearchService(DuckDBDataSource dataSource,
                              EmbeddingService embeddingService,
                              KnowledgeGraphService knowledgeGraphService) {
        this.dataSource = dataSource;
        this.embeddingService = embeddingService;
        this.knowledgeGraphService = knowledgeGraphService;
    }

    /**
     * Local search: entity-centric retrieval.
     * Embeds the query, scans the entities table, computes cosine similarity,
     * and returns the top-K entities enriched with their relationships.
     */
    public List<Map<String, Object>> localSearch(String query, int topK) throws SQLException {
        double[] queryEmb = embeddingService.embed(query);
        Connection conn = dataSource.getConnection();
        List<Map<String, Object>> scored = new ArrayList<>();

        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT entity_id, entity_name, entity_type, description, embedding, source_artifact_ids "
                + "FROM entities WHERE embedding IS NOT NULL")) {
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    double[] emb = extractEmbedding(rs);
                    if (emb == null) continue;
                    double sim = cosineSimilarity(queryEmb, emb);
                    if (sim > MIN_SIMILARITY) {
                        Map<String, Object> hit = new LinkedHashMap<>();
                        hit.put("entity_id", rs.getString("entity_id"));
                        hit.put("entity_name", rs.getString("entity_name"));
                        hit.put("entity_type", rs.getString("entity_type"));
                        hit.put("description", rs.getString("description"));
                        hit.put("score", sim);
                        hit.put("source_files", rs.getString("source_artifact_ids"));
                        hit.put("match_type", "LOCAL");
                        scored.add(hit);
                    }
                }
            }
        }

        scored.sort((a, b) -> Double.compare((double) b.get("score"), (double) a.get("score")));
        List<Map<String, Object>> topResults = scored.size() > topK
                ? new ArrayList<>(scored.subList(0, topK)) : new ArrayList<>(scored);

        // Enrich each top entity with its relationships (best-effort).
        for (Map<String, Object> hit : topResults) {
            try {
                List<Map<String, Object>> rels = knowledgeGraphService
                        .getRelationships((String) hit.get("entity_id"));
                hit.put("relationships", rels);
            } catch (SQLException e) {
                log.warn("Failed to fetch relationships for {}", hit.get("entity_id"), e);
                hit.put("relationships", List.of());
            }
        }

        return topResults;
    }

    /**
     * Global search: relationship-centric retrieval.
     * Embeds the query, scans entity_relationships, computes cosine similarity,
     * and returns the top-K relationships enriched with endpoint entity names.
     */
    public List<Map<String, Object>> globalSearch(String query, int topK) throws SQLException {
        double[] queryEmb = embeddingService.embed(query);
        Connection conn = dataSource.getConnection();
        List<Map<String, Object>> scored = new ArrayList<>();

        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT relationship_id, source_entity_id, target_entity_id, "
                + "relationship_type, description, embedding, source_artifact_ids "
                + "FROM entity_relationships WHERE embedding IS NOT NULL")) {
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    double[] emb = extractEmbedding(rs);
                    if (emb == null) continue;
                    double sim = cosineSimilarity(queryEmb, emb);
                    if (sim > MIN_SIMILARITY) {
                        Map<String, Object> hit = new LinkedHashMap<>();
                        hit.put("relationship_id", rs.getString("relationship_id"));
                        hit.put("source_entity_id", rs.getString("source_entity_id"));
                        hit.put("target_entity_id", rs.getString("target_entity_id"));
                        hit.put("relationship_type", rs.getString("relationship_type"));
                        hit.put("description", rs.getString("description"));
                        hit.put("score", sim);
                        hit.put("source_files", rs.getString("source_artifact_ids"));
                        hit.put("match_type", "GLOBAL");
                        scored.add(hit);
                    }
                }
            }
        }

        scored.sort((a, b) -> Double.compare((double) b.get("score"), (double) a.get("score")));
        List<Map<String, Object>> topResults = scored.size() > topK
                ? new ArrayList<>(scored.subList(0, topK)) : new ArrayList<>(scored);

        // Enrich with endpoint entity names/types.
        for (Map<String, Object> hit : topResults) {
            enrichRelationshipWithEntityNames(hit);
        }

        return topResults;
    }

    /**
     * Hybrid graph search: fuses local + global results.
     * Local entities weighted {@link #LOCAL_WEIGHT}, global relationship
     * endpoints weighted {@link #GLOBAL_WEIGHT}. Deduplicates by entity_id.
     */
    public List<Map<String, Object>> hybridGraphSearch(String query, int topK) throws SQLException {
        List<Map<String, Object>> local = localSearch(query, topK);
        List<Map<String, Object>> global = globalSearch(query, topK);

        Map<String, Map<String, Object>> merged = new LinkedHashMap<>();

        // Local entity results.
        for (Map<String, Object> hit : local) {
            String entityId = (String) hit.get("entity_id");
            Map<String, Object> entry = new LinkedHashMap<>(hit);
            entry.put("score", (double) hit.get("score") * LOCAL_WEIGHT);
            entry.put("match_type", "GRAPH_HYBRID");
            merged.put(entityId, entry);
        }

        // Global relationship endpoints.
        for (Map<String, Object> hit : global) {
            double weightedScore = (double) hit.get("score") * GLOBAL_WEIGHT;
            String sourceId = (String) hit.get("source_entity_id");
            String targetId = (String) hit.get("target_entity_id");

            mergeEntityFromRelationship(merged, sourceId, weightedScore, hit);
            mergeEntityFromRelationship(merged, targetId, weightedScore, hit);
        }

        List<Map<String, Object>> results = new ArrayList<>(merged.values());
        results.sort((a, b) -> Double.compare((double) b.get("score"), (double) a.get("score")));
        return results.size() > topK ? results.subList(0, topK) : results;
    }

    /**
     * Mix search: fuses graph search with chunk vector search (each weighted
     * {@link #MIX_WEIGHT}). Deduplicates by artifact_id; graph hits contribute
     * via their source_artifact_ids.
     */
    public List<Map<String, Object>> mixSearch(String query, int topK) throws SQLException {
        List<Map<String, Object>> graphResults = hybridGraphSearch(query, topK);
        List<Map<String, Object>> chunkResults = chunkSearch(query, topK);

        Map<String, Map<String, Object>> merged = new LinkedHashMap<>();

        // Graph results contribute per source artifact.
        for (Map<String, Object> hit : graphResults) {
            String sourceFiles = (String) hit.get("source_files");
            if (sourceFiles == null) continue;
            double weightedScore = (double) hit.get("score") * MIX_WEIGHT;
            for (String artId : parseJsonArray(sourceFiles)) {
                if (merged.containsKey(artId)) {
                    Map<String, Object> existing = merged.get(artId);
                    existing.put("score", (double) existing.get("score") + weightedScore);
                    existing.put("match_type", "MIX");
                } else {
                    Map<String, Object> entry = new LinkedHashMap<>();
                    entry.put("artifact_id", artId);
                    entry.put("score", weightedScore);
                    entry.put("match_type", "MIX");
                    entry.put("graph_entity", hit.get("entity_name"));
                    entry.put("graph_description", hit.get("description"));
                    merged.put(artId, entry);
                }
            }
        }

        // Chunk results contribute directly.
        for (Map<String, Object> hit : chunkResults) {
            String artId = (String) hit.get("artifact_id");
            double weightedScore = (double) hit.get("score") * MIX_WEIGHT;
            if (merged.containsKey(artId)) {
                Map<String, Object> existing = merged.get(artId);
                existing.put("score", (double) existing.get("score") + weightedScore);
                existing.put("match_type", "MIX");
                if (!existing.containsKey("preview")) {
                    existing.put("preview", hit.get("preview"));
                    existing.put("file_name", hit.get("file_name"));
                }
            } else {
                Map<String, Object> entry = new LinkedHashMap<>(hit);
                entry.put("score", weightedScore);
                entry.put("match_type", "MIX");
                merged.put(artId, entry);
            }
        }

        List<Map<String, Object>> results = new ArrayList<>(merged.values());
        results.sort((a, b) -> Double.compare((double) b.get("score"), (double) a.get("score")));
        return results.size() > topK ? results.subList(0, topK) : results;
    }

    // ── Private helpers ──────────────────────────────────────────────────────

    /**
     * Plain chunk vector search over INDEXED, non-superseded artifacts.
     * Package-private for testing.
     */
    List<Map<String, Object>> chunkSearch(String query, int topK) throws SQLException {
        double[] queryEmb = embeddingService.embed(query);
        Connection conn = dataSource.getConnection();
        List<Map<String, Object>> scored = new ArrayList<>();

        try (PreparedStatement ps = conn.prepareStatement("""
                SELECT ce.chunk_id, ce.embedding, ac.chunk_text, ac.artifact_id,
                       ac.line_start, ac.line_end, a.file_name
                FROM chunk_embeddings ce
                JOIN artifact_chunks ac ON ce.chunk_id = ac.chunk_id
                JOIN artifacts a ON ac.artifact_id = a.artifact_id
                WHERE a.status = 'INDEXED'
                  AND COALESCE(a.freshness, 'current') != 'superseded'
                """)) {
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    double[] emb = extractEmbedding(rs);
                    if (emb == null) continue;
                    double sim = cosineSimilarity(queryEmb, emb);
                    if (sim > MIN_SIMILARITY) {
                        Map<String, Object> hit = new LinkedHashMap<>();
                        hit.put("chunk_id", rs.getString("chunk_id"));
                        hit.put("artifact_id", rs.getString("artifact_id"));
                        String text = rs.getString("chunk_text");
                        hit.put("preview", text != null && text.length() > 200
                                ? text.substring(0, 200) + "..." : text);
                        hit.put("file_name", rs.getString("file_name"));
                        hit.put("line_start", rs.getObject("line_start"));
                        hit.put("line_end", rs.getObject("line_end"));
                        hit.put("score", sim);
                        hit.put("match_type", "CHUNK");
                        scored.add(hit);
                    }
                }
            }
        }

        scored.sort((a, b) -> Double.compare((double) b.get("score"), (double) a.get("score")));
        return scored.size() > topK ? scored.subList(0, topK) : scored;
    }

    /** Folds a relationship endpoint into the hybrid result map (accumulating score). */
    private void mergeEntityFromRelationship(Map<String, Map<String, Object>> merged,
                                             String entityId, double weightedScore,
                                             Map<String, Object> relHit) {
        if (entityId == null) return;
        if (merged.containsKey(entityId)) {
            Map<String, Object> existing = merged.get(entityId);
            existing.put("score", (double) existing.get("score") + weightedScore);
            existing.put("match_type", "GRAPH_HYBRID");
        } else {
            Map<String, Object> entry = new LinkedHashMap<>();
            entry.put("entity_id", entityId);
            entry.put("score", weightedScore);
            entry.put("match_type", "GRAPH_HYBRID");
            entry.put("source_files", relHit.get("source_files"));
            // Best-effort enrichment with entity details.
            try {
                Map<String, Object> entity = knowledgeGraphService.getEntity(entityId);
                if (entity != null) {
                    entry.put("entity_name", entity.get("entity_name"));
                    entry.put("entity_type", entity.get("entity_type"));
                    entry.put("description", entity.get("description"));
                }
            } catch (SQLException e) {
                log.warn("Failed to fetch entity {}", entityId, e);
            }
            merged.put(entityId, entry);
        }
    }

    /** Adds source/target entity name+type to a relationship hit (best-effort). */
    private void enrichRelationshipWithEntityNames(Map<String, Object> hit) {
        try {
            String sourceId = (String) hit.get("source_entity_id");
            String targetId = (String) hit.get("target_entity_id");
            Map<String, Object> sourceEntity = knowledgeGraphService.getEntity(sourceId);
            Map<String, Object> targetEntity = knowledgeGraphService.getEntity(targetId);
            if (sourceEntity != null) {
                hit.put("source_entity_name", sourceEntity.get("entity_name"));
                hit.put("source_entity_type", sourceEntity.get("entity_type"));
            }
            if (targetEntity != null) {
                hit.put("target_entity_name", targetEntity.get("entity_name"));
                hit.put("target_entity_type", targetEntity.get("entity_type"));
            }
        } catch (SQLException e) {
            log.warn("Failed to enrich relationship entity names", e);
        }
    }

    /**
     * Naive JSON string-array parser (["a","b"]) for values the graph services
     * write themselves. Assumes no commas or escaped quotes inside values.
     */
    static List<String> parseJsonArray(String jsonArray) {
        if (jsonArray == null || jsonArray.isBlank()) return List.of();
        String stripped = jsonArray.trim();
        if (stripped.equals("[]")) return List.of();
        stripped = stripped.substring(1, stripped.length() - 1); // remove [ ]
        List<String> result = new ArrayList<>();
        for (String token : stripped.split(",")) {
            String val = token.trim().replace("\"", "");
            if (!val.isEmpty()) result.add(val);
        }
        return result;
    }

    /**
     * Converts the "embedding" column of the current row to double[].
     * Handles double[], float[] (DuckDB FLOAT arrays — previously dropped with a
     * warning, silently excluding those rows from search), Object[] of Numbers,
     * and java.sql.Array. Returns null for NULL or unrecognized types.
     */
    private double[] extractEmbedding(ResultSet rs) throws SQLException {
        Object embObj = rs.getObject("embedding");
        if (embObj == null) return null;

        if (embObj instanceof double[] arr) {
            return arr;
        }

        if (embObj instanceof float[] farr) {
            double[] result = new double[farr.length];
            for (int i = 0; i < farr.length; i++) {
                result[i] = farr[i];
            }
            return result;
        }

        if (embObj instanceof Object[] objArr) {
            double[] result = new double[objArr.length];
            for (int i = 0; i < objArr.length; i++) {
                result[i] = ((Number) objArr[i]).doubleValue();
            }
            return result;
        }

        if (embObj instanceof java.sql.Array sqlArray) {
            Object[] arr = (Object[]) sqlArray.getArray();
            double[] result = new double[arr.length];
            for (int i = 0; i < arr.length; i++) {
                result[i] = ((Number) arr[i]).doubleValue();
            }
            return result;
        }

        log.warn("Unexpected embedding type: {}", embObj.getClass());
        return null;
    }

    /**
     * Cosine similarity in [-1, 1]; returns 0.0 for null/mismatched-length
     * vectors or zero-norm inputs.
     */
    static double cosineSimilarity(double[] a, double[] b) {
        if (a == null || b == null || a.length != b.length) return 0.0;
        double dot = 0, normA = 0, normB = 0;
        for (int i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        if (normA == 0 || normB == 0) return 0;
        return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }
}
class GraphUpdateService { + private static final Logger log = LoggerFactory.getLogger(GraphUpdateService.class); + private final DuckDBDataSource dataSource; + private final KnowledgeGraphService knowledgeGraphService; + + public GraphUpdateService(DuckDBDataSource dataSource, KnowledgeGraphService knowledgeGraphService) { + this.dataSource = dataSource; + this.knowledgeGraphService = knowledgeGraphService; + } + + /** + * Called after a file is re-indexed. Removes graph data sourced solely from this artifact. + * Entities shared with other artifacts survive with decremented counts. + * Returns stats about what was removed/updated. + */ + public Map onArtifactReindexed(String artifactId) throws SQLException { + return knowledgeGraphService.deleteEntitiesForArtifact(artifactId); + } + + /** + * Called when a file is deleted. Same as reindexed but permanent. + */ + public Map onArtifactDeleted(String artifactId) throws SQLException { + return knowledgeGraphService.deleteEntitiesForArtifact(artifactId); + } + + /** + * Find entities/relationships that reference artifacts which have been + * re-indexed since the entity was last updated. + * Compare entities.updated_at with artifacts.indexed_at. 
+ */ + public List> findStaleGraphEntries() throws SQLException { + Connection conn = dataSource.getConnection(); + List> stale = new ArrayList<>(); + + // Find entities whose source artifacts have been re-indexed after the entity was created + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(""" + SELECT e.entity_id, e.entity_name, e.entity_type, e.source_artifact_ids, e.updated_at + FROM entities e + WHERE EXISTS ( + SELECT 1 FROM artifacts a + WHERE e.source_artifact_ids LIKE '%' || a.artifact_id || '%' + AND a.indexed_at > e.updated_at + ) + """)) { + while (rs.next()) { + Map entry = new LinkedHashMap<>(); + entry.put("entity_id", rs.getString("entity_id")); + entry.put("entity_name", rs.getString("entity_name")); + entry.put("entity_type", rs.getString("entity_type")); + entry.put("source_artifact_ids", rs.getString("source_artifact_ids")); + entry.put("reason", "source artifact re-indexed after entity creation"); + stale.add(entry); + } + } + return stale; + } +} diff --git a/src/main/java/com/javaducker/server/service/KnowledgeGraphService.java b/src/main/java/com/javaducker/server/service/KnowledgeGraphService.java new file mode 100644 index 0000000..cde8f48 --- /dev/null +++ b/src/main/java/com/javaducker/server/service/KnowledgeGraphService.java @@ -0,0 +1,688 @@ +package com.javaducker.server.service; + +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.ingestion.EmbeddingService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; + +import java.sql.*; +import java.util.*; + +@Service +public class KnowledgeGraphService { + + private static final Logger log = LoggerFactory.getLogger(KnowledgeGraphService.class); + private final DuckDBDataSource dataSource; + private final EmbeddingService embeddingService; + + public KnowledgeGraphService(DuckDBDataSource dataSource, EmbeddingService embeddingService) { + this.dataSource = dataSource; + 
this.embeddingService = embeddingService; + } + + public Map upsertEntity(String entityName, String entityType, String description, + String artifactId, String chunkId) throws SQLException { + Connection conn = dataSource.getConnection(); + // Check if entity with same name+type exists + String existingId = null; + Map existing = null; + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entities WHERE entity_name = ? AND entity_type = ?")) { + ps.setString(1, entityName); + ps.setString(2, entityType); + try (ResultSet rs = ps.executeQuery()) { + if (rs.next()) { + existingId = rs.getString("entity_id"); + existing = rowToMap(rs); + } + } + } + + if (existing != null) { + String updatedArtifacts = appendToJsonArray((String) existing.get("source_artifact_ids"), artifactId); + String updatedChunks = chunkId != null + ? appendToJsonArray((String) existing.get("source_chunk_ids"), chunkId) + : (String) existing.get("source_chunk_ids"); + int newCount = ((Number) existing.get("mention_count")).intValue() + 1; + String newDesc = description != null && (existing.get("description") == null + || description.length() > ((String) existing.get("description")).length()) + ? description : (String) existing.get("description"); + + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = '" + esc(existingId) + "'"); + } + double[] emb = newDesc != null ? embeddingService.embed(newDesc) : null; + insertEntity(conn, existingId, entityName, entityType, newDesc, + (String) existing.get("summary"), updatedArtifacts, updatedChunks, newCount, emb); + return Map.of("entity_id", existingId, "entity_name", entityName, + "entity_type", entityType, "mention_count", newCount, "action", "merged"); + } + + // New entity + String entityId = entityType.toLowerCase() + "-" + slugify(entityName); + double[] emb = description != null ? 
embeddingService.embed(description) : null; + String artifactIds = appendToJsonArray(null, artifactId); + String chunkIds = chunkId != null ? appendToJsonArray(null, chunkId) : null; + insertEntity(conn, entityId, entityName, entityType, description, null, artifactIds, chunkIds, 1, emb); + return Map.of("entity_id", entityId, "entity_name", entityName, + "entity_type", entityType, "mention_count", 1, "action", "created"); + } + + public Map upsertRelationship(String sourceEntityId, String targetEntityId, + String relationshipType, String description, + String artifactId, String chunkId, + double weight) throws SQLException { + Connection conn = dataSource.getConnection(); + String relId = slugify(sourceEntityId + "-" + relationshipType + "-" + targetEntityId); + + Map existing = null; + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entity_relationships WHERE source_entity_id = ? AND target_entity_id = ? AND relationship_type = ?")) { + ps.setString(1, sourceEntityId); + ps.setString(2, targetEntityId); + ps.setString(3, relationshipType); + try (ResultSet rs = ps.executeQuery()) { + if (rs.next()) { + existing = rowToMap(rs); + relId = rs.getString("relationship_id"); + } + } + } + + if (existing != null) { + String updatedArtifacts = appendToJsonArray((String) existing.get("source_artifact_ids"), artifactId); + String updatedChunks = chunkId != null + ? appendToJsonArray((String) existing.get("source_chunk_ids"), chunkId) + : (String) existing.get("source_chunk_ids"); + double newWeight = ((Number) existing.get("weight")).doubleValue() + weight; + + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entity_relationships WHERE relationship_id = '" + esc(relId) + "'"); + } + double[] emb = description != null ? 
embeddingService.embed(description) : null; + insertRelationship(conn, relId, sourceEntityId, targetEntityId, relationshipType, + description, newWeight, updatedArtifacts, updatedChunks, emb); + return Map.of("relationship_id", relId, "action", "merged", "weight", newWeight); + } + + double[] emb = description != null ? embeddingService.embed(description) : null; + String artifactIds = appendToJsonArray(null, artifactId); + String chunkIds = chunkId != null ? appendToJsonArray(null, chunkId) : null; + insertRelationship(conn, relId, sourceEntityId, targetEntityId, relationshipType, + description, weight, artifactIds, chunkIds, emb); + return Map.of("relationship_id", relId, "action", "created", "weight", weight); + } + + public Map getEntity(String entityId) throws SQLException { + Connection conn = dataSource.getConnection(); + try (PreparedStatement ps = conn.prepareStatement("SELECT * FROM entities WHERE entity_id = ?")) { + ps.setString(1, entityId); + try (ResultSet rs = ps.executeQuery()) { + if (rs.next()) return rowToMap(rs); + } + } + return null; + } + + public List> findEntitiesByName(String namePattern) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entities WHERE LOWER(entity_name) LIKE LOWER(?)")) { + ps.setString(1, "%" + namePattern + "%"); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) results.add(rowToMap(rs)); + } + } + return results; + } + + public List> findEntitiesByType(String entityType) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entities WHERE entity_type = ?")) { + ps.setString(1, entityType); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) results.add(rowToMap(rs)); + } + } + return results; + } + + public List> getRelationships(String entityId) 
throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entity_relationships WHERE source_entity_id = ? OR target_entity_id = ?")) { + ps.setString(1, entityId); + ps.setString(2, entityId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) results.add(rowToMap(rs)); + } + } + return results; + } + + public Map getNeighborhood(String entityId, int depth) throws SQLException { + Connection conn = dataSource.getConnection(); + Set visited = new LinkedHashSet<>(); + visited.add(entityId); + Set frontier = new HashSet<>(); + frontier.add(entityId); + List> edges = new ArrayList<>(); + + for (int level = 0; level < depth && !frontier.isEmpty(); level++) { + Set nextFrontier = new HashSet<>(); + for (String nodeId : frontier) { + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entity_relationships WHERE source_entity_id = ? OR target_entity_id = ?")) { + ps.setString(1, nodeId); + ps.setString(2, nodeId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map edge = rowToMap(rs); + edges.add(edge); + String src = (String) edge.get("source_entity_id"); + String tgt = (String) edge.get("target_entity_id"); + String other = src.equals(nodeId) ? 
tgt : src; + if (!visited.contains(other)) { + visited.add(other); + nextFrontier.add(other); + } + } + } + } + } + frontier = nextFrontier; + } + + // Fetch full entity details for all visited nodes + List> nodes = new ArrayList<>(); + for (String nodeId : visited) { + Map entity = getEntity(nodeId); + if (entity != null) nodes.add(entity); + } + + // Deduplicate edges by relationship_id + Map> uniqueEdges = new LinkedHashMap<>(); + for (Map edge : edges) { + uniqueEdges.putIfAbsent((String) edge.get("relationship_id"), edge); + } + + return Map.of("nodes", nodes, "edges", new ArrayList<>(uniqueEdges.values())); + } + + public Map getPath(String fromEntityId, String toEntityId) throws SQLException { + Connection conn = dataSource.getConnection(); + Map parentMap = new LinkedHashMap<>(); + Map> parentEdge = new LinkedHashMap<>(); + Set visited = new HashSet<>(); + Queue queue = new LinkedList<>(); + queue.add(fromEntityId); + visited.add(fromEntityId); + boolean found = false; + + while (!queue.isEmpty() && !found) { + String current = queue.poll(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entity_relationships WHERE source_entity_id = ? OR target_entity_id = ?")) { + ps.setString(1, current); + ps.setString(2, current); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map edge = rowToMap(rs); + String src = (String) edge.get("source_entity_id"); + String tgt = (String) edge.get("target_entity_id"); + String neighbor = src.equals(current) ? 
tgt : src; + if (!visited.contains(neighbor)) { + visited.add(neighbor); + parentMap.put(neighbor, current); + parentEdge.put(neighbor, edge); + if (neighbor.equals(toEntityId)) { + found = true; + break; + } + queue.add(neighbor); + } + } + } + } + } + + if (!found) return Map.of("found", false, "path", List.of(), "edges", List.of()); + + // Reconstruct path + List path = new ArrayList<>(); + List> pathEdges = new ArrayList<>(); + String node = toEntityId; + while (node != null) { + path.add(node); + if (parentEdge.containsKey(node)) pathEdges.add(parentEdge.get(node)); + node = parentMap.get(node); + } + Collections.reverse(path); + Collections.reverse(pathEdges); + return Map.of("found", true, "path", path, "edges", pathEdges); + } + + public List> getEntitiesForArtifact(String artifactId) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT entity_id, entity_name, entity_type, description, mention_count " + + "FROM entities WHERE source_artifact_ids LIKE ?")) { + ps.setString(1, "%\"" + artifactId + "\"%"); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map row = new LinkedHashMap<>(); + row.put("entity_id", rs.getString("entity_id")); + row.put("entity_name", rs.getString("entity_name")); + row.put("entity_type", rs.getString("entity_type")); + row.put("description", rs.getString("description")); + row.put("mention_count", rs.getInt("mention_count")); + results.add(row); + } + } + } + return results; + } + + public Map getStats() throws SQLException { + Connection conn = dataSource.getConnection(); + Map stats = new LinkedHashMap<>(); + try (Statement stmt = conn.createStatement()) { + try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) AS cnt FROM entities")) { + rs.next(); + stats.put("entity_count", rs.getLong("cnt")); + } + try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) AS cnt FROM entity_relationships")) { + 
rs.next(); + stats.put("relationship_count", rs.getLong("cnt")); + } + try (ResultSet rs = stmt.executeQuery( + "SELECT entity_type, COUNT(*) AS cnt FROM entities GROUP BY entity_type ORDER BY cnt DESC LIMIT 10")) { + List> topTypes = new ArrayList<>(); + while (rs.next()) { + topTypes.add(Map.of("type", rs.getString("entity_type"), "count", rs.getLong("cnt"))); + } + stats.put("top_types", topTypes); + } + } + return stats; + } + + public Map deleteEntitiesForArtifact(String artifactId) throws SQLException { + Connection conn = dataSource.getConnection(); + int deletedEntities = 0; + int deletedRelationships = 0; + int updatedEntities = 0; + + // Find entities sourced from this artifact + List> entities = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entities WHERE source_artifact_ids LIKE ?")) { + ps.setString(1, "%\"" + artifactId + "\"%"); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) entities.add(rowToMap(rs)); + } + } + + for (Map entity : entities) { + String eid = (String) entity.get("entity_id"); + String sources = (String) entity.get("source_artifact_ids"); + String updated = removeFromJsonArray(sources, artifactId); + int newCount = ((Number) entity.get("mention_count")).intValue() - 1; + + if (updated == null || updated.equals("[]") || newCount <= 0) { + // Delete entity and its relationships + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entity_relationships WHERE source_entity_id = '" + esc(eid) + "' OR target_entity_id = '" + esc(eid) + "'"); + } + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = '" + esc(eid) + "'"); + } + deletedEntities++; + // Count deleted relationships + deletedRelationships += getRelationshipCountForDeletedEntity(entity); + } else { + // Update entity with decremented count + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = '" + 
esc(eid) + "'"); + } + double[] emb = entity.get("description") != null + ? embeddingService.embed((String) entity.get("description")) : null; + insertEntity(conn, eid, (String) entity.get("entity_name"), + (String) entity.get("entity_type"), (String) entity.get("description"), + (String) entity.get("summary"), updated, + (String) entity.get("source_chunk_ids"), newCount, emb); + updatedEntities++; + } + } + + // Also clean relationships sourced only from this artifact + List> rels = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT * FROM entity_relationships WHERE source_artifact_ids LIKE ?")) { + ps.setString(1, "%\"" + artifactId + "\"%"); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) rels.add(rowToMap(rs)); + } + } + for (Map rel : rels) { + String rid = (String) rel.get("relationship_id"); + String sources = (String) rel.get("source_artifact_ids"); + String updated = removeFromJsonArray(sources, artifactId); + if (updated == null || updated.equals("[]")) { + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entity_relationships WHERE relationship_id = '" + esc(rid) + "'"); + } + deletedRelationships++; + } + } + + return Map.of("deleted_entities", deletedEntities, "deleted_relationships", deletedRelationships, + "updated_entities", updatedEntities); + } + + public Map mergeEntities(String sourceEntityId, String targetEntityId, + String mergedDescription) throws SQLException { + Connection conn = dataSource.getConnection(); + Map source = getEntity(sourceEntityId); + Map target = getEntity(targetEntityId); + if (source == null || target == null) { + return Map.of("error", "One or both entities not found"); + } + + // Merge metadata + String mergedArtifacts = mergeJsonArrays( + (String) source.get("source_artifact_ids"), (String) target.get("source_artifact_ids")); + String mergedChunks = mergeJsonArrays( + (String) source.get("source_chunk_ids"), (String) 
target.get("source_chunk_ids")); + int mergedCount = ((Number) source.get("mention_count")).intValue() + + ((Number) target.get("mention_count")).intValue(); + + // Rewire relationships from source to target + int rewired = 0; + List> sourceRels = getRelationships(sourceEntityId); + for (Map rel : sourceRels) { + String rid = (String) rel.get("relationship_id"); + String src = (String) rel.get("source_entity_id"); + String tgt = (String) rel.get("target_entity_id"); + String newSrc = src.equals(sourceEntityId) ? targetEntityId : src; + String newTgt = tgt.equals(sourceEntityId) ? targetEntityId : tgt; + // Skip self-loops + if (newSrc.equals(newTgt)) { + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entity_relationships WHERE relationship_id = '" + esc(rid) + "'"); + } + continue; + } + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entity_relationships WHERE relationship_id = '" + esc(rid) + "'"); + } + double[] emb = rel.get("description") != null + ? embeddingService.embed((String) rel.get("description")) : null; + insertRelationship(conn, rid, newSrc, newTgt, + (String) rel.get("relationship_type"), (String) rel.get("description"), + ((Number) rel.get("weight")).doubleValue(), + (String) rel.get("source_artifact_ids"), (String) rel.get("source_chunk_ids"), emb); + rewired++; + } + + // Delete source entity + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = '" + esc(sourceEntityId) + "'"); + } + + // Update target entity + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = '" + esc(targetEntityId) + "'"); + } + double[] emb = mergedDescription != null ? 
embeddingService.embed(mergedDescription) : null; + insertEntity(conn, targetEntityId, (String) target.get("entity_name"), + (String) target.get("entity_type"), mergedDescription, + (String) target.get("summary"), mergedArtifacts, mergedChunks, mergedCount, emb); + + return Map.of("merged_into", targetEntityId, "source_deleted", sourceEntityId, + "relationships_rewired", rewired, "mention_count", mergedCount); + } + + // ── Duplicate detection (Chapter 5) ───────────────────────────────────── + + public List> findDuplicateCandidates() throws SQLException { + Connection conn = dataSource.getConnection(); + List> allEntities = new ArrayList<>(); + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery("SELECT entity_id, entity_name, entity_type FROM entities")) { + while (rs.next()) { + allEntities.add(Map.of( + "entity_id", rs.getString("entity_id"), + "entity_name", rs.getString("entity_name"), + "entity_type", rs.getString("entity_type"))); + } + } + + List> candidates = new ArrayList<>(); + for (int i = 0; i < allEntities.size(); i++) { + for (int j = i + 1; j < allEntities.size(); j++) { + Map a = allEntities.get(i); + Map b = allEntities.get(j); + String nameA = (String) a.get("entity_name"); + String nameB = (String) b.get("entity_name"); + + double confidence = 0; + String reason = null; + + if (nameA.equalsIgnoreCase(nameB) && !nameA.equals(nameB)) { + confidence = 1.0; + reason = "exact name match (case-insensitive)"; + } else if (!nameA.equalsIgnoreCase(nameB)) { + int dist = levenshteinDistance(nameA.toLowerCase(), nameB.toLowerCase()); + if (dist > 0 && dist <= 2) { + confidence = 0.8; + reason = "Levenshtein distance " + dist; + } + } + + if (confidence > 0) { + Map candidate = new LinkedHashMap<>(); + candidate.put("source_entity_id", a.get("entity_id")); + candidate.put("target_entity_id", b.get("entity_id")); + candidate.put("source_name", nameA); + candidate.put("target_name", nameB); + candidate.put("confidence", 
confidence); + candidate.put("reason", reason); + candidates.add(candidate); + } + } + } + + candidates.sort((x, y) -> Double.compare((double) y.get("confidence"), (double) x.get("confidence"))); + if (candidates.size() > 50) candidates = new ArrayList<>(candidates.subList(0, 50)); + return candidates; + } + + public List> findMergeCandidates(String entityId) throws SQLException { + Connection conn = dataSource.getConnection(); + double[] targetEmb = null; + try (PreparedStatement ps = conn.prepareStatement("SELECT embedding FROM entities WHERE entity_id = ?")) { + ps.setString(1, entityId); + try (ResultSet rs = ps.executeQuery()) { + if (rs.next()) targetEmb = extractEmbedding(rs.getObject("embedding")); + } + } + if (targetEmb == null) return List.of(); + + List> candidates = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT entity_id, entity_name, entity_type, embedding FROM entities WHERE entity_id != ?")) { + ps.setString(1, entityId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + double[] otherEmb = extractEmbedding(rs.getObject("embedding")); + if (otherEmb == null) continue; + double sim = cosineSimilarity(targetEmb, otherEmb); + if (sim > 0.85) { + Map candidate = new LinkedHashMap<>(); + candidate.put("entity_id", rs.getString("entity_id")); + candidate.put("entity_name", rs.getString("entity_name")); + candidate.put("entity_type", rs.getString("entity_type")); + candidate.put("similarity", sim); + candidates.add(candidate); + } + } + } + } + candidates.sort((a, b) -> Double.compare((double) b.get("similarity"), (double) a.get("similarity"))); + return candidates; + } + + static int levenshteinDistance(String a, String b) { + int[][] dp = new int[a.length() + 1][b.length() + 1]; + for (int i = 0; i <= a.length(); i++) dp[i][0] = i; + for (int j = 0; j <= b.length(); j++) dp[0][j] = j; + for (int i = 1; i <= a.length(); i++) { + for (int j = 1; j <= b.length(); j++) { + int cost = a.charAt(i - 1) == 
b.charAt(j - 1) ? 0 : 1; + dp[i][j] = Math.min(Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + cost); + } + } + return dp[a.length()][b.length()]; + } + + static double cosineSimilarity(double[] a, double[] b) { + if (a.length != b.length) return 0; + double dot = 0, normA = 0, normB = 0; + for (int i = 0; i < a.length; i++) { + dot += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; + } + double denom = Math.sqrt(normA) * Math.sqrt(normB); + return denom == 0 ? 0 : dot / denom; + } + + private double[] extractEmbedding(Object embObj) { + if (embObj == null) return null; + if (embObj instanceof double[] arr) return arr; + if (embObj instanceof Object[] objArr) { + double[] result = new double[objArr.length]; + for (int i = 0; i < objArr.length; i++) result[i] = ((Number) objArr[i]).doubleValue(); + return result; + } + if (embObj instanceof java.sql.Array sqlArr) { + try { + Object arr = sqlArr.getArray(); + return extractEmbedding(arr); + } catch (Exception e) { return null; } + } + return null; + } + + // ── Private helpers ──────────────────────────────────────────────────── + + private void insertEntity(Connection conn, String entityId, String entityName, String entityType, + String description, String summary, String sourceArtifactIds, + String sourceChunkIds, int mentionCount, double[] embedding) throws SQLException { + String embSql = embeddingToSql(embedding); + String sql = "INSERT INTO entities (entity_id, entity_name, entity_type, description, summary, " + + "source_artifact_ids, source_chunk_ids, mention_count, embedding, created_at, updated_at) " + + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, " + embSql + ", CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)"; + try (PreparedStatement ps = conn.prepareStatement(sql)) { + ps.setString(1, entityId); + ps.setString(2, entityName); + ps.setString(3, entityType); + ps.setString(4, description); + ps.setString(5, summary); + ps.setString(6, sourceArtifactIds); + ps.setString(7, sourceChunkIds); + 
ps.setInt(8, mentionCount); + ps.executeUpdate(); + } + } + + private void insertRelationship(Connection conn, String relId, String sourceEntityId, + String targetEntityId, String relType, String description, + double weight, String sourceArtifactIds, String sourceChunkIds, + double[] embedding) throws SQLException { + String embSql = embeddingToSql(embedding); + String sql = "INSERT INTO entity_relationships (relationship_id, source_entity_id, target_entity_id, " + + "relationship_type, description, weight, source_artifact_ids, source_chunk_ids, " + + "embedding, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, " + embSql + ", CURRENT_TIMESTAMP)"; + try (PreparedStatement ps = conn.prepareStatement(sql)) { + ps.setString(1, relId); + ps.setString(2, sourceEntityId); + ps.setString(3, targetEntityId); + ps.setString(4, relType); + ps.setString(5, description); + ps.setDouble(6, weight); + ps.setString(7, sourceArtifactIds); + ps.setString(8, sourceChunkIds); + ps.executeUpdate(); + } + } + + private String embeddingToSql(double[] embedding) { + if (embedding == null) return "NULL"; + StringBuilder sb = new StringBuilder("["); + for (int i = 0; i < embedding.length; i++) { + if (i > 0) sb.append(","); + sb.append(embedding[i]); + } + sb.append("]::DOUBLE[]"); + return sb.toString(); + } + + private Map rowToMap(ResultSet rs) throws SQLException { + Map map = new LinkedHashMap<>(); + ResultSetMetaData meta = rs.getMetaData(); + for (int i = 1; i <= meta.getColumnCount(); i++) { + String col = meta.getColumnName(i).toLowerCase(); + if (col.equals("embedding")) continue; // skip large arrays in map output + map.put(col, rs.getObject(i)); + } + return map; + } + + private String slugify(String s) { + return s.toLowerCase().replaceAll("[^a-z0-9]+", "-").replaceAll("-+", "-").replaceAll("^-|-$", ""); + } + + private String appendToJsonArray(String existing, String value) { + if (value == null) return existing; + if (existing == null || existing.isBlank()) return "[\"" + value 
+ "\"]"; + if (existing.contains("\"" + value + "\"")) return existing; + return existing.substring(0, existing.length() - 1) + ",\"" + value + "\"]"; + } + + private String removeFromJsonArray(String jsonArray, String value) { + if (jsonArray == null || jsonArray.isBlank()) return null; + String result = jsonArray.replace(",\"" + value + "\"", "").replace("\"" + value + "\",", "") + .replace("[\"" + value + "\"]", "[]").replace("\"" + value + "\"", ""); + // Clean up leftover commas + result = result.replace("[,", "[").replace(",]", "]"); + return result; + } + + private String mergeJsonArrays(String a, String b) { + if (a == null || a.isBlank()) return b; + if (b == null || b.isBlank()) return a; + // Simple merge: parse values from b and append to a + String result = a; + String stripped = b.substring(1, b.length() - 1); // remove [ ] + for (String token : stripped.split(",")) { + String val = token.trim().replace("\"", ""); + if (!val.isEmpty()) result = appendToJsonArray(result, val); + } + return result; + } + + private int getRelationshipCountForDeletedEntity(Map entity) { + // Approximate: we already deleted, just return 0 for bookkeeping + return 0; + } + + private String esc(String s) { + return s.replace("'", "''"); + } +} diff --git a/src/main/java/com/javaducker/server/service/SemanticTagService.java b/src/main/java/com/javaducker/server/service/SemanticTagService.java new file mode 100644 index 0000000..295e3f0 --- /dev/null +++ b/src/main/java/com/javaducker/server/service/SemanticTagService.java @@ -0,0 +1,283 @@ +package com.javaducker.server.service; + +import com.javaducker.server.db.DuckDBDataSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; + +import java.sql.*; +import java.util.*; + +@Service +public class SemanticTagService { + + private static final Logger log = LoggerFactory.getLogger(SemanticTagService.class); + private static final Set VALID_CATEGORIES = Set.of( + 
"functional", "architectural", "domain", "pattern", "concern"); + + private final DuckDBDataSource dataSource; + + public SemanticTagService(DuckDBDataSource dataSource) { + this.dataSource = dataSource; + } + + public Map writeTags(String artifactId, List> tags) throws SQLException { + if (tags.size() < 4 || tags.size() > 10) { + throw new IllegalArgumentException( + "Tags count must be between 4 and 10, got " + tags.size()); + } + for (Map t : tags) { + String category = (String) t.get("category"); + if (category == null || !VALID_CATEGORIES.contains(category)) { + throw new IllegalArgumentException( + "Invalid category: " + category + ". Valid: " + VALID_CATEGORIES); + } + } + + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM artifact_semantic_tags WHERE artifact_id = '" + esc(artifactId) + "'"); + } + try (PreparedStatement ps = conn.prepareStatement( + "INSERT INTO artifact_semantic_tags (artifact_id, tag, category, confidence, rationale, source) VALUES (?, ?, ?, ?, ?, ?)")) { + for (Map t : tags) { + ps.setString(1, artifactId); + ps.setString(2, (String) t.get("tag")); + ps.setString(3, (String) t.get("category")); + ps.setDouble(4, t.containsKey("confidence") && t.get("confidence") != null + ? 
((Number) t.get("confidence")).doubleValue() : 1.0); + ps.setString(5, (String) t.getOrDefault("rationale", null)); + ps.setString(6, (String) t.getOrDefault("source", "llm")); + ps.executeUpdate(); + } + } + return Map.of("artifact_id", artifactId, "tags_count", tags.size()); + } + + public List> findByTag(String tag) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT st.artifact_id, st.tag, st.category, st.confidence, st.rationale, st.source, st.created_at, + a.file_name + FROM artifact_semantic_tags st + JOIN artifacts a ON st.artifact_id = a.artifact_id + WHERE st.tag = ? + ORDER BY st.confidence DESC + """)) { + ps.setString(1, tag); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map row = new LinkedHashMap<>(); + row.put("artifact_id", rs.getString("artifact_id")); + row.put("tag", rs.getString("tag")); + row.put("category", rs.getString("category")); + row.put("confidence", rs.getDouble("confidence")); + row.put("rationale", rs.getString("rationale")); + row.put("source", rs.getString("source")); + row.put("file_name", rs.getString("file_name")); + results.add(row); + } + } + } + return results; + } + + public List> findByCategory(String category) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT st.artifact_id, st.tag, st.category, st.confidence, st.rationale, st.source, + a.file_name + FROM artifact_semantic_tags st + JOIN artifacts a ON st.artifact_id = a.artifact_id + WHERE st.category = ? 
+ ORDER BY st.confidence DESC + """)) { + ps.setString(1, category); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map row = new LinkedHashMap<>(); + row.put("artifact_id", rs.getString("artifact_id")); + row.put("tag", rs.getString("tag")); + row.put("category", rs.getString("category")); + row.put("confidence", rs.getDouble("confidence")); + row.put("file_name", rs.getString("file_name")); + results.add(row); + } + } + } + return results; + } + + public List> searchByTags(List tags, boolean matchAll) throws SQLException { + if (tags == null || tags.isEmpty()) { + return List.of(); + } + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + + StringBuilder placeholders = new StringBuilder(); + for (int i = 0; i < tags.size(); i++) { + if (i > 0) placeholders.append(", "); + placeholders.append("?"); + } + + String sql; + if (matchAll) { + sql = """ + SELECT st.artifact_id, a.file_name, + STRING_AGG(st.tag, ', ') as matched_tags, + COUNT(DISTINCT st.tag) as match_count + FROM artifact_semantic_tags st + JOIN artifacts a ON st.artifact_id = a.artifact_id + WHERE st.tag IN (%s) + GROUP BY st.artifact_id, a.file_name + HAVING COUNT(DISTINCT st.tag) = ? 
+ ORDER BY match_count DESC + """.formatted(placeholders); + } else { + sql = """ + SELECT st.artifact_id, a.file_name, + STRING_AGG(st.tag, ', ') as matched_tags, + COUNT(DISTINCT st.tag) as match_count + FROM artifact_semantic_tags st + JOIN artifacts a ON st.artifact_id = a.artifact_id + WHERE st.tag IN (%s) + GROUP BY st.artifact_id, a.file_name + ORDER BY match_count DESC + """.formatted(placeholders); + } + + try (PreparedStatement ps = conn.prepareStatement(sql)) { + int idx = 1; + for (String tag : tags) { + ps.setString(idx++, tag); + } + if (matchAll) { + ps.setInt(idx, tags.size()); + } + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + results.add(Map.of( + "artifact_id", rs.getString("artifact_id"), + "file_name", rs.getString("file_name"), + "matched_tags", rs.getString("matched_tags"), + "match_count", rs.getInt("match_count"))); + } + } + } + return results; + } + + public List> getTagsForArtifact(String artifactId) throws SQLException { + Connection conn = dataSource.getConnection(); + List> results = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT tag, category, confidence, rationale, source FROM artifact_semantic_tags WHERE artifact_id = ?")) { + ps.setString(1, artifactId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + Map row = new LinkedHashMap<>(); + row.put("tag", rs.getString("tag")); + row.put("category", rs.getString("category")); + row.put("confidence", rs.getDouble("confidence")); + row.put("rationale", rs.getString("rationale")); + row.put("source", rs.getString("source")); + results.add(row); + } + } + } + return results; + } + + public Map getTagCloud() throws SQLException { + Connection conn = dataSource.getConnection(); + Map>> byCategory = new LinkedHashMap<>(); + int totalTags = 0; + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(""" + SELECT tag, category, COUNT(*) as count + FROM artifact_semantic_tags + GROUP BY tag, 
category + ORDER BY count DESC + """)) { + while (rs.next()) { + String category = rs.getString("category"); + byCategory.computeIfAbsent(category, k -> new ArrayList<>()) + .add(Map.of( + "tag", rs.getString("tag"), + "count", rs.getInt("count"))); + totalTags++; + } + } + Map result = new LinkedHashMap<>(); + result.put("categories", byCategory); + result.put("total_tags", totalTags); + return result; + } + + public List> suggestTags(String artifactId) throws SQLException { + Connection conn = dataSource.getConnection(); + + // Step 1: Get tags for this artifact + List myTags = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement( + "SELECT tag FROM artifact_semantic_tags WHERE artifact_id = ?")) { + ps.setString(1, artifactId); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + myTags.add(rs.getString("tag")); + } + } + } + if (myTags.isEmpty()) { + return List.of(); + } + + // Step 2: Find other artifacts sharing those tags + StringBuilder placeholders = new StringBuilder(); + for (int i = 0; i < myTags.size(); i++) { + if (i > 0) placeholders.append(", "); + placeholders.append("?"); + } + + // Step 3: Get tags from those artifacts that this artifact doesn't have + String sql = """ + SELECT st.tag, st.category, COUNT(*) as frequency + FROM artifact_semantic_tags st + WHERE st.artifact_id IN ( + SELECT DISTINCT artifact_id FROM artifact_semantic_tags + WHERE tag IN (%s) AND artifact_id != ? 
+ ) + AND st.tag NOT IN (%s) + GROUP BY st.tag, st.category + ORDER BY frequency DESC + """.formatted(placeholders, placeholders); + + List> suggestions = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(sql)) { + int idx = 1; + for (String tag : myTags) { + ps.setString(idx++, tag); + } + ps.setString(idx++, artifactId); + for (String tag : myTags) { + ps.setString(idx++, tag); + } + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + suggestions.add(Map.of( + "tag", rs.getString("tag"), + "category", rs.getString("category"), + "frequency", rs.getInt("frequency"))); + } + } + } + return suggestions; + } + + private String esc(String s) { + return s.replace("'", "''"); + } +} diff --git a/src/main/resources/application-mcp.yml b/src/main/resources/application-mcp.yml new file mode 100644 index 0000000..77cd3f4 --- /dev/null +++ b/src/main/resources/application-mcp.yml @@ -0,0 +1,14 @@ +# MCP stdio mode — for Claude Code integration +# Launch: java -jar target/javaducker-1.0.0.jar --spring.profiles.active=mcp +spring: + main: + web-application-type: none + ai: + mcp: + server: + stdio: true + +logging: + level: + root: WARN + com.javaducker: INFO diff --git a/src/main/resources/application-server.yml b/src/main/resources/application-server.yml new file mode 100644 index 0000000..ee3551b --- /dev/null +++ b/src/main/resources/application-server.yml @@ -0,0 +1,7 @@ +# REST API server mode (default behavior) +# Launch: java -jar target/javaducker-1.0.0.jar --spring.profiles.active=server +spring: + ai: + mcp: + server: + stdio: false diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 0593cbd..893a057 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -1,6 +1,14 @@ server: port: 8080 +spring: + ai: + mcp: + server: + name: javaducker + version: 1.0.0 + type: SYNC + javaducker: db-path: data/javaducker.duckdb intake-dir: temp/intake diff --git 
a/src/test/java/com/javaducker/integration/McpToolRegistrationTest.java b/src/test/java/com/javaducker/integration/McpToolRegistrationTest.java new file mode 100644 index 0000000..eb3809e --- /dev/null +++ b/src/test/java/com/javaducker/integration/McpToolRegistrationTest.java @@ -0,0 +1,187 @@ +package com.javaducker.integration; + +import com.javaducker.server.mcp.*; +import org.junit.jupiter.api.Test; +import org.springframework.ai.tool.annotation.Tool; + +import java.lang.reflect.Method; +import java.util.*; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Reflection-based test that verifies all MCP tool classes declare the expected + * {@link Tool} annotations with correct names and non-empty descriptions. + * No Spring context is loaded — this purely checks annotation metadata. + */ +class McpToolRegistrationTest { + + private static final List> TOOL_CLASSES = List.of( + CoreTools.class, + AnalysisTools.class, + WatchTools.class, + ContentIntelligenceTools.class, + ReladomoTools.class, + SessionTools.class, + SemanticTagTools.class, + KnowledgeGraphTools.class, + EnrichmentTools.class + ); + + private static final Set EXPECTED_TOOL_NAMES = Set.of( + // CoreTools (8) + "javaducker_health", "javaducker_index_file", "javaducker_index_directory", + "javaducker_search", "javaducker_get_file_text", "javaducker_get_artifact_status", + "javaducker_wait_for_indexed", "javaducker_stats", + // AnalysisTools (10) + "javaducker_explain", "javaducker_blame", "javaducker_related", + "javaducker_dependencies", "javaducker_dependents", "javaducker_map", + "javaducker_stale", "javaducker_index_health", "javaducker_summarize", + "javaducker_find_related", + // WatchTools (1) + "javaducker_watch", + // ContentIntelligenceTools (17) + "javaducker_classify", "javaducker_tag", "javaducker_extract_points", + "javaducker_set_freshness", "javaducker_synthesize", "javaducker_link_concepts", + "javaducker_enrich_queue", 
"javaducker_mark_enriched", "javaducker_latest", + "javaducker_find_by_type", "javaducker_find_by_tag", "javaducker_find_points", + "javaducker_concepts", "javaducker_concept_timeline", "javaducker_concept_health", + "javaducker_stale_content", "javaducker_synthesis", + // ReladomoTools (9) + "javaducker_reladomo_relationships", "javaducker_reladomo_graph", + "javaducker_reladomo_path", "javaducker_reladomo_schema", + "javaducker_reladomo_object_files", "javaducker_reladomo_finders", + "javaducker_reladomo_deepfetch", "javaducker_reladomo_temporal", + "javaducker_reladomo_config", + // SessionTools (5) + "javaducker_index_sessions", "javaducker_search_sessions", + "javaducker_session_context", "javaducker_extract_decisions", + "javaducker_recent_decisions", + // SemanticTagTools (4) + "javaducker_synthesize_tags", "javaducker_search_by_tags", + "javaducker_tag_cloud", "javaducker_suggest_tags", + // KnowledgeGraphTools (15) + "javaducker_extract_entities", "javaducker_get_entities", + "javaducker_merge_entities", "javaducker_delete_entities", + "javaducker_graph_stats", "javaducker_graph_neighborhood", + "javaducker_graph_path", "javaducker_graph_search", + "javaducker_merge_candidates", "javaducker_confirm_merge", + "javaducker_reindex_graph", "javaducker_graph_stale", + "javaducker_detect_communities", "javaducker_summarize_community", + "javaducker_communities", + // EnrichmentTools (3) + "javaducker_enrichment_pipeline", "javaducker_enrichment_status", + "javaducker_rebuild_graph" + ); + + private static final int EXPECTED_TOTAL = 72; + + // ---- helpers ---- + + private List findToolMethods(Class clazz) { + return Arrays.stream(clazz.getDeclaredMethods()) + .filter(m -> m.isAnnotationPresent(Tool.class)) + .collect(Collectors.toList()); + } + + private String toolName(Method m) { + return m.getAnnotation(Tool.class).name(); + } + + private String toolDescription(Method m) { + return m.getAnnotation(Tool.class).description(); + } + + // ---- tests ---- + + 
@Test + void totalToolCountIs72() { + long total = TOOL_CLASSES.stream() + .mapToLong(c -> findToolMethods(c).size()) + .sum(); + + assertEquals(EXPECTED_TOTAL, total, + "Expected " + EXPECTED_TOTAL + " @Tool methods across all tool classes but found " + total); + } + + @Test + void perClassToolCounts() { + Map expectedCounts = Map.of( + "CoreTools", 8, + "AnalysisTools", 10, + "WatchTools", 1, + "ContentIntelligenceTools", 17, + "ReladomoTools", 9, + "SessionTools", 5, + "SemanticTagTools", 4, + "KnowledgeGraphTools", 15, + "EnrichmentTools", 3 + ); + + for (Class clazz : TOOL_CLASSES) { + int actual = findToolMethods(clazz).size(); + int expected = expectedCounts.get(clazz.getSimpleName()); + assertEquals(expected, actual, + clazz.getSimpleName() + " should have " + expected + " @Tool methods but has " + actual); + } + } + + @Test + void allExpectedToolNamesAreRegistered() { + Set actualNames = TOOL_CLASSES.stream() + .flatMap(c -> findToolMethods(c).stream()) + .map(this::toolName) + .collect(Collectors.toSet()); + + Set missing = new TreeSet<>(EXPECTED_TOOL_NAMES); + missing.removeAll(actualNames); + assertTrue(missing.isEmpty(), + "Missing expected tool names: " + missing); + + Set unexpected = new TreeSet<>(actualNames); + unexpected.removeAll(EXPECTED_TOOL_NAMES); + assertTrue(unexpected.isEmpty(), + "Unexpected tool names found: " + unexpected); + } + + @Test + void everyToolHasNonEmptyDescription() { + List blanks = TOOL_CLASSES.stream() + .flatMap(c -> findToolMethods(c).stream()) + .filter(m -> toolDescription(m) == null || toolDescription(m).isBlank()) + .map(m -> m.getDeclaringClass().getSimpleName() + "."
+ m.getName()) + .collect(Collectors.toList()); + + assertTrue(blanks.isEmpty(), + "@Tool methods with blank description: " + blanks); + } + + @Test + void noToolNameDuplicates() { + List allNames = TOOL_CLASSES.stream() + .flatMap(c -> findToolMethods(c).stream()) + .map(this::toolName) + .collect(Collectors.toList()); + + Set seen = new HashSet<>(); + List duplicates = allNames.stream() + .filter(n -> !seen.add(n)) + .collect(Collectors.toList()); + + assertTrue(duplicates.isEmpty(), + "Duplicate tool names found: " + duplicates); + } + + @Test + void allToolNamesFollowNamingConvention() { + List violations = TOOL_CLASSES.stream() + .flatMap(c -> findToolMethods(c).stream()) + .map(this::toolName) + .filter(name -> !name.startsWith("javaducker_")) + .collect(Collectors.toList()); + + assertTrue(violations.isEmpty(), + "Tool names not following 'javaducker_' prefix convention: " + violations); + } +} diff --git a/src/test/java/com/javaducker/server/db/SchemaBootstrapTest.java b/src/test/java/com/javaducker/server/db/SchemaBootstrapTest.java index 09163a6..832366f 100644 --- a/src/test/java/com/javaducker/server/db/SchemaBootstrapTest.java +++ b/src/test/java/com/javaducker/server/db/SchemaBootstrapTest.java @@ -136,6 +136,64 @@ void createsContentIntelligenceIndices() throws SQLException { } } + @Test + void createsSemanticTagsTable() throws SQLException { + SchemaBootstrap bootstrap = createBootstrap(); + bootstrap.createSchema(); + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + assertDoesNotThrow(() -> stmt.executeQuery("SELECT COUNT(*) FROM artifact_semantic_tags")); + } + } + + @Test + void createsSemanticTagsIndices() throws SQLException { + SchemaBootstrap bootstrap = createBootstrap(); + bootstrap.createSchema(); + Connection conn = dataSource.getConnection(); + for (String idx : new String[]{"idx_semantic_tags_tag", "idx_semantic_tags_category"}) { + try (var ps = conn.prepareStatement( + "SELECT COUNT(*) 
FROM duckdb_indexes() WHERE index_name = ?")) { + ps.setString(1, idx); + try (ResultSet rs = ps.executeQuery()) { + rs.next(); + assertEquals(1, rs.getLong(1), "Index " + idx + " should exist"); + } + } + } + } + + @Test + void createsKnowledgeGraphTables() throws SQLException { + SchemaBootstrap bootstrap = createBootstrap(); + bootstrap.createSchema(); + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + assertDoesNotThrow(() -> stmt.executeQuery("SELECT COUNT(*) FROM entities")); + assertDoesNotThrow(() -> stmt.executeQuery("SELECT COUNT(*) FROM entity_relationships")); + assertDoesNotThrow(() -> stmt.executeQuery("SELECT COUNT(*) FROM entity_communities")); + } + } + + @Test + void createsKnowledgeGraphIndices() throws SQLException { + SchemaBootstrap bootstrap = createBootstrap(); + bootstrap.createSchema(); + Connection conn = dataSource.getConnection(); + String[] expected = {"idx_entities_name", "idx_entities_type", "idx_rel_source", + "idx_rel_target", "idx_rel_type", "idx_community_level"}; + for (String idx : expected) { + try (var ps = conn.prepareStatement( + "SELECT COUNT(*) FROM duckdb_indexes() WHERE index_name = ?")) { + ps.setString(1, idx); + try (ResultSet rs = ps.executeQuery()) { + rs.next(); + assertEquals(1, rs.getLong(1), "Index " + idx + " should exist"); + } + } + } + } + @Test void schemaIsIdempotent() throws SQLException { SchemaBootstrap bootstrap = createBootstrap(); diff --git a/src/test/java/com/javaducker/server/mcp/AnalysisToolsTest.java b/src/test/java/com/javaducker/server/mcp/AnalysisToolsTest.java new file mode 100644 index 0000000..49f5c5f --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/AnalysisToolsTest.java @@ -0,0 +1,468 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.service.*; +import com.javaducker.server.service.GitBlameService.BlameEntry; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import 
org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.time.Instant; +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class AnalysisToolsTest { + + @Mock ExplainService explainService; + @Mock GitBlameService gitBlameService; + @Mock CoChangeService coChangeService; + @Mock DependencyService dependencyService; + @Mock ProjectMapService projectMapService; + @Mock StalenessService stalenessService; + @Mock ArtifactService artifactService; + @Mock SemanticTagService semanticTagService; + @Mock KnowledgeGraphService knowledgeGraphService; + + @InjectMocks AnalysisTools tools; + + // ── explain ────────────────────────────────────────────────────────── + + @Test + void explain_delegatesFilePath() throws Exception { + Map expected = Map.of("file", "data"); + when(explainService.explainByPath("src/Main.java")).thenReturn(expected); + + Map result = tools.explain("src/Main.java"); + + assertEquals(expected, result); + verify(explainService).explainByPath("src/Main.java"); + } + + @Test + void explain_returnsErrorWhenNotFound() throws Exception { + when(explainService.explainByPath("missing.java")).thenReturn(null); + + Map result = tools.explain("missing.java"); + + assertTrue(result.containsKey("error")); + } + + @Test + void explain_returnsErrorOnException() throws Exception { + when(explainService.explainByPath(any())).thenThrow(new RuntimeException("db down")); + + Map result = tools.explain("src/Foo.java"); + + assertEquals("db down", result.get("error")); + } + + // ── blame ──────────────────────────────────────────────────────────── + + @Test + void blame_callsBlameWithoutLineRange() throws Exception { + BlameEntry entry = new BlameEntry(1, 5, "abc123", "alice", + Instant.parse("2024-01-01T00:00:00Z"), "initial commit", "code"); + 
when(gitBlameService.blame("src/Foo.java")).thenReturn(List.of(entry)); + + Map result = tools.blame("src/Foo.java", null, null); + + assertEquals("src/Foo.java", result.get("file_path")); + assertEquals(1, result.get("entry_count")); + verify(gitBlameService).blame("src/Foo.java"); + verify(gitBlameService, never()).blameForLines(anyString(), anyInt(), anyInt()); + } + + @Test + void blame_callsBlameForLinesWithRange() throws Exception { + BlameEntry entry = new BlameEntry(10, 15, "def456", "bob", + Instant.parse("2024-06-01T00:00:00Z"), "fix bug", "fixed code"); + when(gitBlameService.blameForLines("src/Bar.java", 10, 20)).thenReturn(List.of(entry)); + + Map result = tools.blame("src/Bar.java", 10, 20); + + assertEquals(1, result.get("entry_count")); + verify(gitBlameService).blameForLines("src/Bar.java", 10, 20); + verify(gitBlameService, never()).blame(anyString()); + } + + @Test + @SuppressWarnings("unchecked") + void blame_convertsEntriesToMaps() throws Exception { + Instant date = Instant.parse("2024-03-15T12:00:00Z"); + BlameEntry entry = new BlameEntry(1, 3, "aaa", "carol", date, "msg", "content"); + when(gitBlameService.blame("f.java")).thenReturn(List.of(entry)); + + Map result = tools.blame("f.java", null, null); + + List> entries = (List>) result.get("entries"); + assertNotNull(entries); + assertEquals(1, entries.size()); + Map map = entries.get(0); + assertEquals(1, map.get("lineStart")); + assertEquals(3, map.get("lineEnd")); + assertEquals("aaa", map.get("commitHash")); + assertEquals("carol", map.get("author")); + assertEquals(date.toString(), map.get("authorDate")); + assertEquals("msg", map.get("commitMessage")); + assertEquals("content", map.get("content")); + } + + @Test + void blame_returnsErrorOnException() throws Exception { + when(gitBlameService.blame(any())).thenThrow(new RuntimeException("not a git repo")); + + Map result = tools.blame("bad.txt", null, null); + + assertEquals("not a git repo", result.get("error")); + } + + // ── related 
────────────────────────────────────────────────────────── + + @Test + void related_passesMaxResults() throws Exception { + List> files = List.of(Map.of("file", "Other.java")); + when(coChangeService.getRelatedFiles("src/Main.java", 5)).thenReturn(files); + + Map result = tools.related("src/Main.java", 5); + + assertEquals(1, result.get("count")); + verify(coChangeService).getRelatedFiles("src/Main.java", 5); + } + + @Test + void related_usesDefaultMaxResultsWhenNull() throws Exception { + when(coChangeService.getRelatedFiles("f.java", 10)).thenReturn(List.of()); + + tools.related("f.java", null); + + verify(coChangeService).getRelatedFiles("f.java", 10); + } + + // ── dependencies & dependents ──────────────────────────────────────── + + @Test + void dependencies_delegatesByArtifactId() throws Exception { + List> deps = List.of(Map.of("target", "lib-core")); + when(dependencyService.getDependencies("art-1")).thenReturn(deps); + + Map result = tools.dependencies("art-1"); + + assertEquals("art-1", result.get("artifact_id")); + assertEquals(1, result.get("count")); + assertEquals(deps, result.get("dependencies")); + } + + @Test + void dependents_delegatesByArtifactId() throws Exception { + List> deps = List.of(Map.of("source", "app-main")); + when(dependencyService.getDependents("art-2")).thenReturn(deps); + + Map result = tools.dependents("art-2"); + + assertEquals("art-2", result.get("artifact_id")); + assertEquals(1, result.get("count")); + assertEquals(deps, result.get("dependents")); + } + + // ── map ────────────────────────────────────────────────────────────── + + @Test + void map_delegatesToProjectMapService() throws Exception { + Map expected = Map.of("artifacts", List.of()); + when(projectMapService.getProjectMap()).thenReturn(expected); + + Map result = tools.map(); + + assertEquals(expected, result); + } + + // ── stale ──────────────────────────────────────────────────────────── + + @Test + void stale_parsesJsonFilePathsArray() throws Exception { + 
List paths = List.of("src/A.java", "src/B.java"); + Map expected = Map.of("stale", List.of(), "current", 2); + when(stalenessService.checkStaleness(paths)).thenReturn(expected); + + Map result = tools.stale("[\"src/A.java\",\"src/B.java\"]", null); + + assertEquals(expected, result); + verify(stalenessService).checkStaleness(paths); + } + + @Test + void stale_returnsErrorWhenNoPathsProvided() { + Map result = tools.stale(null, null); + + assertTrue(result.containsKey("error")); + } + + @Test + void stale_returnsErrorOnInvalidJson() { + Map result = tools.stale("not-json", null); + + // Invalid JSON results in empty paths, which triggers the error + assertTrue(result.containsKey("error")); + } + + // ── indexHealth ────────────────────────────────────────────────────── + + @Test + void indexHealth_returnsHealthyWhenNoStale() throws Exception { + Map checkResult = new LinkedHashMap<>(); + checkResult.put("stale", List.of()); + checkResult.put("current", 10); + checkResult.put("total_checked", 10L); + checkResult.put("stale_count", 0); + checkResult.put("stale_percentage", 0.0); + when(stalenessService.checkAll()).thenReturn(checkResult); + + Map result = tools.indexHealth(); + + assertEquals("healthy", result.get("health_status")); + assertEquals("All indexed files are up to date.", result.get("recommendation")); + } + + @Test + void indexHealth_returnsDegradedWhenSomewhatStale() throws Exception { + Map checkResult = new LinkedHashMap<>(); + checkResult.put("stale", List.of(Map.of("file", "a.java"))); + checkResult.put("current", 9); + checkResult.put("total_checked", 10L); + checkResult.put("stale_count", 1); + checkResult.put("stale_percentage", 10.0); + when(stalenessService.checkAll()).thenReturn(checkResult); + + Map result = tools.indexHealth(); + + assertEquals("degraded", result.get("health_status")); + assertTrue(((String) result.get("recommendation")).contains("1 of 10")); + } + + @Test + void indexHealth_returnsUnhealthyWhenVeryStale() throws Exception { 
+ Map checkResult = new LinkedHashMap<>(); + checkResult.put("stale", List.of(Map.of("f", "a"), Map.of("f", "b"), Map.of("f", "c"))); + checkResult.put("current", 1); + checkResult.put("total_checked", 4L); + checkResult.put("stale_count", 3); + checkResult.put("stale_percentage", 75.0); + when(stalenessService.checkAll()).thenReturn(checkResult); + + Map result = tools.indexHealth(); + + assertEquals("unhealthy", result.get("health_status")); + assertTrue(((String) result.get("recommendation")).contains("75%")); + } + + // ── summarize ──────────────────────────────────────────────────────── + + @Test + void summarize_delegatesCorrectly() throws Exception { + Map summary = new HashMap<>(); + summary.put("artifact_id", "art-1"); + summary.put("summary_text", "A service class"); + when(artifactService.getSummary("art-1")).thenReturn(summary); + when(artifactService.getStatus("art-1")).thenReturn(null); + + Map result = tools.summarize("art-1"); + + assertEquals("A service class", result.get("summary_text")); + verify(artifactService).getSummary("art-1"); + } + + @Test + void summarize_returnsErrorWhenNotFound() throws Exception { + when(artifactService.getSummary("missing")).thenReturn(null); + + Map result = tools.summarize("missing"); + + assertTrue(result.containsKey("error")); + } + + @Test + @SuppressWarnings("unchecked") + void summarize_addsStalenessWarning() throws Exception { + Map summary = new HashMap<>(); + summary.put("artifact_id", "art-1"); + summary.put("summary_text", "text"); + when(artifactService.getSummary("art-1")).thenReturn(summary); + + Map status = new HashMap<>(); + status.put("original_client_path", "src/Foo.java"); + when(artifactService.getStatus("art-1")).thenReturn(status); + + Map staleness = new HashMap<>(); + staleness.put("stale", List.of(Map.of("file", "src/Foo.java"))); + when(stalenessService.checkStaleness(List.of("src/Foo.java"))).thenReturn(staleness); + + Map result = tools.summarize("art-1"); + + 
assertTrue(result.containsKey("staleness_warning")); + } + + // ── blameEntryToMap ────────────────────────────────────────────────── + + @Test + void blameEntryToMap_convertsAllFields() { + Instant date = Instant.parse("2024-01-15T10:30:00Z"); + BlameEntry entry = new BlameEntry(5, 10, "hash", "author", date, "msg", "code"); + + Map map = AnalysisTools.blameEntryToMap(entry); + + assertEquals(5, map.get("lineStart")); + assertEquals(10, map.get("lineEnd")); + assertEquals("hash", map.get("commitHash")); + assertEquals("author", map.get("author")); + assertEquals("2024-01-15T10:30:00Z", map.get("authorDate")); + assertEquals("msg", map.get("commitMessage")); + assertEquals("code", map.get("content")); + } + + @Test + void blameEntryToMap_handlesNullDate() { + BlameEntry entry = new BlameEntry(1, 1, "h", "a", null, "m", "c"); + + Map map = AnalysisTools.blameEntryToMap(entry); + + assertNull(map.get("authorDate")); + } + + // ── resolveFilePaths ───────────────────────────────────────────────── + + @Test + void resolveFilePaths_parsesJsonArray() { + List result = tools.resolveFilePaths("[\"a.java\",\"b.java\"]", null); + + assertEquals(List.of("a.java", "b.java"), result); + } + + @Test + void resolveFilePaths_returnsEmptyOnNullInputs() { + List result = tools.resolveFilePaths(null, null); + + assertTrue(result.isEmpty()); + } + + @Test + void resolveFilePaths_returnsEmptyOnInvalidJson() { + List result = tools.resolveFilePaths("not valid json", null); + + assertTrue(result.isEmpty()); + } + + // ── findRelated ───────────────────────────────────────────────────── + + @Test + @SuppressWarnings("unchecked") + void findRelated_mergesTagsEntitiesAndCoChanges() throws Exception { + String artId = "art-1"; + + // Semantic tags: art-1 has tags "service" and "spring" + List> myTags = List.of( + Map.of("tag", "service", "category", "architectural"), + Map.of("tag", "spring", "category", "domain")); + when(semanticTagService.getTagsForArtifact(artId)).thenReturn(myTags); + + 
// searchByTags returns art-2 sharing 2 tags + List> tagMatches = List.of( + Map.of("artifact_id", "art-2", "file_name", "Other.java", "matched_tags", "service, spring", "match_count", 2)); + when(semanticTagService.searchByTags(List.of("service", "spring"), false)).thenReturn(tagMatches); + + // Entities: art-1 has entity "SearchService" + List> myEntities = List.of( + Map.of("entity_id", "class-searchservice", "entity_name", "SearchService", "entity_type", "CLASS")); + when(knowledgeGraphService.getEntitiesForArtifact(artId)).thenReturn(myEntities); + Map fullEntity = new LinkedHashMap<>(); + fullEntity.put("entity_id", "class-searchservice"); + fullEntity.put("source_artifact_ids", "[\"art-1\",\"art-3\"]"); + when(knowledgeGraphService.getEntity("class-searchservice")).thenReturn(fullEntity); + + // Co-change: art-1 path + Map status = new HashMap<>(); + status.put("original_client_path", "src/Service.java"); + when(artifactService.getStatus(artId)).thenReturn(status); + List> coChanges = List.of( + Map.of("related_file", "src/ServiceTest.java", "co_change_count", 5, "last_commit_date", "2024-01-01")); + when(coChangeService.getRelatedFiles("src/Service.java", 10)).thenReturn(coChanges); + + Map result = tools.findRelated(artId, null); + + assertEquals(artId, result.get("artifact_id")); + List> related = (List>) result.get("related"); + assertNotNull(related); + assertFalse(related.isEmpty()); + + // art-2 should be present (from tags) + boolean hasArt2 = related.stream() + .anyMatch(r -> "art-2".equals(r.get("artifact_id"))); + assertTrue(hasArt2, "art-2 should appear from shared tags"); + + // art-3 should be present (from shared entity) + boolean hasArt3 = related.stream() + .anyMatch(r -> "art-3".equals(r.get("artifact_id"))); + assertTrue(hasArt3, "art-3 should appear from shared entity"); + + // co-change entry should be present + boolean hasCoChange = related.stream() + .anyMatch(r -> "src/ServiceTest.java".equals(r.get("artifact_id")) + || 
"src/ServiceTest.java".equals(r.get("file_name"))); + assertTrue(hasCoChange, "co-change file should appear"); + } + + @Test + @SuppressWarnings("unchecked") + void findRelated_returnsEmptyWhenNoRelationsFound() throws Exception { + when(semanticTagService.getTagsForArtifact("art-x")).thenReturn(List.of()); + when(knowledgeGraphService.getEntitiesForArtifact("art-x")).thenReturn(List.of()); + when(artifactService.getStatus("art-x")).thenReturn(null); + + Map result = tools.findRelated("art-x", 5); + + assertEquals("art-x", result.get("artifact_id")); + assertEquals(0, result.get("count")); + List> related = (List>) result.get("related"); + assertTrue(related.isEmpty()); + } + + @Test + void findRelated_returnsErrorOnException() throws Exception { + when(semanticTagService.getTagsForArtifact("bad")).thenThrow(new RuntimeException("db down")); + when(knowledgeGraphService.getEntitiesForArtifact("bad")).thenThrow(new RuntimeException("db down")); + when(artifactService.getStatus("bad")).thenThrow(new RuntimeException("db down")); + + // Should still return a result (errors are caught per-section) + Map result = tools.findRelated("bad", null); + + assertEquals("bad", result.get("artifact_id")); + assertEquals(0, result.get("count")); + } + + @Test + @SuppressWarnings("unchecked") + void findRelated_respectsMaxResults() throws Exception { + String artId = "art-max"; + // Create many tag matches + List> myTags = List.of(Map.of("tag", "t1", "category", "functional")); + when(semanticTagService.getTagsForArtifact(artId)).thenReturn(myTags); + + List> manyMatches = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + manyMatches.add(Map.of("artifact_id", "art-" + i, "file_name", "F" + i + ".java", + "matched_tags", "t1", "match_count", 1)); + } + when(semanticTagService.searchByTags(List.of("t1"), false)).thenReturn(manyMatches); + when(knowledgeGraphService.getEntitiesForArtifact(artId)).thenReturn(List.of()); + when(artifactService.getStatus(artId)).thenReturn(null); + + 
Map result = tools.findRelated(artId, 3); + + List> related = (List>) result.get("related"); + assertTrue(related.size() <= 3); + } +} diff --git a/src/test/java/com/javaducker/server/mcp/ContentIntelligenceToolsTest.java b/src/test/java/com/javaducker/server/mcp/ContentIntelligenceToolsTest.java new file mode 100644 index 0000000..48b763d --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/ContentIntelligenceToolsTest.java @@ -0,0 +1,373 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.ContentIntelligenceService; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class ContentIntelligenceToolsTest { + + @Mock + private ContentIntelligenceService service; + + private ObjectMapper objectMapper; + private ContentIntelligenceTools tools; + + @BeforeEach + void setUp() { + objectMapper = new ObjectMapper(); + tools = new ContentIntelligenceTools(service, objectMapper); + } + + // ── classify ─────────────────────────────────────────────────────────────── + + @Test + void classifyDelegatesAllParams() throws Exception { + when(service.classify("a1", "code", 0.9, "rule")) + .thenReturn(Map.of("artifact_id", "a1", "doc_type", "code")); + + Map result = tools.classify("a1", "code", 0.9, "rule"); + + assertEquals("a1", result.get("artifact_id")); + verify(service).classify("a1", "code", 0.9, "rule"); + } + + @Test + void classifyUsesDefaults() throws Exception { + when(service.classify("a1", "doc", 1.0, "llm")) + .thenReturn(Map.of("artifact_id", "a1")); + + tools.classify("a1", 
"doc", null, null); + + verify(service).classify("a1", "doc", 1.0, "llm"); + } + + @Test + void classifyReturnsErrorOnException() throws Exception { + when(service.classify(anyString(), anyString(), anyDouble(), anyString())) + .thenThrow(new SQLException("db down")); + + Map result = tools.classify("a1", "code", 1.0, "llm"); + + assertEquals("db down", result.get("error")); + } + + // ── tag ──────────────────────────────────────────────────────────────────── + + @Test + void tagParsesJsonAndDelegates() throws Exception { + String json = "[{\"tag\":\"java\",\"tag_type\":\"lang\",\"source\":\"llm\"}]"; + when(service.tag(eq("a1"), anyList())) + .thenReturn(Map.of("artifact_id", "a1", "tags_added", 1)); + + Map result = tools.tag("a1", json); + + assertEquals("a1", result.get("artifact_id")); + verify(service).tag(eq("a1"), argThat(list -> list.size() == 1 && "java".equals(list.get(0).get("tag")))); + } + + @Test + void tagReturnsMalformedJsonError() { + Map result = tools.tag("a1", "not json"); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + // ── extractPoints ────────────────────────────────────────────────────────── + + @Test + void extractPointsParsesJsonAndDelegates() throws Exception { + String json = "[{\"point_type\":\"decision\",\"point_text\":\"Use DuckDB\"}]"; + when(service.extractPoints(eq("a1"), anyList())) + .thenReturn(Map.of("artifact_id", "a1", "points_added", 1)); + + Map result = tools.extractPoints("a1", json); + + assertEquals("a1", result.get("artifact_id")); + verify(service).extractPoints(eq("a1"), argThat(list -> + list.size() == 1 && "decision".equals(list.get(0).get("point_type")))); + } + + @Test + void extractPointsReturnsMalformedJsonError() { + Map result = tools.extractPoints("a1", "{bad}"); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + // ── setFreshness 
─────────────────────────────────────────────────────────── + + @Test + void setFreshnessDelegatesWithSupersededBy() throws Exception { + when(service.setFreshness("a1", "superseded", "a2")) + .thenReturn(Map.of("artifact_id", "a1", "freshness", "superseded")); + + Map result = tools.setFreshness("a1", "superseded", "a2"); + + assertEquals("superseded", result.get("freshness")); + verify(service).setFreshness("a1", "superseded", "a2"); + } + + @Test + void setFreshnessDelegatesWithoutSupersededBy() throws Exception { + when(service.setFreshness("a1", "current", null)) + .thenReturn(Map.of("artifact_id", "a1", "freshness", "current")); + + Map result = tools.setFreshness("a1", "current", null); + + assertEquals("current", result.get("freshness")); + verify(service).setFreshness("a1", "current", null); + } + + // ── synthesize ───────────────────────────────────────────────────────────── + + @Test + void synthesizeDelegatesAllFields() throws Exception { + when(service.synthesize("a1", "summary", "t1,t2", "kp", "ok", "/path")) + .thenReturn(Map.of("artifact_id", "a1")); + + Map result = tools.synthesize("a1", "summary", "t1,t2", "kp", "ok", "/path"); + + assertEquals("a1", result.get("artifact_id")); + verify(service).synthesize("a1", "summary", "t1,t2", "kp", "ok", "/path"); + } + + @Test + void synthesizeDelegatesWithNullOptionals() throws Exception { + when(service.synthesize("a1", "summary", null, null, null, null)) + .thenReturn(Map.of("artifact_id", "a1")); + + tools.synthesize("a1", "summary", null, null, null, null); + + verify(service).synthesize("a1", "summary", null, null, null, null); + } + + // ── linkConcepts ─────────────────────────────────────────────────────────── + + @Test + void linkConceptsParsesJsonAndDelegates() throws Exception { + String json = "[{\"concept\":\"auth\",\"artifact_a\":\"a1\",\"artifact_b\":\"a2\",\"strength\":0.8}]"; + when(service.linkConcepts(anyList())) + .thenReturn(Map.of("links_created", 1)); + + Map result = 
tools.linkConcepts(json); + + assertEquals(1, result.get("links_created")); + verify(service).linkConcepts(argThat(list -> list.size() == 1 && "auth".equals(list.get(0).get("concept")))); + } + + @Test + void linkConceptsReturnsMalformedJsonError() { + Map result = tools.linkConcepts("[bad]"); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + // ── enrichQueue ──────────────────────────────────────────────────────────── + + @Test + void enrichQueuePassesLimitWithDefault() throws Exception { + when(service.getEnrichQueue(50)) + .thenReturn(List.of(Map.of("artifact_id", "a1"))); + + Map result = tools.enrichQueue(null); + + assertEquals(1, result.get("count")); + verify(service).getEnrichQueue(50); + } + + @Test + void enrichQueuePassesExplicitLimit() throws Exception { + when(service.getEnrichQueue(10)) + .thenReturn(List.of()); + + Map result = tools.enrichQueue(10); + + assertEquals(0, result.get("count")); + verify(service).getEnrichQueue(10); + } + + // ── markEnriched ─────────────────────────────────────────────────────────── + + @Test + void markEnrichedDelegatesArtifactId() throws Exception { + when(service.markEnriched("a1")) + .thenReturn(Map.of("artifact_id", "a1", "status", "enriched")); + + Map result = tools.markEnriched("a1"); + + assertEquals("enriched", result.get("status")); + verify(service).markEnriched("a1"); + } + + // ── latest ───────────────────────────────────────────────────────────────── + + @Test + void latestDelegatesTopic() throws Exception { + when(service.getLatest("auth")) + .thenReturn(Map.of("artifact_id", "a1", "topic", "auth")); + + Map result = tools.latest("auth"); + + assertEquals("auth", result.get("topic")); + verify(service).getLatest("auth"); + } + + // ── findByType ───────────────────────────────────────────────────────────── + + @Test + void findByTypeDelegatesAndWraps() throws Exception { + when(service.findByType("code")) + 
.thenReturn(List.of(Map.of("artifact_id", "a1"), Map.of("artifact_id", "a2"))); + + Map result = tools.findByType("code"); + + assertEquals(2, result.get("count")); + assertInstanceOf(List.class, result.get("results")); + verify(service).findByType("code"); + } + + // ── findByTag ────────────────────────────────────────────────────────────── + + @Test + void findByTagDelegatesAndWraps() throws Exception { + when(service.findByTag("java")) + .thenReturn(List.of(Map.of("artifact_id", "a1"))); + + Map result = tools.findByTag("java"); + + assertEquals(1, result.get("count")); + verify(service).findByTag("java"); + } + + // ── findPoints ───────────────────────────────────────────────────────────── + + @Test + void findPointsDelegatesWithOptionalTag() throws Exception { + when(service.findPoints("decision", "auth")) + .thenReturn(List.of(Map.of("point_text", "use JWT"))); + + Map result = tools.findPoints("decision", "auth"); + + assertEquals(1, result.get("count")); + verify(service).findPoints("decision", "auth"); + } + + @Test + void findPointsDelegatesWithNullTag() throws Exception { + when(service.findPoints("decision", null)) + .thenReturn(List.of()); + + Map result = tools.findPoints("decision", null); + + assertEquals(0, result.get("count")); + verify(service).findPoints("decision", null); + } + + // ── synthesis ────────────────────────────────────────────────────────────── + + @Test + void synthesisRoutesByArtifactId() throws Exception { + when(service.getSynthesis("a1")) + .thenReturn(Map.of("artifact_id", "a1", "summary", "test")); + + Map result = tools.synthesis("a1", null); + + assertEquals("test", result.get("summary")); + verify(service).getSynthesis("a1"); + verify(service, never()).searchSynthesis(anyString()); + } + + @Test + void synthesisRoutesByKeyword() throws Exception { + when(service.searchSynthesis("auth")) + .thenReturn(List.of(Map.of("artifact_id", "a1"))); + + Map result = tools.synthesis(null, "auth"); + + assertEquals(1, 
result.get("count")); + verify(service).searchSynthesis("auth"); + verify(service, never()).getSynthesis(anyString()); + } + + @Test + void synthesisReturnsErrorWhenNeitherParamGiven() { + Map result = tools.synthesis(null, null); + + assertEquals("Either artifact_id or keyword must be provided", result.get("error")); + } + + @Test + void synthesisReturnsErrorWhenBothBlank() { + Map result = tools.synthesis(" ", " "); + + assertEquals("Either artifact_id or keyword must be provided", result.get("error")); + } + + // ── concepts ─────────────────────────────────────────────────────────────── + + @Test + void conceptsWrapsListResult() throws Exception { + when(service.listConcepts()) + .thenReturn(List.of(Map.of("concept", "auth"))); + + Map result = tools.concepts(); + + assertEquals(1, result.get("count")); + verify(service).listConcepts(); + } + + // ── conceptTimeline ──────────────────────────────────────────────────────── + + @Test + void conceptTimelineDelegates() throws Exception { + when(service.getConceptTimeline("auth")) + .thenReturn(Map.of("concept", "auth", "entries", List.of())); + + Map result = tools.conceptTimeline("auth"); + + assertEquals("auth", result.get("concept")); + verify(service).getConceptTimeline("auth"); + } + + // ── conceptHealth ────────────────────────────────────────────────────────── + + @Test + void conceptHealthDelegates() throws Exception { + when(service.getConceptHealth()) + .thenReturn(Map.of("total_concepts", 5)); + + Map result = tools.conceptHealth(); + + assertEquals(5, result.get("total_concepts")); + verify(service).getConceptHealth(); + } + + // ── staleContent ─────────────────────────────────────────────────────────── + + @Test + void staleContentWrapsListResult() throws Exception { + when(service.getStaleContent()) + .thenReturn(List.of(Map.of("artifact_id", "a1"))); + + Map result = tools.staleContent(); + + assertEquals(1, result.get("count")); + verify(service).getStaleContent(); + } +} diff --git 
package com.javaducker.server.mcp;

import com.javaducker.server.service.*;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.*;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;

/**
 * Unit tests for {@link CoreTools}.
 *
 * <p>All collaborating services are Mockito mocks, so each test verifies only the
 * tool layer's responsibilities: delegation to the right service method, parameter
 * defaulting (search limit 20, hybrid mode), and wrapping thrown exceptions into an
 * {@code error} map entry instead of propagating them to the MCP client.
 */
@ExtendWith(MockitoExtension.class)
class CoreToolsTest {

    @Mock UploadService uploadService;
    @Mock ArtifactService artifactService;
    @Mock SearchService searchService;
    @Mock StatsService statsService;
    @Mock StalenessService stalenessService;
    @Mock GraphSearchService graphSearchService;

    @InjectMocks CoreTools coreTools;

    // ── health ────────────────────────────────────────────────────────────────

    @Test
    void health_returnsStatusOkWithStats() throws Exception {
        Map<String, Object> stats = Map.of("total_artifacts", 42L);
        when(statsService.getStats()).thenReturn(new LinkedHashMap<>(stats));

        Map<String, Object> result = coreTools.health();

        assertEquals("ok", result.get("status"));
        assertEquals(42L, result.get("total_artifacts"));
    }

    @Test
    void health_returnsErrorOnException() throws Exception {
        when(statsService.getStats()).thenThrow(new SQLException("db down"));

        Map<String, Object> result = coreTools.health();

        assertEquals("db down", result.get("error"));
    }

    // ── indexFile ─────────────────────────────────────────────────────────────

    @Test
    void indexFile_delegatesToUploadService(@TempDir Path tempDir) throws Exception {
        Path file = tempDir.resolve("Hello.java");
        Files.writeString(file, "public class Hello {}");

        when(uploadService.upload(eq("Hello.java"), eq(file.toString()), anyString(),
                anyLong(), any(byte[].class))).thenReturn("abc-123");

        Map<String, Object> result = coreTools.indexFile(file.toString());

        assertEquals("abc-123", result.get("artifact_id"));
        assertEquals("Hello.java", result.get("file_name"));
        // Files.writeString writes UTF-8; size the expectation with the same charset
        // instead of the platform default so the test is environment-independent.
        verify(uploadService).upload(eq("Hello.java"), eq(file.toString()), anyString(),
                eq((long) "public class Hello {}".getBytes(StandardCharsets.UTF_8).length),
                any(byte[].class));
    }

    @Test
    void indexFile_returnsErrorForMissingFile() {
        Map<String, Object> result = coreTools.indexFile("/nonexistent/path/File.java");

        assertTrue(result.containsKey("error"));
        assertTrue(result.get("error").toString().contains("File not found"));
    }

    // ── indexDirectory ────────────────────────────────────────────────────────

    @Test
    void indexDirectory_indexesMatchingFiles(@TempDir Path tempDir) throws Exception {
        Files.writeString(tempDir.resolve("A.java"), "class A {}");
        Files.writeString(tempDir.resolve("B.java"), "class B {}");
        Files.writeString(tempDir.resolve("readme.txt"), "hello");

        when(uploadService.upload(anyString(), anyString(), anyString(),
                anyLong(), any(byte[].class))).thenReturn("id");

        Map<String, Object> result = coreTools.indexDirectory(tempDir.toString(), "java");

        assertEquals(2, result.get("indexed_count"));
        assertEquals(0, result.get("error_count"));
    }

    @Test
    void indexDirectory_indexesAllFilesWhenNoExtensionFilter(@TempDir Path tempDir) throws Exception {
        Files.writeString(tempDir.resolve("A.java"), "class A {}");
        Files.writeString(tempDir.resolve("readme.txt"), "hello");

        when(uploadService.upload(anyString(), anyString(), anyString(),
                anyLong(), any(byte[].class))).thenReturn("id");

        Map<String, Object> result = coreTools.indexDirectory(tempDir.toString(), null);

        assertEquals(2, result.get("indexed_count"));
    }

    @Test
    void indexDirectory_returnsErrorForNonDirectory() {
        Map<String, Object> result = coreTools.indexDirectory("/nonexistent/dir", null);

        assertTrue(result.containsKey("error"));
    }

    // ── search ────────────────────────────────────────────────────────────────

    @Test
    void search_usesHybridByDefault() throws Exception {
        List<Map<String, Object>> hits = List.of(Map.of("file_name", "A.java"));
        when(searchService.hybridSearch("test", 20)).thenReturn(hits);

        Map<String, Object> result = coreTools.search("test", null, null);

        assertEquals("hybrid", result.get("mode"));
        assertEquals(1, result.get("count"));
        verify(searchService).hybridSearch("test", 20);
    }

    @Test
    void search_exactModeDelegatesCorrectly() throws Exception {
        when(searchService.exactSearch("foo", 10)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("foo", "exact", 10);

        assertEquals("exact", result.get("mode"));
        verify(searchService).exactSearch("foo", 10);
        verify(searchService, never()).hybridSearch(anyString(), anyInt());
    }

    @Test
    void search_semanticModeDelegatesCorrectly() throws Exception {
        when(searchService.semanticSearch("bar", 5)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("bar", "semantic", 5);

        assertEquals("semantic", result.get("mode"));
        verify(searchService).semanticSearch("bar", 5);
    }

    @Test
    void search_hybridModeExplicit() throws Exception {
        when(searchService.hybridSearch("baz", 20)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("baz", "hybrid", null);

        assertEquals("hybrid", result.get("mode"));
        verify(searchService).hybridSearch("baz", 20);
    }

    @Test
    void search_addsStalenessWarning() throws Exception {
        List<Map<String, Object>> hits = List.of(
                Map.of("file_name", "A.java", "original_client_path", "/src/A.java"));
        when(searchService.hybridSearch("test", 20)).thenReturn(hits);
        when(stalenessService.checkStaleness(List.of("/src/A.java")))
                .thenReturn(Map.of("stale", List.of(Map.of("path", "/src/A.java"))));

        Map<String, Object> result = coreTools.search("test", null, null);

        assertTrue(result.containsKey("staleness_warning"));
        assertTrue(result.containsKey("stale_files"));
    }

    @Test
    void search_returnsErrorOnException() throws Exception {
        when(searchService.hybridSearch(anyString(), anyInt()))
                .thenThrow(new SQLException("search failed"));

        Map<String, Object> result = coreTools.search("test", null, null);

        assertEquals("search failed", result.get("error"));
    }

    @Test
    void search_unknownModeReturnsError() {
        Map<String, Object> result = coreTools.search("test", "magic", null);

        assertTrue(result.containsKey("error"));
        assertTrue(result.get("error").toString().contains("Unknown search mode"));
    }

    // Graph-augmented modes route to GraphSearchService, not SearchService.

    @Test
    void search_localModeDelegatesToGraphSearchService() throws Exception {
        when(graphSearchService.localSearch("entities", 10)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("entities", "local", 10);

        assertEquals("local", result.get("mode"));
        verify(graphSearchService).localSearch("entities", 10);
    }

    @Test
    void search_globalModeDelegatesToGraphSearchService() throws Exception {
        when(graphSearchService.globalSearch("rels", 5)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("rels", "global", 5);

        assertEquals("global", result.get("mode"));
        verify(graphSearchService).globalSearch("rels", 5);
    }

    @Test
    void search_graphHybridModeDelegatesToGraphSearchService() throws Exception {
        when(graphSearchService.hybridGraphSearch("query", 20)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("query", "graph_hybrid", null);

        assertEquals("graph_hybrid", result.get("mode"));
        verify(graphSearchService).hybridGraphSearch("query", 20);
    }

    @Test
    void search_mixModeDelegatesToGraphSearchService() throws Exception {
        when(graphSearchService.mixSearch("query", 20)).thenReturn(List.of());

        Map<String, Object> result = coreTools.search("query", "mix", null);

        assertEquals("mix", result.get("mode"));
        verify(graphSearchService).mixSearch("query", 20);
    }

    // ── getFileText ───────────────────────────────────────────────────────────

    @Test
    void getFileText_delegatesCorrectly() throws Exception {
        Map<String, Object> text = Map.of("text", "public class Foo {}", "artifact_id", "abc");
        when(artifactService.getText("abc")).thenReturn(text);

        Map<String, Object> result = coreTools.getFileText("abc");

        assertEquals("public class Foo {}", result.get("text"));
        assertEquals("abc", result.get("artifact_id"));
    }

    @Test
    void getFileText_returnsErrorOnException() throws Exception {
        when(artifactService.getText("bad")).thenThrow(new SQLException("not found"));

        Map<String, Object> result = coreTools.getFileText("bad");

        assertEquals("not found", result.get("error"));
    }

    // ── getArtifactStatus ─────────────────────────────────────────────────────

    @Test
    void getArtifactStatus_delegatesCorrectly() throws Exception {
        Map<String, Object> status = Map.of("status", "INDEXED", "artifact_id", "abc");
        when(artifactService.getStatus("abc")).thenReturn(status);

        Map<String, Object> result = coreTools.getArtifactStatus("abc");

        assertEquals("INDEXED", result.get("status"));
        assertEquals("abc", result.get("artifact_id"));
    }

    @Test
    void getArtifactStatus_returnsErrorOnException() throws Exception {
        when(artifactService.getStatus("bad")).thenThrow(new SQLException("db error"));

        Map<String, Object> result = coreTools.getArtifactStatus("bad");

        assertEquals("db error", result.get("error"));
    }

    // ── waitForIndexed ────────────────────────────────────────────────────────

    @Test
    void waitForIndexed_returnsOnIndexedStatus() throws Exception {
        Map<String, Object> status = Map.of("status", "INDEXED", "artifact_id", "abc");
        when(artifactService.getStatus("abc")).thenReturn(status);

        Map<String, Object> result = coreTools.waitForIndexed("abc", 10);

        assertEquals("INDEXED", result.get("status"));
        verify(artifactService, times(1)).getStatus("abc");
    }

    @Test
    void waitForIndexed_returnsOnFailedStatus() throws Exception {
        Map<String, Object> status = Map.of("status", "FAILED", "artifact_id", "abc",
                "error_message", "parse error");
        when(artifactService.getStatus("abc")).thenReturn(status);

        Map<String, Object> result = coreTools.waitForIndexed("abc", 10);

        assertEquals("FAILED", result.get("status"));
    }

    @Test
    void waitForIndexed_pollsUntilIndexed() throws Exception {
        Map<String, Object> pending = Map.of("status", "PENDING", "artifact_id", "abc");
        Map<String, Object> indexed = Map.of("status", "INDEXED", "artifact_id", "abc");
        when(artifactService.getStatus("abc"))
                .thenReturn(pending)
                .thenReturn(pending)
                .thenReturn(indexed);

        Map<String, Object> result = coreTools.waitForIndexed("abc", 30);

        assertEquals("INDEXED", result.get("status"));
        verify(artifactService, times(3)).getStatus("abc");
    }

    @Test
    void waitForIndexed_returnsErrorOnTimeout() throws Exception {
        Map<String, Object> pending = Map.of("status", "PENDING", "artifact_id", "abc");
        when(artifactService.getStatus("abc")).thenReturn(pending);

        // Use a very short timeout to avoid slow test
        Map<String, Object> result = coreTools.waitForIndexed("abc", 1);

        assertTrue(result.containsKey("error"));
        assertTrue(result.get("error").toString().contains("Timeout"));
    }

    // ── stats ─────────────────────────────────────────────────────────────────

    @Test
    void stats_delegatesCorrectly() throws Exception {
        Map<String, Object> stats = Map.of("total_artifacts", 100L, "indexed", 95L);
        when(statsService.getStats()).thenReturn(stats);

        Map<String, Object> result = coreTools.stats();

        assertEquals(100L, result.get("total_artifacts"));
        assertEquals(95L, result.get("indexed"));
    }

    @Test
    void stats_returnsErrorOnException() throws Exception {
        when(statsService.getStats()).thenThrow(new SQLException("db error"));

        Map<String, Object> result = coreTools.stats();

        assertEquals("db error", result.get("error"));
    }
}
com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import com.javaducker.server.service.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.Connection; +import java.sql.Statement; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class EnrichmentToolsTest { + + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static EnrichmentTools tools; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-enrich.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + ArtifactService artifactService = new ArtifactService(dataSource); + EmbeddingService emb = new EmbeddingService(config); + SearchService searchService = new SearchService(dataSource, emb, config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + emb, new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + new SchemaBootstrap(dataSource, config, worker).createSchema(); + + KnowledgeGraphService kgService = new KnowledgeGraphService(dataSource, emb); + CommunityDetectionService cdService = new CommunityDetectionService(dataSource); + tools = new EnrichmentTools(dataSource, kgService, cdService); + + // Seed artifacts + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, enrichment_status, 
created_at, updated_at) + VALUES ('enr-1', 'ServiceA.java', 'INDEXED', 'pending', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, enrichment_status, created_at, updated_at) + VALUES ('enr-2', 'ServiceB.java', 'INDEXED', 'enriched', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, enrichment_status, created_at, updated_at) + VALUES ('enr-3', 'ServiceC.java', 'INDEXED', 'pending', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + } + + @AfterAll + static void teardown() { + dataSource.close(); + } + + @Test + @Order(1) + void enrichmentPipelineReturnsPendingFiles() { + Map result = tools.enrichmentPipeline(null); + + assertFalse(result.containsKey("error")); + assertEquals(2, result.get("pending_count")); + @SuppressWarnings("unchecked") + var files = (List>) result.get("pending_files"); + assertEquals(2, files.size()); + assertNotNull(result.get("steps_per_file")); + assertNotNull(result.get("graph_stats")); + } + + @Test + @Order(2) + void enrichmentPipelineRespectsBatchSize() { + Map result = tools.enrichmentPipeline(1); + + assertEquals(1, result.get("pending_count")); + } + + @Test + @Order(3) + void enrichmentStatusReturnsCorrectCounts() { + Map result = tools.enrichmentStatus(); + + assertFalse(result.containsKey("error")); + assertEquals(3L, result.get("total_indexed")); + assertEquals(1L, result.get("enriched")); + assertEquals(2L, result.get("pending")); + } + + @Test + @Order(4) + void rebuildGraphClearsAndReturnsArtifacts() { + Map result = tools.rebuildGraph(); + + assertFalse(result.containsKey("error")); + assertEquals(3, result.get("artifact_count")); + @SuppressWarnings("unchecked") + var artifacts = (List>) result.get("artifacts_to_reprocess"); + assertEquals(3, artifacts.size()); + } +} diff --git a/src/test/java/com/javaducker/server/mcp/KnowledgeGraphToolsTest.java 
b/src/test/java/com/javaducker/server/mcp/KnowledgeGraphToolsTest.java new file mode 100644 index 0000000..27ce8a1 --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/KnowledgeGraphToolsTest.java @@ -0,0 +1,273 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.CommunityDetectionService; +import com.javaducker.server.service.GraphSearchService; +import com.javaducker.server.service.GraphUpdateService; +import com.javaducker.server.service.KnowledgeGraphService; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class KnowledgeGraphToolsTest { + + @Mock + private KnowledgeGraphService service; + + @Mock + private GraphSearchService graphSearchService; + + @Mock + private GraphUpdateService graphUpdateService; + + @Mock + private CommunityDetectionService communityDetectionService; + + private ObjectMapper objectMapper; + private KnowledgeGraphTools tools; + + @BeforeEach + void setUp() { + objectMapper = new ObjectMapper(); + tools = new KnowledgeGraphTools(service, graphSearchService, graphUpdateService, + communityDetectionService, objectMapper); + } + + // ── extractEntities ────────────────────────────────────────────────────── + + @Test + void extractEntitiesCreatesEntities() throws Exception { + when(service.upsertEntity(anyString(), anyString(), anyString(), anyString(), any())) + .thenReturn(Map.of("entity_id", "class-foo", "action", "created")); + + String entities = """ + [{"name":"Foo","type":"class","description":"A foo class"}, + {"name":"Bar","type":"interface","description":"A bar 
interface"}] + """; + Map result = tools.extractEntities("art-1", entities, null); + + assertEquals("art-1", result.get("artifact_id")); + assertEquals(2, result.get("entities_created")); + assertEquals(0, result.get("entities_merged")); + verify(service, times(2)).upsertEntity(anyString(), anyString(), anyString(), eq("art-1"), isNull()); + } + + @Test + void extractEntitiesWithRelationships() throws Exception { + when(service.upsertEntity(anyString(), anyString(), anyString(), anyString(), any())) + .thenReturn(Map.of("entity_id", "class-foo", "action", "created")); + when(service.findEntitiesByName("Foo")) + .thenReturn(List.of(Map.of("entity_id", "class-foo", "entity_name", "Foo"))); + when(service.findEntitiesByName("Bar")) + .thenReturn(List.of(Map.of("entity_id", "interface-bar", "entity_name", "Bar"))); + when(service.upsertRelationship(anyString(), anyString(), anyString(), anyString(), anyString(), any(), anyDouble())) + .thenReturn(Map.of("relationship_id", "r1", "action", "created")); + + String entities = """ + [{"name":"Foo","type":"class","description":"A foo"}, + {"name":"Bar","type":"interface","description":"A bar"}] + """; + String rels = """ + [{"sourceName":"Foo","targetName":"Bar","type":"implements","description":"Foo implements Bar"}] + """; + Map result = tools.extractEntities("art-1", entities, rels); + + assertEquals(1, result.get("relationships_created")); + } + + @Test + void extractEntitiesRejectsEmptyList() { + Map result = tools.extractEntities("art-1", "[]", null); + assertTrue(result.containsKey("error")); + } + + @Test + void extractEntitiesHandlesInvalidJson() { + Map result = tools.extractEntities("art-1", "not json", null); + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + // ── getEntities ────────────────────────────────────────────────────────── + + @Test + void getEntitiesByType() throws Exception { + when(service.findEntitiesByType("class")) + 
.thenReturn(List.of(Map.of("entity_id", "class-foo", "entity_name", "Foo"))); + + Map result = tools.getEntities("class", null); + + assertEquals(1, result.get("count")); + } + + @Test + void getEntitiesByName() throws Exception { + when(service.findEntitiesByName("Foo")) + .thenReturn(List.of(Map.of("entity_id", "class-foo", "entity_name", "Foo"))); + + Map result = tools.getEntities(null, "Foo"); + + assertEquals(1, result.get("count")); + } + + // ── mergeEntities ──────────────────────────────────────────────────────── + + @Test + void mergeEntitiesDelegates() throws Exception { + when(service.mergeEntities("a", "b", "merged desc")) + .thenReturn(Map.of("action", "merged", "target_entity_id", "b")); + + Map result = tools.mergeEntities("a", "b", "merged desc"); + + assertEquals("merged", result.get("action")); + } + + // ── deleteEntities ─────────────────────────────────────────────────────── + + @Test + void deleteEntitiesDelegates() throws Exception { + when(service.deleteEntitiesForArtifact("art-1")) + .thenReturn(Map.of("deleted_entities", 2, "deleted_relationships", 1)); + + Map result = tools.deleteEntities("art-1"); + + assertEquals(2, result.get("deleted_entities")); + } + + // ── graphStats ─────────────────────────────────────────────────────────── + + @Test + void graphStatsReturnsStats() throws Exception { + when(service.getStats()) + .thenReturn(Map.of("entity_count", 10, "relationship_count", 5)); + + Map result = tools.graphStats(); + + assertEquals(10, result.get("entity_count")); + } + + // ── graphNeighborhood ──────────────────────────────────────────────────── + + @Test + void graphNeighborhoodDefaultDepth() throws Exception { + when(service.getNeighborhood("e1", 2)) + .thenReturn(Map.of("nodes", List.of(), "edges", List.of())); + + Map result = tools.graphNeighborhood("e1", null); + + assertNotNull(result.get("nodes")); + verify(service).getNeighborhood("e1", 2); + } + + @Test + void graphNeighborhoodCapsDepthAt5() throws Exception { + 
when(service.getNeighborhood("e1", 5)) + .thenReturn(Map.of("nodes", List.of(), "edges", List.of())); + + tools.graphNeighborhood("e1", 10); + + verify(service).getNeighborhood("e1", 5); + } + + // ── graphPath ──────────────────────────────────────────────────────────── + + @Test + void graphPathDelegates() throws Exception { + when(service.getPath("a", "b")) + .thenReturn(Map.of("found", true, "path", List.of("a", "b"))); + + Map result = tools.graphPath("a", "b"); + + assertEquals(true, result.get("found")); + } + + @Test + void graphPathHandlesError() throws Exception { + when(service.getPath("a", "b")).thenThrow(new RuntimeException("db error")); + + Map result = tools.graphPath("a", "b"); + + assertTrue(result.containsKey("error")); + } + + // ── graphSearch ───────────────────────────────────────────────────────── + + @Test + void graphSearchDefaultsModeToMix() throws Exception { + when(graphSearchService.mixSearch("test query", 10)) + .thenReturn(List.of(Map.of("entity_id", "e1", "score", 0.9, "match_type", "MIX"))); + + Map result = tools.graphSearch("test query", null, null, null); + + assertEquals("mix", result.get("mode")); + assertEquals(1, result.get("count")); + verify(graphSearchService).mixSearch("test query", 10); + } + + @Test + void graphSearchLocalMode() throws Exception { + when(graphSearchService.localSearch("test", 5)) + .thenReturn(List.of(Map.of("entity_id", "e1", "score", 0.8, "match_type", "LOCAL"))); + + Map result = tools.graphSearch("test", "local", 5, null); + + assertEquals("local", result.get("mode")); + verify(graphSearchService).localSearch("test", 5); + } + + @Test + void graphSearchInvalidModeReturnsError() { + Map result = tools.graphSearch("test", "invalid", null, null); + + assertTrue(result.containsKey("error")); + } + + // ── mergeCandidates ───────────────────────────────────────────────────── + + @Test + void mergeCandidatesReturnsResults() throws Exception { + List> mockCandidates = List.of( + Map.of("source_entity_id", 
"a", "target_entity_id", "b", "confidence", 1.0)); + when(service.findDuplicateCandidates()).thenReturn(mockCandidates); + + Map result = tools.mergeCandidates(null); + + assertEquals(1, result.get("count")); + verify(service).findDuplicateCandidates(); + } + + @Test + void mergeCandidatesWithEntityIdDelegates() throws Exception { + List> mockCandidates = List.of( + Map.of("entity_id", "b", "entity_name", "Bar", "similarity", 0.95)); + when(service.findMergeCandidates("a")).thenReturn(mockCandidates); + + Map result = tools.mergeCandidates("a"); + + assertEquals("a", result.get("entity_id")); + assertEquals(1, result.get("count")); + } + + // ── confirmMerge ──────────────────────────────────────────────────────── + + @Test + void confirmMergeDelegates() throws Exception { + when(service.mergeEntities("src", "tgt", "merged desc")) + .thenReturn(Map.of("merged_into", "tgt", "source_deleted", "src")); + + Map result = tools.confirmMerge("src", "tgt", "merged desc"); + + assertEquals("tgt", result.get("merged_into")); + verify(service).mergeEntities("src", "tgt", "merged desc"); + } +} diff --git a/src/test/java/com/javaducker/server/mcp/ReladomoToolsTest.java b/src/test/java/com/javaducker/server/mcp/ReladomoToolsTest.java new file mode 100644 index 0000000..3fc7946 --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/ReladomoToolsTest.java @@ -0,0 +1,154 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.service.ReladomoQueryService; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.sql.SQLException; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class ReladomoToolsTest { + + @Mock + private ReladomoQueryService reladomoQueryService; + + @InjectMocks + private ReladomoTools 
reladomoTools; + + @Test + void relationships_delegatesObjectName() throws SQLException { + Map expected = Map.of("object_name", "Order", "relationships", "[]"); + when(reladomoQueryService.getRelationships("Order")).thenReturn(expected); + + Map result = reladomoTools.relationships("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getRelationships("Order"); + } + + @Test + void relationships_returnsErrorOnException() throws SQLException { + when(reladomoQueryService.getRelationships("Bad")).thenThrow(new SQLException("db error")); + + Map result = reladomoTools.relationships("Bad"); + + assertEquals("db error", result.get("error")); + } + + @Test + void graph_passesDepthWithDefault() throws SQLException { + Map expected = Map.of("nodes", 5); + when(reladomoQueryService.getGraph("Order", 3)).thenReturn(expected); + + Map result = reladomoTools.graph("Order", null); + + assertEquals(expected, result); + verify(reladomoQueryService).getGraph("Order", 3); + } + + @Test + void graph_passesExplicitDepth() throws SQLException { + Map expected = Map.of("nodes", 10); + when(reladomoQueryService.getGraph("Order", 5)).thenReturn(expected); + + Map result = reladomoTools.graph("Order", 5); + + assertEquals(expected, result); + verify(reladomoQueryService).getGraph("Order", 5); + } + + @Test + void path_passesFromAndTo() throws SQLException { + Map expected = Map.of("path", "Order -> OrderItem -> Product"); + when(reladomoQueryService.getPath("Order", "Product")).thenReturn(expected); + + Map result = reladomoTools.path("Order", "Product"); + + assertEquals(expected, result); + verify(reladomoQueryService).getPath("Order", "Product"); + } + + @Test + void schema_delegatesObjectName() throws SQLException { + Map expected = Map.of("columns", 8); + when(reladomoQueryService.getSchema("Order")).thenReturn(expected); + + Map result = reladomoTools.schema("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getSchema("Order"); + } + 
+ @Test + void objectFiles_delegatesObjectName() throws SQLException { + Map expected = Map.of("files", 3); + when(reladomoQueryService.getObjectFiles("Order")).thenReturn(expected); + + Map result = reladomoTools.objectFiles("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getObjectFiles("Order"); + } + + @Test + void finders_delegatesObjectName() throws SQLException { + Map expected = Map.of("patterns", 4); + when(reladomoQueryService.getFinderPatterns("Order")).thenReturn(expected); + + Map result = reladomoTools.finders("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getFinderPatterns("Order"); + } + + @Test + void deepFetch_delegatesObjectName() throws SQLException { + Map expected = Map.of("profiles", 2); + when(reladomoQueryService.getDeepFetchProfiles("Order")).thenReturn(expected); + + Map result = reladomoTools.deepFetch("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getDeepFetchProfiles("Order"); + } + + @Test + void temporal_returnsServiceResults() throws SQLException { + Map expected = Map.of("temporal_objects", 6); + when(reladomoQueryService.getTemporalInfo()).thenReturn(expected); + + Map result = reladomoTools.temporal(); + + assertEquals(expected, result); + verify(reladomoQueryService).getTemporalInfo(); + } + + @Test + void config_withObjectName() throws SQLException { + Map expected = Map.of("cache_type", "partial"); + when(reladomoQueryService.getConfig("Order")).thenReturn(expected); + + Map result = reladomoTools.config("Order"); + + assertEquals(expected, result); + verify(reladomoQueryService).getConfig("Order"); + } + + @Test + void config_withNullObjectName() throws SQLException { + Map expected = Map.of("objects", 12); + when(reladomoQueryService.getConfig(null)).thenReturn(expected); + + Map result = reladomoTools.config(null); + + assertEquals(expected, result); + verify(reladomoQueryService).getConfig(null); + } +} diff --git 
a/src/test/java/com/javaducker/server/mcp/SemanticTagToolsTest.java b/src/test/java/com/javaducker/server/mcp/SemanticTagToolsTest.java new file mode 100644 index 0000000..a624b97 --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/SemanticTagToolsTest.java @@ -0,0 +1,181 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.SemanticTagService; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class SemanticTagToolsTest { + + @Mock + private SemanticTagService service; + + private ObjectMapper objectMapper; + private SemanticTagTools tools; + + @BeforeEach + void setUp() { + objectMapper = new ObjectMapper(); + tools = new SemanticTagTools(service, objectMapper); + } + + // ── synthesizeTags ──────────────────────────────────────────────────────── + + @Test + void synthesizeTagsParsesJsonAndDelegates() throws Exception { + String json = """ + [{"tag":"auth","category":"domain","confidence":0.9,"rationale":"handles auth"}, + {"tag":"spring","category":"architectural","confidence":0.8}, + {"tag":"crud","category":"pattern","confidence":0.7}, + {"tag":"security","category":"concern","confidence":0.6}] + """; + when(service.writeTags(eq("a1"), anyList())) + .thenReturn(Map.of("artifact_id", "a1", "tags_count", 4)); + + Map result = tools.synthesizeTags("a1", json); + + assertEquals("a1", result.get("artifact_id")); + assertEquals(4, result.get("tags_count")); + verify(service).writeTags(eq("a1"), argThat(list -> list.size() == 4)); + } + + @Test + void 
synthesizeTagsReturnsMalformedJsonError() { + Map result = tools.synthesizeTags("a1", "not json"); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + @Test + void synthesizeTagsReturnsValidationError() throws Exception { + when(service.writeTags(eq("a1"), anyList())) + .thenThrow(new IllegalArgumentException("Tags count must be between 4 and 10, got 2")); + + String json = "[{\"tag\":\"a\",\"category\":\"domain\"},{\"tag\":\"b\",\"category\":\"functional\"}]"; + Map result = tools.synthesizeTags("a1", json); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().contains("Tags count")); + } + + @Test + void synthesizeTagsReturnsErrorOnSqlException() throws Exception { + when(service.writeTags(anyString(), anyList())) + .thenThrow(new SQLException("db down")); + + String json = """ + [{"tag":"a","category":"domain"},{"tag":"b","category":"functional"}, + {"tag":"c","category":"pattern"},{"tag":"d","category":"concern"}] + """; + Map result = tools.synthesizeTags("a1", json); + + assertEquals("db down", result.get("error")); + } + + // ── searchByTags ────────────────────────────────────────────────────────── + + @Test + void searchByTagsDefaultsToAny() throws Exception { + when(service.searchByTags(anyList(), eq(false))) + .thenReturn(List.of(Map.of("artifact_id", "a1", "file_name", "Test.java", + "matched_tags", "auth", "match_count", 1))); + + Map result = tools.searchByTags("[\"auth\"]", null, null); + + assertEquals(1, result.get("count")); + verify(service).searchByTags(argThat(l -> l.size() == 1 && "auth".equals(l.get(0))), eq(false)); + } + + @Test + void searchByTagsMatchAll() throws Exception { + when(service.searchByTags(anyList(), eq(true))) + .thenReturn(List.of()); + + Map result = tools.searchByTags("[\"auth\",\"crud\"]", "all", null); + + assertEquals(0, result.get("count")); + verify(service).searchByTags(anyList(), eq(true)); + } + + @Test + 
void searchByTagsWithCategoryFilter() throws Exception { + when(service.searchByTags(anyList(), eq(false))) + .thenReturn(List.of( + Map.of("artifact_id", "a1", "file_name", "A.java", + "matched_tags", "auth", "match_count", 1), + Map.of("artifact_id", "a2", "file_name", "B.java", + "matched_tags", "auth", "match_count", 1))); + when(service.findByCategory("domain")) + .thenReturn(List.of(Map.of("artifact_id", "a1", "tag", "auth", "category", "domain"))); + + Map result = tools.searchByTags("[\"auth\"]", null, "domain"); + + assertEquals(1, result.get("count"), "Only a1 should match domain filter"); + } + + @Test + void searchByTagsReturnsMalformedJsonError() { + Map result = tools.searchByTags("bad", null, null); + + assertTrue(result.containsKey("error")); + assertTrue(result.get("error").toString().startsWith("Invalid JSON:")); + } + + // ── tagCloud ────────────────────────────────────────────────────────────── + + @Test + void tagCloudDelegates() throws Exception { + when(service.getTagCloud()) + .thenReturn(Map.of("categories", Map.of(), "total_tags", 0)); + + Map result = tools.tagCloud(); + + assertEquals(0, result.get("total_tags")); + verify(service).getTagCloud(); + } + + @Test + void tagCloudReturnsErrorOnException() throws Exception { + when(service.getTagCloud()).thenThrow(new SQLException("db down")); + + Map result = tools.tagCloud(); + + assertEquals("db down", result.get("error")); + } + + // ── suggestTags ─────────────────────────────────────────────────────────── + + @Test + void suggestTagsDelegates() throws Exception { + when(service.suggestTags("a1")) + .thenReturn(List.of(Map.of("tag", "security", "category", "concern", "frequency", 3))); + + Map result = tools.suggestTags("a1"); + + assertEquals("a1", result.get("artifact_id")); + assertEquals(1, result.get("count")); + verify(service).suggestTags("a1"); + } + + @Test + void suggestTagsReturnsErrorOnException() throws Exception { + when(service.suggestTags("a1")).thenThrow(new 
SQLException("db down")); + + Map result = tools.suggestTags("a1"); + + assertEquals("db down", result.get("error")); + } +} diff --git a/src/test/java/com/javaducker/server/mcp/SessionToolsTest.java b/src/test/java/com/javaducker/server/mcp/SessionToolsTest.java new file mode 100644 index 0000000..758cc0e --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/SessionToolsTest.java @@ -0,0 +1,171 @@ +package com.javaducker.server.mcp; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.javaducker.server.service.ContentIntelligenceService; +import com.javaducker.server.service.SearchService; +import com.javaducker.server.service.SessionIngestionService; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Spy; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class SessionToolsTest { + + @Mock + private SessionIngestionService sessionIngestionService; + + @Mock + private SearchService searchService; + + @Mock + private ContentIntelligenceService contentIntelligenceService; + + @Spy + private ObjectMapper objectMapper = new ObjectMapper(); + + @InjectMocks + private SessionTools sessionTools; + + @Test + void indexSessions_callsIndexSessionsWithMaxSessions() throws SQLException { + Map expected = Map.of("indexed", 5); + when(sessionIngestionService.indexSessions("/project", 10)).thenReturn(expected); + + Map result = sessionTools.indexSessions("/project", 10, null); + + assertEquals(expected, result); + verify(sessionIngestionService).indexSessions("/project", 10); + } + + @Test + void indexSessions_defaultMaxSessionsIsMaxValue() throws SQLException { + Map expected = Map.of("indexed", 100); + 
when(sessionIngestionService.indexSessions("/project", Integer.MAX_VALUE)).thenReturn(expected); + + Map result = sessionTools.indexSessions("/project", null, null); + + assertEquals(expected, result); + verify(sessionIngestionService).indexSessions("/project", Integer.MAX_VALUE); + } + + @Test + void indexSessions_incrementalCallsIncrementalMethod() throws SQLException { + Map expected = Map.of("indexed", 2, "skipped", 8); + when(sessionIngestionService.indexSessionsIncremental("/project", Integer.MAX_VALUE)) + .thenReturn(expected); + + Map result = sessionTools.indexSessions("/project", null, "true"); + + assertEquals(expected, result); + verify(sessionIngestionService).indexSessionsIncremental("/project", Integer.MAX_VALUE); + verify(sessionIngestionService, never()).indexSessions(anyString(), anyInt()); + } + + @Test + void indexSessions_returnsErrorOnException() throws SQLException { + when(sessionIngestionService.indexSessions("/bad", Integer.MAX_VALUE)) + .thenThrow(new SQLException("db error")); + + Map result = sessionTools.indexSessions("/bad", null, null); + + assertEquals("db error", result.get("error")); + } + + @Test + void searchSessions_passesPhraseAndMaxResults() throws SQLException { + List> sessions = List.of( + Map.of("session_id", "s1", "snippet", "found it")); + when(sessionIngestionService.searchSessions("reladomo", 20)).thenReturn(sessions); + + Map result = sessionTools.searchSessions("reladomo", null); + + assertEquals(1, result.get("count")); + assertEquals(sessions, result.get("results")); + verify(sessionIngestionService).searchSessions("reladomo", 20); + } + + @Test + void searchSessions_passesExplicitMaxResults() throws SQLException { + List> sessions = List.of(); + when(sessionIngestionService.searchSessions("query", 5)).thenReturn(sessions); + + Map result = sessionTools.searchSessions("query", 5); + + assertEquals(0, result.get("count")); + verify(sessionIngestionService).searchSessions("query", 5); + } + + @Test + void 
sessionContext_combinesSessionAndSemanticResults() throws Exception { + List> sessionResults = List.of(Map.of("id", "s1")); + List> semanticResults = List.of(Map.of("id", "a1")); + when(sessionIngestionService.searchSessions("caching", 10)).thenReturn(sessionResults); + when(searchService.semanticSearch("caching", 5)).thenReturn(semanticResults); + + Map result = sessionTools.sessionContext("caching"); + + assertEquals(sessionResults, result.get("session_results")); + assertEquals(semanticResults, result.get("semantic_results")); + verify(sessionIngestionService).searchSessions("caching", 10); + verify(searchService).semanticSearch("caching", 5); + } + + @Test + @SuppressWarnings("unchecked") + void extractDecisions_parsesJsonAndDelegates() throws Exception { + String json = "[{\"text\":\"Use DuckDB\",\"context\":\"storage layer\",\"tags\":\"architecture\"}]"; + Map expected = Map.of("stored", 1); + when(sessionIngestionService.storeDecisions(eq("session-1"), anyList())).thenReturn(expected); + + Map result = sessionTools.extractDecisions("session-1", json); + + assertEquals(expected, result); + verify(sessionIngestionService).storeDecisions(eq("session-1"), argThat(list -> { + List> decisions = (List>) list; + return decisions.size() == 1 + && "Use DuckDB".equals(decisions.get(0).get("text")) + && "storage layer".equals(decisions.get(0).get("context")) + && "architecture".equals(decisions.get(0).get("tags")); + })); + } + + @Test + void extractDecisions_returnsErrorOnBadJson() { + Map result = sessionTools.extractDecisions("session-1", "not-json"); + + assertNotNull(result.get("error")); + } + + @Test + void recentDecisions_passesMaxSessionsAndTag() throws SQLException { + List> decisions = List.of(Map.of("text", "Use caching")); + when(sessionIngestionService.getRecentDecisions(3, "performance")).thenReturn(decisions); + + Map result = sessionTools.recentDecisions(3, "performance"); + + assertEquals(1, result.get("count")); + assertEquals(decisions, 
result.get("results")); + verify(sessionIngestionService).getRecentDecisions(3, "performance"); + } + + @Test + void recentDecisions_defaultMaxSessionsIs5() throws SQLException { + List> decisions = List.of(); + when(sessionIngestionService.getRecentDecisions(5, null)).thenReturn(decisions); + + Map result = sessionTools.recentDecisions(null, null); + + assertEquals(0, result.get("count")); + verify(sessionIngestionService).getRecentDecisions(5, null); + } +} diff --git a/src/test/java/com/javaducker/server/mcp/WatchToolsTest.java b/src/test/java/com/javaducker/server/mcp/WatchToolsTest.java new file mode 100644 index 0000000..27efa5f --- /dev/null +++ b/src/test/java/com/javaducker/server/mcp/WatchToolsTest.java @@ -0,0 +1,119 @@ +package com.javaducker.server.mcp; + +import com.javaducker.server.ingestion.FileWatcher; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.nio.file.Path; +import java.util.Map; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +class WatchToolsTest { + + @Mock FileWatcher fileWatcher; + + @InjectMocks WatchTools tools; + + // ── start ──────────────────────────────────────────────────────────── + + @Test + @SuppressWarnings("unchecked") + void watch_startCallsStartWatching() throws Exception { + Map result = tools.watch("start", "/project/src", ".java,.xml"); + + assertEquals("start", result.get("action")); + assertEquals(true, result.get("watching")); + + ArgumentCaptor pathCaptor = ArgumentCaptor.forClass(Path.class); + ArgumentCaptor extCaptor = ArgumentCaptor.forClass(Set.class); + verify(fileWatcher).startWatching(pathCaptor.capture(), 
extCaptor.capture()); + + assertEquals(Path.of("/project/src"), pathCaptor.getValue()); + Set exts = extCaptor.getValue(); + assertTrue(exts.contains(".java")); + assertTrue(exts.contains(".xml")); + } + + @Test + void watch_startRequiresDirectory() { + Map result = tools.watch("start", null, null); + + assertTrue(result.containsKey("error")); + assertTrue(((String) result.get("error")).contains("directory")); + } + + @Test + void watch_startWithNoExtensions() throws Exception { + Map result = tools.watch("start", "/project", null); + + assertEquals(true, result.get("watching")); + verify(fileWatcher).startWatching(eq(Path.of("/project")), eq(Set.of())); + } + + // ── stop ───────────────────────────────────────────────────────────── + + @Test + void watch_stopCallsStopWatching() { + Map result = tools.watch("stop", null, null); + + assertEquals("stop", result.get("action")); + assertEquals(false, result.get("watching")); + verify(fileWatcher).stopWatching(); + } + + // ── status ─────────────────────────────────────────────────────────── + + @Test + void watch_statusReturnsWatchingState() { + when(fileWatcher.isWatching()).thenReturn(true); + when(fileWatcher.getWatchedDirectory()).thenReturn(Path.of("/project/src")); + + Map result = tools.watch("status", null, null); + + assertEquals("status", result.get("action")); + assertEquals(true, result.get("watching")); + assertEquals(Path.of("/project/src").toString(), result.get("directory")); + } + + @Test + void watch_statusWhenNotWatching() { + when(fileWatcher.isWatching()).thenReturn(false); + when(fileWatcher.getWatchedDirectory()).thenReturn(null); + + Map result = tools.watch("status", null, null); + + assertEquals(false, result.get("watching")); + assertNull(result.get("directory")); + } + + // ── unknown action ─────────────────────────────────────────────────── + + @Test + void watch_unknownActionReturnsError() { + Map result = tools.watch("restart", null, null); + + assertTrue(result.containsKey("error")); 
+ assertTrue(((String) result.get("error")).contains("restart")); + } + + // ── exception handling ─────────────────────────────────────────────── + + @Test + void watch_startReturnsErrorOnException() throws Exception { + doThrow(new RuntimeException("IO error")).when(fileWatcher).startWatching(any(), any()); + + Map result = tools.watch("start", "/bad/dir", null); + + assertEquals("IO error", result.get("error")); + } +} diff --git a/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerExtendedTest.java b/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerExtendedTest.java index ac6e686..5fd1b2d 100644 --- a/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerExtendedTest.java +++ b/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerExtendedTest.java @@ -39,6 +39,8 @@ class JavaDuckerRestControllerExtendedTest { @MockBean GitBlameService gitBlameService; @MockBean CoChangeService coChangeService; @MockBean SessionIngestionService sessionIngestionService; + @MockBean SemanticTagService semanticTagService; + @MockBean KnowledgeGraphService knowledgeGraphService; // ── Search with staleness banner ───────────────────────────────────── diff --git a/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerTest.java b/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerTest.java index 5a99b4c..304e253 100644 --- a/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerTest.java +++ b/src/test/java/com/javaducker/server/rest/JavaDuckerRestControllerTest.java @@ -39,6 +39,8 @@ class JavaDuckerRestControllerTest { @MockBean GitBlameService gitBlameService; @MockBean CoChangeService coChangeService; @MockBean SessionIngestionService sessionIngestionService; + @MockBean SemanticTagService semanticTagService; + @MockBean KnowledgeGraphService knowledgeGraphService; @Test void healthReturnsOk() throws Exception { diff --git 
a/src/test/java/com/javaducker/server/service/CommunityDetectionServiceTest.java b/src/test/java/com/javaducker/server/service/CommunityDetectionServiceTest.java new file mode 100644 index 0000000..6458520 --- /dev/null +++ b/src/test/java/com/javaducker/server/service/CommunityDetectionServiceTest.java @@ -0,0 +1,185 @@ +package com.javaducker.server.service; + +import com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class CommunityDetectionServiceTest { + + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static KnowledgeGraphService kgService; + static CommunityDetectionService service; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-cd.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + ArtifactService artifactService = new ArtifactService(dataSource); + SearchService searchService = new SearchService(dataSource, new EmbeddingService(config), config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + new EmbeddingService(config), new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); + bootstrap.createSchema(); + EmbeddingService embeddingService = new EmbeddingService(config); + kgService = 
new KnowledgeGraphService(dataSource, embeddingService); + service = new CommunityDetectionService(dataSource); + + // Seed two clusters: + // Cluster 1: AuthService <-> TokenValidator <-> LoginController (auth cluster) + kgService.upsertEntity("AuthService", "class", "Authentication service", "art-1", null); + kgService.upsertEntity("TokenValidator", "class", "Validates JWT tokens", "art-1", null); + kgService.upsertEntity("LoginController", "class", "Handles login", "art-1", null); + kgService.upsertRelationship("class-authservice", "class-tokenvalidator", + "uses", "auth uses token", "art-1", null, 1.0); + kgService.upsertRelationship("class-logincontroller", "class-authservice", + "calls", "login calls auth", "art-1", null, 1.0); + + // Cluster 2: PaymentService <-> StripeClient (payment cluster) + kgService.upsertEntity("PaymentService", "class", "Processes payments", "art-1", null); + kgService.upsertEntity("StripeClient", "class", "Stripe API client", "art-1", null); + kgService.upsertRelationship("class-paymentservice", "class-stripeclient", + "uses", "payment uses stripe", "art-1", null, 1.0); + } + + @AfterAll + static void teardown() throws Exception { + dataSource.close(); + } + + @Test + @Order(1) + void detectCommunitiesFindsCluster() throws Exception { + var result = service.detectCommunities(); + int detected = ((Number) result.get("communities_detected")).intValue(); + assertTrue(detected >= 2, "Expected at least 2 communities, got " + detected); + } + + @Test + @Order(2) + void authEntitiesInSameCommunity() throws Exception { + var communities = service.getCommunities(); + // Find the community containing AuthService + String authCommunityId = null; + List authMembers = null; + for (Map c : communities) { + List ids = CommunityDetectionService.parseJsonArray( + (String) c.get("entity_ids")); + if (ids.contains("class-authservice")) { + authCommunityId = (String) c.get("community_id"); + authMembers = ids; + break; + } + } + 
assertNotNull(authCommunityId, "AuthService should be in a community"); + assertTrue(authMembers.contains("class-tokenvalidator"), + "TokenValidator should be in same community as AuthService"); + assertTrue(authMembers.contains("class-logincontroller"), + "LoginController should be in same community as AuthService"); + } + + @Test + @Order(3) + void paymentEntitiesInSameCommunity() throws Exception { + var communities = service.getCommunities(); + String payCommunityId = null; + List payMembers = null; + for (Map c : communities) { + List ids = CommunityDetectionService.parseJsonArray( + (String) c.get("entity_ids")); + if (ids.contains("class-paymentservice")) { + payCommunityId = (String) c.get("community_id"); + payMembers = ids; + break; + } + } + assertNotNull(payCommunityId, "PaymentService should be in a community"); + assertTrue(payMembers.contains("class-stripeclient"), + "StripeClient should be in same community as PaymentService"); + } + + @Test + @Order(4) + void getCommunityReturnsMemberDetails() throws Exception { + var communities = service.getCommunities(); + assertFalse(communities.isEmpty(), "Should have communities"); + String firstId = (String) communities.get(0).get("community_id"); + + var community = service.getCommunity(firstId); + assertNotNull(community, "getCommunity should return a result"); + assertNotNull(community.get("entity_ids"), "Should have entity_ids"); + @SuppressWarnings("unchecked") + List> members = + (List>) community.get("members"); + assertNotNull(members, "Should have members list"); + assertFalse(members.isEmpty(), "Members list should not be empty"); + // Each member should have entity details + for (Map member : members) { + assertNotNull(member.get("entity_id")); + assertNotNull(member.get("entity_name")); + } + } + + @Test + @Order(5) + void summarizeCommunityStoresSummary() throws Exception { + var communities = service.getCommunities(); + String firstId = (String) communities.get(0).get("community_id"); + String 
summaryText = "This community contains authentication-related classes."; + + var result = service.summarizeCommunity(firstId, summaryText); + assertEquals(firstId, result.get("community_id")); + assertTrue((Boolean) result.get("summary_stored")); + + // Retrieve and verify + var community = service.getCommunity(firstId); + assertEquals(summaryText, community.get("summary")); + } + + @Test + @Order(6) + void rebuildClearsPreviousCommunities() throws Exception { + // First detect (already done), then rebuild + var result = service.rebuildCommunities(); + int detected = ((Number) result.get("communities_detected")).intValue(); + assertTrue(detected >= 2, "Rebuild should re-detect communities"); + + // Verify communities exist and summaries are cleared (fresh detection) + var communities = service.getCommunities(); + assertFalse(communities.isEmpty()); + } + + @Test + @Order(7) + void listCommunitiesReturnsAll() throws Exception { + // Ensure detection has run + service.detectCommunities(); + var communities = service.getCommunities(); + assertTrue(communities.size() >= 2, + "Should list at least 2 communities, got " + communities.size()); + // Each community should have member_count + for (Map c : communities) { + assertNotNull(c.get("community_id")); + assertNotNull(c.get("community_name")); + assertTrue(((Number) c.get("member_count")).intValue() >= 2); + } + } +} diff --git a/src/test/java/com/javaducker/server/service/ExplainServiceTest.java b/src/test/java/com/javaducker/server/service/ExplainServiceTest.java index bdf3717..ce0b930 100644 --- a/src/test/java/com/javaducker/server/service/ExplainServiceTest.java +++ b/src/test/java/com/javaducker/server/service/ExplainServiceTest.java @@ -42,8 +42,10 @@ static void setup() throws Exception { SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); bootstrap.createSchema(); + SemanticTagService semanticTagService = new SemanticTagService(dataSource); + KnowledgeGraphService knowledgeGraphService 
= new KnowledgeGraphService(dataSource, new EmbeddingService(config)); explainService = new ExplainService(artifactService, dependencyService, - ciService, dataSource, null, null); + ciService, dataSource, semanticTagService, knowledgeGraphService, null, null); } @AfterAll diff --git a/src/test/java/com/javaducker/server/service/GraphSearchServiceTest.java b/src/test/java/com/javaducker/server/service/GraphSearchServiceTest.java new file mode 100644 index 0000000..425b002 --- /dev/null +++ b/src/test/java/com/javaducker/server/service/GraphSearchServiceTest.java @@ -0,0 +1,263 @@ +package com.javaducker.server.service; + +import com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.Connection; +import java.sql.Statement; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class GraphSearchServiceTest { + + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static EmbeddingService embeddingService; + static KnowledgeGraphService kgService; + static GraphSearchService graphSearchService; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-graphsearch.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + embeddingService = new EmbeddingService(config); + + ArtifactService artifactService = new ArtifactService(dataSource); + SearchService searchService = new SearchService(dataSource, embeddingService, config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + embeddingService, new FileSummarizer(), new 
ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); + bootstrap.createSchema(); + + kgService = new KnowledgeGraphService(dataSource, embeddingService); + graphSearchService = new GraphSearchService(dataSource, embeddingService, kgService); + + seedTestData(); + } + + @AfterAll + static void teardown() throws Exception { + dataSource.close(); + } + + static void seedTestData() throws Exception { + Connection conn = dataSource.getConnection(); + + // Seed artifacts + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('gs-art-1', 'SearchService.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('gs-art-2', 'UserRepository.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + + // Seed entities with embeddings via KnowledgeGraphService (which generates embeddings) + kgService.upsertEntity("SearchService", "class", + "Service that performs full-text and semantic search across indexed code artifacts", + "gs-art-1", null); + kgService.upsertEntity("UserRepository", "class", + "Repository for accessing user data from the database", + "gs-art-2", null); + kgService.upsertEntity("EmbeddingService", "class", + "Service that generates TF-IDF hash embeddings for text content", + "gs-art-1", null); + + // Seed relationships with embeddings + kgService.upsertRelationship("class-searchservice", "class-embeddingservice", + "uses", "SearchService uses EmbeddingService to generate query embeddings", + "gs-art-1", null, 1.0); + kgService.upsertRelationship("class-userrepository", "class-searchservice", + "depends-on", "UserRepository depends on SearchService 
for search functionality", + "gs-art-2", null, 1.0); + + // Seed artifact chunks with embeddings for mix search + double[] chunkEmb = embeddingService.embed("search service handles full-text queries"); + String embSql = embeddingToSql(chunkEmb); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifact_chunks (chunk_id, artifact_id, chunk_index, chunk_text, line_start, line_end) + VALUES ('gs-chunk-1', 'gs-art-1', 0, 'public class SearchService implements search functionality for indexed code artifacts', 1, 10) + """); + stmt.execute(""" + INSERT INTO artifact_chunks (chunk_id, artifact_id, chunk_index, chunk_text, line_start, line_end) + VALUES ('gs-chunk-2', 'gs-art-2', 0, 'public class UserRepository provides data access for user entities', 1, 8) + """); + stmt.execute("INSERT INTO chunk_embeddings (chunk_id, embedding) VALUES ('gs-chunk-1', " + + embSql + ")"); + double[] chunk2Emb = embeddingService.embed("user repository data access entities"); + stmt.execute("INSERT INTO chunk_embeddings (chunk_id, embedding) VALUES ('gs-chunk-2', " + + embeddingToSql(chunk2Emb) + ")"); + } + } + + private static String embeddingToSql(double[] embedding) { + if (embedding == null) return "NULL"; + StringBuilder sb = new StringBuilder("["); + for (int i = 0; i < embedding.length; i++) { + if (i > 0) sb.append(","); + sb.append(embedding[i]); + } + sb.append("]::DOUBLE[]"); + return sb.toString(); + } + + // ── Tests ──────────────────────────────────────────────────────────────── + + @Test + void localSearchFindsRelevantEntities() throws Exception { + List> results = graphSearchService.localSearch("search service", 10); + + assertFalse(results.isEmpty(), "Expected at least one local search result"); + // The top result should be SearchService (most relevant to "search service") + Map top = results.get(0); + assertEquals("SearchService", top.get("entity_name")); + assertEquals("class", top.get("entity_type")); + assertTrue((double) 
top.get("score") > 0); + assertEquals("LOCAL", top.get("match_type")); + // Should have relationships attached + assertNotNull(top.get("relationships")); + } + + @Test + void globalSearchFindsRelationships() throws Exception { + List> results = graphSearchService.globalSearch("embedding query generation", 10); + + assertFalse(results.isEmpty(), "Expected at least one global search result"); + Map top = results.get(0); + assertNotNull(top.get("relationship_id")); + assertNotNull(top.get("relationship_type")); + assertTrue((double) top.get("score") > 0); + assertEquals("GLOBAL", top.get("match_type")); + } + + @Test + void hybridGraphSearchCombinesBoth() throws Exception { + List> results = graphSearchService.hybridGraphSearch("search service embeddings", 10); + + assertFalse(results.isEmpty(), "Expected at least one hybrid graph result"); + // Should contain results from both local and global + boolean hasHybrid = results.stream() + .anyMatch(r -> "GRAPH_HYBRID".equals(r.get("match_type"))); + assertTrue(hasHybrid, "Expected GRAPH_HYBRID match type in results"); + } + + @Test + void mixSearchCombinesGraphAndChunks() throws Exception { + List> results = graphSearchService.mixSearch("search service", 10); + + assertFalse(results.isEmpty(), "Expected at least one mix search result"); + boolean hasMix = results.stream() + .anyMatch(r -> "MIX".equals(r.get("match_type"))); + assertTrue(hasMix, "Expected MIX match type in results"); + } + + @Test + void localSearchReturnsEmptyWhenNoEntitiesHaveEmbeddings() throws Exception { + // Create a separate service with its own empty database + AppConfig emptyConfig = new AppConfig(); + emptyConfig.setDbPath(tempDir.resolve("test-empty.duckdb").toString()); + emptyConfig.setIntakeDir(tempDir.resolve("intake2").toString()); + DuckDBDataSource emptyDs = new DuckDBDataSource(emptyConfig); + EmbeddingService emptyEmb = new EmbeddingService(emptyConfig); + + ArtifactService emptyArtService = new ArtifactService(emptyDs); + SearchService 
emptySearch = new SearchService(emptyDs, emptyEmb, emptyConfig); + IngestionWorker emptyWorker = new IngestionWorker(emptyDs, emptyArtService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + emptyEmb, new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(emptyDs), + new ReladomoFinderParser(), new ReladomoConfigParser(), + emptySearch, emptyConfig); + new SchemaBootstrap(emptyDs, emptyConfig, emptyWorker).createSchema(); + + KnowledgeGraphService emptyKg = new KnowledgeGraphService(emptyDs, emptyEmb); + GraphSearchService emptyGraphSearch = new GraphSearchService(emptyDs, emptyEmb, emptyKg); + + List> results = emptyGraphSearch.localSearch("anything", 10); + assertTrue(results.isEmpty(), "Expected empty results when no entities exist"); + emptyDs.close(); + } + + @Test + void searchFiltersNullEmbeddings() throws Exception { + // Insert entity WITHOUT embedding (null description -> null embedding) + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO entities (entity_id, entity_name, entity_type, description, + source_artifact_ids, mention_count, embedding, created_at, updated_at) + VALUES ('class-noembedding', 'NoEmbedding', 'class', NULL, + '["gs-art-1"]', 1, NULL, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + + List> results = graphSearchService.localSearch("search", 100); + + // The entity with null embedding should NOT appear + boolean hasNullEntity = results.stream() + .anyMatch(r -> "NoEmbedding".equals(r.get("entity_name"))); + assertFalse(hasNullEntity, "Entity with null embedding should be filtered out"); + + // Cleanup + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities WHERE entity_id = 'class-noembedding'"); + } + } + + @Test + void cosineSimilarityHandlesNulls() { + assertEquals(0.0, GraphSearchService.cosineSimilarity(null, new double[]{1})); + assertEquals(0.0, 
GraphSearchService.cosineSimilarity(new double[]{1}, null)); + } + + @Test + void cosineSimilarityHandlesLengthMismatch() { + assertEquals(0.0, GraphSearchService.cosineSimilarity(new double[]{1, 2}, new double[]{1})); + } + + @Test + void parseJsonArrayHandlesVariousInputs() { + assertEquals(List.of(), GraphSearchService.parseJsonArray(null)); + assertEquals(List.of(), GraphSearchService.parseJsonArray("")); + assertEquals(List.of(), GraphSearchService.parseJsonArray("[]")); + assertEquals(List.of("a", "b"), GraphSearchService.parseJsonArray("[\"a\",\"b\"]")); + assertEquals(List.of("single"), GraphSearchService.parseJsonArray("[\"single\"]")); + } + + @Test + void localSearchRespectsTopK() throws Exception { + List> results = graphSearchService.localSearch("service", 1); + + assertTrue(results.size() <= 1, "Should respect topK limit of 1"); + } + + @Test + void chunkSearchFindsChunks() throws Exception { + List> results = graphSearchService.chunkSearch("search service", 10); + + assertFalse(results.isEmpty(), "Expected chunk search results"); + Map top = results.get(0); + assertNotNull(top.get("chunk_id")); + assertNotNull(top.get("artifact_id")); + assertEquals("CHUNK", top.get("match_type")); + } +} diff --git a/src/test/java/com/javaducker/server/service/GraphUpdateServiceTest.java b/src/test/java/com/javaducker/server/service/GraphUpdateServiceTest.java new file mode 100644 index 0000000..4fa0ae7 --- /dev/null +++ b/src/test/java/com/javaducker/server/service/GraphUpdateServiceTest.java @@ -0,0 +1,171 @@ +package com.javaducker.server.service; + +import com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.*; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + 
+@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class GraphUpdateServiceTest { + + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static KnowledgeGraphService kgService; + static GraphUpdateService service; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-gu.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + ArtifactService artifactService = new ArtifactService(dataSource); + SearchService searchService = new SearchService(dataSource, new EmbeddingService(config), config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + new EmbeddingService(config), new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); + bootstrap.createSchema(); + + EmbeddingService embeddingService = new EmbeddingService(config); + kgService = new KnowledgeGraphService(dataSource, embeddingService); + service = new GraphUpdateService(dataSource, kgService); + + // Seed test artifacts + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at, indexed_at) + VALUES ('gu-art-1', 'Service.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at, indexed_at) + VALUES ('gu-art-2', 'Controller.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + } + + @AfterAll + static void teardown() throws Exception { + 
dataSource.close(); + } + + @Test + @Order(1) + void onArtifactReindexedRemovesExclusiveEntities() throws Exception { + // Create entity sourced only from gu-art-1 + kgService.upsertEntity("ExclusiveService", "class", + "Only from art-1", "gu-art-1", null); + assertNotNull(kgService.getEntity("class-exclusiveservice")); + + // Reindex gu-art-1 - should remove exclusive entity + Map result = service.onArtifactReindexed("gu-art-1"); + assertTrue(((Number) result.get("deleted_entities")).intValue() >= 1); + assertNull(kgService.getEntity("class-exclusiveservice"), + "Entity sourced only from gu-art-1 should be deleted after reindex"); + } + + @Test + @Order(2) + void onArtifactReindexedPreservesSharedEntities() throws Exception { + // Create entity sourced from both artifacts + kgService.upsertEntity("SharedController", "class", + "Shared between artifacts", "gu-art-1", null); + kgService.upsertEntity("SharedController", "class", + "Shared between artifacts", "gu-art-2", null); + + // Verify mention_count is 2 + var entity = kgService.getEntity("class-sharedcontroller"); + assertNotNull(entity); + assertEquals(2, ((Number) entity.get("mention_count")).intValue()); + + // Reindex gu-art-1 - shared entity should survive + service.onArtifactReindexed("gu-art-1"); + + var surviving = kgService.getEntity("class-sharedcontroller"); + assertNotNull(surviving, "Shared entity should survive reindex"); + assertEquals(1, ((Number) surviving.get("mention_count")).intValue()); + String sources = (String) surviving.get("source_artifact_ids"); + assertFalse(sources.contains("gu-art-1")); + assertTrue(sources.contains("gu-art-2")); + } + + @Test + @Order(3) + void onArtifactDeletedRemovesEntities() throws Exception { + // Create entity sourced only from gu-art-2 + kgService.upsertEntity("DeleteTarget", "service", + "Only from art-2", "gu-art-2", null); + assertNotNull(kgService.getEntity("service-deletetarget")); + + // Delete gu-art-2 entities + Map result = 
service.onArtifactDeleted("gu-art-2"); + assertTrue(((Number) result.get("deleted_entities")).intValue() >= 1); + assertNull(kgService.getEntity("service-deletetarget"), + "Entity should be deleted when artifact is deleted"); + } + + @Test + @Order(4) + void findStaleGraphEntries() throws Exception { + // Create a fresh entity + kgService.upsertEntity("FreshEntity", "class", + "A fresh entity", "gu-art-1", null); + assertNotNull(kgService.getEntity("class-freshentity")); + + // Update artifact indexed_at to the future so entity appears stale + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + UPDATE artifacts SET indexed_at = CURRENT_TIMESTAMP + INTERVAL '1' HOUR + WHERE artifact_id = 'gu-art-1' + """); + } + + List> stale = service.findStaleGraphEntries(); + assertFalse(stale.isEmpty(), "Should find stale entries when artifact was re-indexed"); + boolean found = stale.stream().anyMatch(e -> + "class-freshentity".equals(e.get("entity_id"))); + assertTrue(found, "FreshEntity should appear stale after artifact re-indexed"); + } + + @Test + @Order(5) + void noStaleEntriesWhenUpToDate() throws Exception { + // Reset indexed_at to past so no entity is stale + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + UPDATE artifacts SET indexed_at = CURRENT_TIMESTAMP - INTERVAL '1' HOUR + WHERE artifact_id = 'gu-art-1' + """); + } + + // Delete all entities to start clean, then create one with current timestamp + try (Statement stmt = conn.createStatement()) { + stmt.execute("DELETE FROM entities"); + } + kgService.upsertEntity("UpToDateEntity", "class", + "An up-to-date entity", "gu-art-1", null); + + List> stale = service.findStaleGraphEntries(); + boolean foundUpToDate = stale.stream().anyMatch(e -> + "class-uptodateentity".equals(e.get("entity_id"))); + assertFalse(foundUpToDate, + "Entity should not be stale when artifact indexed_at is in 
the past"); + } +} diff --git a/src/test/java/com/javaducker/server/service/KnowledgeGraphServiceTest.java b/src/test/java/com/javaducker/server/service/KnowledgeGraphServiceTest.java new file mode 100644 index 0000000..26d5703 --- /dev/null +++ b/src/test/java/com/javaducker/server/service/KnowledgeGraphServiceTest.java @@ -0,0 +1,328 @@ +package com.javaducker.server.service; + +import com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.*; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class KnowledgeGraphServiceTest { + + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static KnowledgeGraphService service; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-kg.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + ArtifactService artifactService = new ArtifactService(dataSource); + SearchService searchService = new SearchService(dataSource, new EmbeddingService(config), config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + new EmbeddingService(config), new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); + bootstrap.createSchema(); + EmbeddingService embeddingService = new EmbeddingService(config); + service = new 
KnowledgeGraphService(dataSource, embeddingService); + + // Seed test artifacts + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('kg-art-1', 'SearchService.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('kg-art-2', 'SearchController.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + } + + @AfterAll + static void teardown() throws Exception { + dataSource.close(); + } + + @Test + @Order(1) + void upsertNewEntity() throws Exception { + var result = service.upsertEntity("SearchService", "class", + "Service that handles search operations", "kg-art-1", "chunk-1"); + assertNotNull(result.get("entity_id")); + assertEquals("SearchService", result.get("entity_name")); + assertEquals("class", result.get("entity_type")); + assertEquals("created", result.get("action")); + assertEquals(1, ((Number) result.get("mention_count")).intValue()); + } + + @Test + @Order(2) + void upsertExistingEntityMerges() throws Exception { + var result = service.upsertEntity("SearchService", "class", + "Service handling full-text and semantic search operations across artifacts", + "kg-art-2", "chunk-2"); + assertEquals("merged", result.get("action")); + assertEquals(2, ((Number) result.get("mention_count")).intValue()); + + // Verify source_artifact_ids has both + var entity = service.getEntity((String) result.get("entity_id")); + assertNotNull(entity); + String sources = (String) entity.get("source_artifact_ids"); + assertTrue(sources.contains("kg-art-1")); + assertTrue(sources.contains("kg-art-2")); + } + + @Test + @Order(3) + void upsertRelationship() throws Exception { + // Create a second entity first + service.upsertEntity("SearchController", "class", + "REST controller for search endpoints", 
"kg-art-2", null); + + var result = service.upsertRelationship("class-searchservice", "class-searchcontroller", + "USED_BY", "SearchService is used by SearchController", + "kg-art-2", null, 1.0); + assertNotNull(result.get("relationship_id")); + assertEquals("created", result.get("action")); + } + + @Test + @Order(4) + void getEntityById() throws Exception { + var entity = service.getEntity("class-searchservice"); + assertNotNull(entity); + assertEquals("SearchService", entity.get("entity_name")); + assertEquals("class", entity.get("entity_type")); + } + + @Test + @Order(5) + void findEntitiesByName() throws Exception { + var results = service.findEntitiesByName("Search"); + assertFalse(results.isEmpty()); + assertTrue(results.stream().anyMatch(e -> "SearchService".equals(e.get("entity_name")))); + } + + @Test + @Order(6) + void findEntitiesByType() throws Exception { + var results = service.findEntitiesByType("class"); + assertFalse(results.isEmpty()); + assertTrue(results.stream().allMatch(e -> "class".equals(e.get("entity_type")))); + } + + @Test + @Order(7) + void getRelationshipsForEntity() throws Exception { + var rels = service.getRelationships("class-searchservice"); + assertFalse(rels.isEmpty()); + assertEquals("USED_BY", rels.get(0).get("relationship_type")); + } + + @Test + @Order(8) + void getNeighborhood() throws Exception { + // Add a third entity and chain: SearchService -> SearchController -> SearchConfig + service.upsertEntity("SearchConfig", "class", + "Configuration for search features", "kg-art-1", null); + service.upsertRelationship("class-searchcontroller", "class-searchconfig", + "CONFIGURES", "Controller uses config", "kg-art-1", null, 1.0); + + var neighborhood = service.getNeighborhood("class-searchservice", 2); + @SuppressWarnings("unchecked") + var nodes = (List>) neighborhood.get("nodes"); + @SuppressWarnings("unchecked") + var edges = (List>) neighborhood.get("edges"); + + // Should find all 3 nodes with depth 2 + 
assertTrue(nodes.size() >= 3, "Expected at least 3 nodes, got " + nodes.size()); + assertTrue(edges.size() >= 2, "Expected at least 2 edges, got " + edges.size()); + } + + @Test + @Order(9) + void getPath() throws Exception { + var path = service.getPath("class-searchservice", "class-searchconfig"); + assertTrue((Boolean) path.get("found")); + @SuppressWarnings("unchecked") + var pathNodes = (List) path.get("path"); + assertEquals("class-searchservice", pathNodes.get(0)); + assertEquals("class-searchconfig", pathNodes.get(pathNodes.size() - 1)); + } + + @Test + @Order(10) + void getStats() throws Exception { + var stats = service.getStats(); + assertTrue(((Number) stats.get("entity_count")).longValue() >= 3); + assertTrue(((Number) stats.get("relationship_count")).longValue() >= 2); + assertNotNull(stats.get("top_types")); + } + + @Test + @Order(11) + void mergeEntities() throws Exception { + // Create entities to merge + service.upsertEntity("SearchSvc", "class", "Alias for SearchService", "kg-art-1", null); + var mergeResult = service.mergeEntities("class-searchsvc", "class-searchservice", + "Unified search service handling all search operations"); + + assertEquals("class-searchservice", mergeResult.get("merged_into")); + assertEquals("class-searchsvc", mergeResult.get("source_deleted")); + + // Source should be gone + assertNull(service.getEntity("class-searchsvc")); + + // Target should have combined mention count + var merged = service.getEntity("class-searchservice"); + assertNotNull(merged); + assertTrue(((Number) merged.get("mention_count")).intValue() >= 3); + } + + @Test + @Order(12) + void deleteEntitiesForArtifact() throws Exception { + // Create entity sourced only from kg-art-2 + service.upsertEntity("OnlyFromArt2", "class", + "Entity only from art-2", "kg-art-2", null); + + var result = service.deleteEntitiesForArtifact("kg-art-2"); + assertTrue(((Number) result.get("deleted_entities")).intValue() >= 1); + + // Entity sourced only from kg-art-2 
should be gone + assertNull(service.getEntity("class-onlyfromart2")); + } + + @Test + @Order(13) + void deleteEntitiesSharedAcrossArtifacts() throws Exception { + // Create entity sourced from both artifacts + service.upsertEntity("SharedEntity", "class", + "Shared between artifacts", "kg-art-1", null); + service.upsertEntity("SharedEntity", "class", + "Shared between artifacts", "kg-art-2", null); + + // Verify mention_count is 2 + var entity = service.getEntity("class-sharedentity"); + assertNotNull(entity); + assertEquals(2, ((Number) entity.get("mention_count")).intValue()); + + // Delete entities for kg-art-1 + service.deleteEntitiesForArtifact("kg-art-1"); + + // Should survive with decremented count + var surviving = service.getEntity("class-sharedentity"); + assertNotNull(surviving, "Entity shared across artifacts should survive"); + assertEquals(1, ((Number) surviving.get("mention_count")).intValue()); + String sources = (String) surviving.get("source_artifact_ids"); + assertFalse(sources.contains("kg-art-1")); + assertTrue(sources.contains("kg-art-2")); + } + + // ── Chapter 5: Duplicate detection tests ────────────────────────────────── + + @Test + @Order(14) + void findDuplicateCandidatesExactNameDifferentCase() throws Exception { + service.upsertEntity("DupService", "class", "A duplicate detection service", "kg-art-1", null); + service.upsertEntity("dupservice", "service", "A duplicate detection service lowercase", "kg-art-1", null); + + var candidates = service.findDuplicateCandidates(); + boolean found = candidates.stream().anyMatch(c -> + ("DupService".equals(c.get("source_name")) && "dupservice".equals(c.get("target_name"))) + || ("dupservice".equals(c.get("source_name")) && "DupService".equals(c.get("target_name")))); + assertTrue(found, "Should detect case-insensitive name match as duplicate candidate"); + + var match = candidates.stream().filter(c -> + "DupService".equalsIgnoreCase((String) c.get("source_name")) + && 
"dupservice".equalsIgnoreCase((String) c.get("target_name")) + || "dupservice".equalsIgnoreCase((String) c.get("source_name")) + && "DupService".equalsIgnoreCase((String) c.get("target_name"))).findFirst(); + assertTrue(match.isPresent()); + assertEquals(1.0, (double) match.get().get("confidence"), 0.001); + } + + @Test + @Order(15) + void findDuplicateCandidatesSimilarNames() throws Exception { + service.upsertEntity("UserService", "service", "Handles user operations", "kg-art-1", null); + service.upsertEntity("UserServce", "service", "Handles user operations (typo)", "kg-art-1", null); + + var candidates = service.findDuplicateCandidates(); + boolean found = candidates.stream().anyMatch(c -> { + String src = (String) c.get("source_name"); + String tgt = (String) c.get("target_name"); + return (src.equals("UserService") && tgt.equals("UserServce")) + || (src.equals("UserServce") && tgt.equals("UserService")); + }); + assertTrue(found, "Should detect Levenshtein-similar names as duplicate candidates"); + } + + @Test + @Order(16) + void findMergeCandidatesByEmbedding() throws Exception { + service.upsertEntity("PaymentProcessor", "service", + "Processes credit card payments and handles transaction validation", "kg-art-1", null); + service.upsertEntity("PaymentHandler", "service", + "Processes credit card payments and handles transaction validation and refunds", "kg-art-1", null); + + var candidates = service.findMergeCandidates("service-paymentprocessor"); + boolean found = candidates.stream().anyMatch(c -> + "service-paymenthandler".equals(c.get("entity_id"))); + assertTrue(found, "Should find PaymentHandler as a merge candidate for PaymentProcessor"); + } + + @Test + @Order(17) + void findNoDuplicatesWhenNoneExist() throws Exception { + service.upsertEntity("AlphaModule", "module", "First distinct module", "kg-art-1", null); + service.upsertEntity("ZetaWidget", "module", "Completely different widget", "kg-art-1", null); + + var candidates = 
service.findDuplicateCandidates(); + boolean falseMatch = candidates.stream().anyMatch(c -> { + String src = (String) c.get("source_name"); + String tgt = (String) c.get("target_name"); + return (src.equals("AlphaModule") && tgt.equals("ZetaWidget")) + || (src.equals("ZetaWidget") && tgt.equals("AlphaModule")); + }); + assertFalse(falseMatch, "Should not find false duplicate matches between distinct entities"); + } + + @Test + @Order(18) + void levenshteinDistanceBasicCases() { + assertEquals(0, KnowledgeGraphService.levenshteinDistance("abc", "abc")); + assertEquals(1, KnowledgeGraphService.levenshteinDistance("abc", "ab")); + assertEquals(1, KnowledgeGraphService.levenshteinDistance("abc", "abx")); + assertEquals(3, KnowledgeGraphService.levenshteinDistance("abc", "xyz")); + } + + @Test + @Order(19) + void cosineSimilarityBasicCases() { + double[] a = {1, 0, 0}; + double[] b = {1, 0, 0}; + assertEquals(1.0, KnowledgeGraphService.cosineSimilarity(a, b), 0.001); + double[] c = {1, 0, 0}; + double[] d = {0, 1, 0}; + assertEquals(0.0, KnowledgeGraphService.cosineSimilarity(c, d), 0.001); + } +} diff --git a/src/test/java/com/javaducker/server/service/SemanticTagServiceTest.java b/src/test/java/com/javaducker/server/service/SemanticTagServiceTest.java new file mode 100644 index 0000000..9ebe409 --- /dev/null +++ b/src/test/java/com/javaducker/server/service/SemanticTagServiceTest.java @@ -0,0 +1,188 @@ +package com.javaducker.server.service; + +import com.javaducker.server.config.AppConfig; +import com.javaducker.server.db.DuckDBDataSource; +import com.javaducker.server.db.SchemaBootstrap; +import com.javaducker.server.ingestion.*; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +class SemanticTagServiceTest { 
+ + @TempDir + static Path tempDir; + + static DuckDBDataSource dataSource; + static SemanticTagService service; + + @BeforeAll + static void setup() throws Exception { + AppConfig config = new AppConfig(); + config.setDbPath(tempDir.resolve("test-st.duckdb").toString()); + config.setIntakeDir(tempDir.resolve("intake").toString()); + dataSource = new DuckDBDataSource(config); + ArtifactService artifactService = new ArtifactService(dataSource); + SearchService searchService = new SearchService(dataSource, new EmbeddingService(config), config); + IngestionWorker worker = new IngestionWorker(dataSource, artifactService, + new TextExtractor(), new TextNormalizer(), new Chunker(), + new EmbeddingService(config), new FileSummarizer(), new ImportParser(), + new ReladomoXmlParser(), new ReladomoService(dataSource), + new ReladomoFinderParser(), new ReladomoConfigParser(), + searchService, config); + SchemaBootstrap bootstrap = new SchemaBootstrap(dataSource, config, worker); + bootstrap.createSchema(); + service = new SemanticTagService(dataSource); + + // Seed test artifacts + Connection conn = dataSource.getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('tag-art-1', 'UserService.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('tag-art-2', 'AuthController.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + stmt.execute(""" + INSERT INTO artifacts (artifact_id, file_name, status, created_at, updated_at) + VALUES ('tag-art-3', 'PaymentService.java', 'INDEXED', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + """); + } + } + + @AfterAll + static void teardown() throws Exception { + dataSource.close(); + } + + @Test + @Order(1) + void writeTagsForArtifact() throws Exception { + List> tags = List.of( + Map.of("tag", 
"user-management", "category", "functional", "confidence", 0.95), + Map.of("tag", "spring-service", "category", "architectural", "confidence", 0.9), + Map.of("tag", "crud-operations", "category", "pattern", "confidence", 0.85), + Map.of("tag", "authentication", "category", "domain", "confidence", 0.8), + Map.of("tag", "data-access", "category", "concern", "confidence", 0.75)); + var result = service.writeTags("tag-art-1", tags); + assertEquals("tag-art-1", result.get("artifact_id")); + assertEquals(5, result.get("tags_count")); + } + + @Test + @Order(2) + void rejectTooFewTags() { + List> tags = List.of( + Map.of("tag", "a", "category", "functional"), + Map.of("tag", "b", "category", "domain")); + assertThrows(IllegalArgumentException.class, () -> service.writeTags("tag-art-1", tags)); + } + + @Test + @Order(3) + void rejectTooManyTags() { + List> tags = new ArrayList<>(); + for (int i = 0; i < 12; i++) { + tags.add(Map.of("tag", "tag-" + i, "category", "functional")); + } + assertThrows(IllegalArgumentException.class, () -> service.writeTags("tag-art-1", tags)); + } + + @Test + @Order(4) + void findByTag() throws Exception { + var results = service.findByTag("user-management"); + assertEquals(1, results.size()); + assertEquals("tag-art-1", results.get(0).get("artifact_id")); + assertEquals("UserService.java", results.get(0).get("file_name")); + } + + @Test + @Order(5) + void findByCategory() throws Exception { + var results = service.findByCategory("architectural"); + assertFalse(results.isEmpty()); + assertTrue(results.stream().anyMatch(r -> "spring-service".equals(r.get("tag")))); + } + + @Test + @Order(6) + void searchByTagsAny() throws Exception { + // Write tags for art-2 to have some overlap + service.writeTags("tag-art-2", List.of( + Map.of("tag", "authentication", "category", "domain", "confidence", 0.9), + Map.of("tag", "spring-controller", "category", "architectural", "confidence", 0.85), + Map.of("tag", "rest-api", "category", "pattern", "confidence", 
0.8), + Map.of("tag", "security", "category", "concern", "confidence", 0.75))); + + var results = service.searchByTags(List.of("authentication", "crud-operations"), false); + assertTrue(results.size() >= 2, "Both art-1 and art-2 have 'authentication'"); + } + + @Test + @Order(7) + void searchByTagsAll() throws Exception { + var results = service.searchByTags(List.of("authentication", "crud-operations"), true); + assertEquals(1, results.size(), "Only art-1 has both tags"); + assertEquals("tag-art-1", results.get(0).get("artifact_id")); + } + + @Test + @Order(8) + void tagCloud() throws Exception { + var cloud = service.getTagCloud(); + assertNotNull(cloud.get("categories")); + assertTrue((int) cloud.get("total_tags") >= 5); + @SuppressWarnings("unchecked") + Map>> categories = + (Map>>) cloud.get("categories"); + assertTrue(categories.containsKey("domain")); + assertTrue(categories.containsKey("architectural")); + } + + @Test + @Order(9) + void suggestTags() throws Exception { + // art-1 and art-2 share "authentication" + // Suggestions for art-1 should include art-2's unique tags + var suggestions = service.suggestTags("tag-art-1"); + assertFalse(suggestions.isEmpty()); + var suggestedTagNames = suggestions.stream() + .map(s -> (String) s.get("tag")).toList(); + // art-2 has "spring-controller", "rest-api", "security" that art-1 doesn't + assertTrue(suggestedTagNames.contains("spring-controller") + || suggestedTagNames.contains("rest-api") + || suggestedTagNames.contains("security")); + } + + @Test + @Order(10) + void overwriteTagsOnRewrite() throws Exception { + // Write new set of tags for art-1 + List> newTags = List.of( + Map.of("tag", "new-tag-1", "category", "functional"), + Map.of("tag", "new-tag-2", "category", "domain"), + Map.of("tag", "new-tag-3", "category", "architectural"), + Map.of("tag", "new-tag-4", "category", "pattern")); + service.writeTags("tag-art-1", newTags); + + // Old tags should be gone + var oldResults = 
service.findByTag("user-management"); + assertTrue(oldResults.isEmpty(), "Old tag 'user-management' should be gone"); + + // New tags should be present + var newResults = service.findByTag("new-tag-1"); + assertEquals(1, newResults.size()); + assertEquals("tag-art-1", newResults.get(0).get("artifact_id")); + } +} diff --git a/start-here.md b/start-here.md index 487f7e6..9e3e68a 100644 --- a/start-here.md +++ b/start-here.md @@ -47,6 +47,8 @@ bash scripts/local/run-server.sh **MCP stdio mode** (for Claude Code integration): ```bash bash scripts/local/run-mcp.sh +# or directly: +java -jar target/javaducker-1.0.0.jar --spring.profiles.active=mcp ``` **CLI client**: diff --git a/workflows/bug-fix.md b/workflows/bug-fix.md index 2dd7031..93ad31d 100644 --- a/workflows/bug-fix.md +++ b/workflows/bug-fix.md @@ -4,6 +4,7 @@ Spawn parallel agents in ONE message: - **Agent A**: Reproduce the bug — find failing test case or steps to trigger - **Agent B**: Search codebase — grep for related patterns, read recent git log for the area +- **Agent C** (if JavaDucker available): `javaducker_search` for error messages and related code paths, `javaducker_explain` on suspect files -Wait for both. Combine findings. +Wait for all agents. Combine findings. diff --git a/workflows/closed-loop.md b/workflows/closed-loop.md index f7c48ff..37db8db 100644 --- a/workflows/closed-loop.md +++ b/workflows/closed-loop.md @@ -68,6 +68,10 @@ Append to `context/MEMORY.md`: - Copy confirmed outputs to final location - Write final summary to `context/MEMORY.md` +### 9.
JavaDucker hygiene (if available) +- Run `workflows/javaducker-hygiene.md` Phase 2 (enrich new artifacts) and Phase 3 (compact superseded artifacts from prior iterations) +- Record loop decisions with `javaducker_extract_decisions` + ## Anti-patterns to avoid - **Don't fix everything at once** — group by category, fix in parallel batches diff --git a/workflows/code-review.md b/workflows/code-review.md index 774b60c..9d2f1d5 100644 --- a/workflows/code-review.md +++ b/workflows/code-review.md @@ -1,7 +1,8 @@ # Code Review Workflow 1. **Read the diff holistically** — Understand the full change before commenting on details. -2. **Check each dimension:** +2. **If JavaDucker is available** — use `javaducker_dependents` on changed files to check for downstream impact that might need updating. +3. **Check each dimension:** - Correctness — Does it do what it's supposed to? - Security — Any injection, auth, or data exposure risks? - Performance — Any unnecessary loops, queries, or allocations? diff --git a/workflows/javaducker-hygiene.md b/workflows/javaducker-hygiene.md new file mode 100644 index 0000000..62e4a55 --- /dev/null +++ b/workflows/javaducker-hygiene.md @@ -0,0 +1,134 @@ +# JavaDucker Hygiene Workflow + +Claude is the brain. JavaDucker is the memory. This workflow is how Claude curates that memory — deciding what's current, what's stale, what supersedes what, and what threads matter. + +## When to run + +- Session-end hook detects un-enriched artifacts → run Phase 2 +- `javaducker_index_health` reports "degraded" → run Phase 1 +- After major architectural changes → run Phase 3 +- Periodically (weekly or after big feature work) → run all phases + +## Phase 1: Freshness — Keep the Index Current + +``` +1. javaducker_index_health → read the recommendation +2. 
If degraded: + - javaducker_stale with git_diff_ref: "HEAD" → list stale files + - Re-index each with javaducker_index_file (parallel) + - javaducker_wait_for_indexed on each +``` + +This phase is mechanical — no judgment needed. + +## Phase 2: Enrichment — Claude Reads and Classifies + +This is where Claude's judgment matters. JavaDucker doesn't know what a file *means* — Claude does. + +For each un-enriched artifact from `javaducker_enrich_queue`: + +1. **Read the content** — `javaducker_get_file_text` to understand what the file is about +2. **Classify it** — `javaducker_classify` with the doc_type Claude determines: + - `CODE` — source code (most files) + - `ADR` — architecture decision record + - `DESIGN_DOC` — design document or RFC + - `PLAN` — execution plan + - `MEETING_NOTES` — meeting notes or standup summaries + - `THREAD` — conversation thread or session transcript + - `REFERENCE` — reference material, guides, standards + - `TICKET` — issue or ticket description + - `SCRATCH` — throwaway notes, experiments +3. **Extract the important threads** — `javaducker_extract_points`. Claude reads the content and identifies: + - `DECISION` — choices made and why (the most important type — these form the decision history) + - `RISK` — identified risks, concerns, potential issues + - `ACTION` — things that need to be done + - `INSIGHT` — non-obvious learnings, "we discovered that..." + - `CONSTRAINT` — hard constraints that limit future choices + - `QUESTION` — open questions that haven't been resolved + - `STATUS` — current state of something +4. **Tag it** — `javaducker_tag` with semantic tags Claude chooses based on content (domain area, technology, component, feature) +5. **Mark done** — `javaducker_mark_enriched` + +**Key principle**: Claude must *read and understand* the content to enrich it properly. Don't guess from filenames. A file called `notes.md` might contain critical architecture decisions. 
A file called `ArchitectureDecision.md` might be an abandoned draft. + +## Phase 3: Compaction — Claude Decides What's Obsolete + +This is the most judgment-heavy phase. JavaDucker doesn't know which decisions invalidate others. Claude does. + +### Step 1: Identify candidates + +``` +javaducker_stale_content → list stale/superseded artifacts +javaducker_find_points with DECISION type → all recorded decisions +javaducker_concept_health → concepts with fading/cold status +``` + +### Step 2: For each stale artifact, Claude decides + +Read the artifact's summary, tags, and points. Then ask: + +- **Is there a newer version?** `javaducker_latest` on the topic. If yes → supersede. +- **Does a recent decision invalidate this?** Compare against `javaducker_recent_decisions`. If a newer decision contradicts this artifact's decisions → supersede and note why. +- **Is it just old but still valid?** Some things don't change (core design principles, external API contracts). If still accurate → `javaducker_set_freshness` → `current`. Don't prune valid content just because it's old. +- **Is it partially valid?** Some points are still relevant, others are stale. Extract the still-valid points into the synthesis summary. + +### Step 3: Synthesize what's truly obsolete + +For artifacts Claude has decided to supersede: + +``` +1. javaducker_set_freshness → "superseded", superseded_by: +2. javaducker_synthesize: + - summary_text: what this artifact contained and why it's superseded + - key_points: any points that are STILL relevant (carry forward) + - outcome: what replaced it and why + - tags: preserve existing tags +3. 
javaducker_link_concepts → connect concepts from old → new artifact +``` + +**What gets pruned**: full text and embeddings (heavy, noisy in search results) +**What stays**: summary, tags, key points, concept links (lightweight, searchable as reference) + +### Step 4: Decision chain maintenance + +The most important thread to maintain is the **decision chain** — the sequence of decisions that led to the current state. + +``` +1. javaducker_find_points with DECISION type → all decisions +2. Group by domain/topic +3. For each topic, read the decisions chronologically +4. Identify: which decisions are still active? which were superseded? +5. If a decision was superseded but not marked: + - javaducker_set_freshness → "superseded" on the old artifact + - javaducker_synthesize with outcome noting what replaced it +6. If a decision chain has gaps (decision A → ??? → decision C): + - Extract the implicit decision B from context and record it +``` + +This ensures that `javaducker_recent_decisions` always returns the *current* decisions, not a mix of old and new. + +## Phase 4: Concept Health — Identify Important vs. Dead Threads + +``` +1. javaducker_concept_health → list all concepts with trend (active/fading/cold) +2. For each fading/cold concept, Claude decides: + - Was it a real concept (e.g., "authentication", "payment processing")? + → Check if it's still in the codebase (grep/search) + → If still relevant: the concept needs fresh content, flag for investigation + → If truly gone: no action, the synthesis records serve as history + - Was it noise (e.g., "temporary", "workaround")? + → No action needed +3. javaducker_concepts → verify the concept map reflects reality +``` + +## What stays, what goes + +| State | Full text | Embeddings | Summary | Tags | Points | Searchable? 
| +|-------|-----------|------------|---------|------|--------|------------| +| **INDEXED** | yes | yes | yes | no | no | by text + semantic | +| **ENRICHED** | yes | yes | yes | yes | yes | full: text + semantic + tags + points + type | +| **SUPERSEDED** | **pruned** | **pruned** | yes | yes | yes | reference: summary + tags + points only | + +## The judgment principle + +JavaDucker stores. Claude curates. Every `set_freshness`, `synthesize`, `extract_points`, and `classify` call is a **judgment call** that Claude makes after reading and understanding the content. Never run enrichment or compaction mechanically — always read first, decide second, write third. diff --git a/workflows/new-feature.md b/workflows/new-feature.md index 4609b94..86a44c9 100644 --- a/workflows/new-feature.md +++ b/workflows/new-feature.md @@ -4,6 +4,7 @@ Spawn parallel agents in ONE message: - **Agent A**: Read requirements + check `context/MEMORY.md` and `context/CONVENTIONS.md` - **Agent B**: Explore existing code — find related files, patterns, interfaces to extend +- **Agent C** (if JavaDucker available): `javaducker_search` for similar features and patterns, `javaducker_map` for project orientation Wait for both. Combine into implementation plan. diff --git a/workflows/refactor.md b/workflows/refactor.md index 8c92e29..2b9cf9b 100644 --- a/workflows/refactor.md +++ b/workflows/refactor.md @@ -4,6 +4,7 @@ Spawn parallel agents in ONE message: - **Agent A**: Identify refactoring targets — duplication, complexity, unclear naming, tight coupling - **Agent B**: Run existing tests to establish passing baseline, note coverage gaps +- **Agent C** (if JavaDucker available): `javaducker_dependents` on target files to discover all callers and importers before restructuring Wait for both.