Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codebase_rag/graph_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def __init__(
self.repo_path = repo_path
self.parsers = parsers
self.queries = self._prepare_queries_with_parsers(queries, parsers)
self.project_name = repo_path.name
self.project_name = repo_path.resolve().name
self.function_registry = FunctionRegistryTrie()
self.simple_name_lookup: dict[str, set[str]] = defaultdict(set)
self.ast_cache = BoundedASTCache(max_entries=1000, max_memory_mb=500)
Expand Down
129 changes: 121 additions & 8 deletions codebase_rag/mcp/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,58 @@ def __init__(

# Build tool registry - single source of truth for all tool metadata
self._tools: dict[str, ToolMetadata] = {
"list_projects": ToolMetadata(
name="list_projects",
description="List all indexed projects in the knowledge graph database. "
"Returns a list of project names that have been indexed.",
input_schema={
"type": "object",
"properties": {},
"required": [],
},
handler=self.list_projects,
returns_json=True,
),
"delete_project": ToolMetadata(
name="delete_project",
description="Delete a specific project from the knowledge graph database. "
"This removes all nodes associated with the project while preserving other projects. "
"Use list_projects first to see available projects.",
input_schema={
"type": "object",
"properties": {
"project_name": {
"type": "string",
"description": "Name of the project to delete (e.g., 'my-project')",
}
},
"required": ["project_name"],
},
handler=self.delete_project,
returns_json=True,
),
"wipe_database": ToolMetadata(
name="wipe_database",
description="WARNING: Completely wipe the entire database, removing ALL indexed projects. "
"This cannot be undone. Use delete_project for removing individual projects.",
input_schema={
"type": "object",
"properties": {
"confirm": {
"type": "boolean",
"description": "Must be true to confirm the wipe operation",
}
},
"required": ["confirm"],
},
handler=self.wipe_database,
returns_json=False,
),
"index_repository": ToolMetadata(
name="index_repository",
description="Parse and ingest the repository into the Memgraph knowledge graph. "
"This builds a comprehensive graph of functions, classes, dependencies, and relationships.",
"This builds a comprehensive graph of functions, classes, dependencies, and relationships. "
"Note: This now preserves other projects - only the current project is re-indexed.",
input_schema={
"type": "object",
"properties": {},
Expand Down Expand Up @@ -216,26 +264,91 @@ def __init__(
),
}

async def list_projects(self) -> dict[str, Any]:
    """List all indexed projects in the knowledge graph database.

    Delegates to the ingestor and wraps the result in a JSON-friendly
    payload; on failure the error text is reported alongside an empty list.

    Returns:
        Dictionary with the list of project names and their count.
    """
    logger.info("[MCP] Listing all projects...")
    try:
        names = self.ingestor.list_projects()
    except Exception as e:
        logger.error(f"[MCP] Error listing projects: {e}")
        return {"error": str(e), "projects": [], "count": 0}
    return {"projects": names, "count": len(names)}

async def delete_project(self, project_name: str) -> dict[str, Any]:
    """Delete a specific project from the knowledge graph database.

    Args:
        project_name: Name of the project to delete.

    Returns:
        Dictionary describing whether the deletion succeeded.
    """
    logger.info(f"[MCP] Deleting project: {project_name}")
    try:
        # Verify project exists before attempting deletion.
        known = self.ingestor.list_projects()
        if project_name in known:
            self.ingestor.delete_project(project_name)
            return {
                "success": True,
                "project": project_name,
                "message": f"Successfully deleted project '{project_name}'.",
            }
        return {
            "success": False,
            "error": f"Project '{project_name}' not found. Available projects: {known}",
        }
    except Exception as e:
        logger.error(f"[MCP] Error deleting project: {e}")
        return {"success": False, "error": str(e)}

async def wipe_database(self, confirm: bool) -> str:
    """Completely wipe the entire database.

    Args:
        confirm: Must be True to proceed with the wipe.

    Returns:
        Human-readable status message.
    """
    if not confirm:
        # Refuse to do anything destructive without explicit confirmation.
        return "Database wipe cancelled. Set confirm=true to proceed."

    logger.warning("[MCP] Wiping entire database!")
    try:
        self.ingestor.clean_database()
    except Exception as e:
        logger.error(f"[MCP] Error wiping database: {e}")
        return f"Error wiping database: {str(e)}"
    return "Database completely wiped. All projects have been removed."

async def index_repository(self) -> str:
"""Parse and ingest the repository into the Memgraph knowledge graph.

This tool analyzes the codebase using Tree-sitter parsers and builds
a comprehensive knowledge graph with functions, classes, dependencies,
and relationships.

Note: This clears all existing data in the database before indexing.
Only one repository can be indexed at a time.
Note: This now only clears data for the current project, preserving other projects.

Returns:
Success message with indexing statistics
"""
logger.info(f"[MCP] Indexing repository at: {self.project_root}")
project_name = Path(self.project_root).resolve().name

try:
# Clear existing data to ensure clean state for the new repository
logger.info("[MCP] Clearing existing database to avoid conflicts...")
self.ingestor.clean_database()
logger.info("[MCP] Database cleared. Starting fresh indexing...")
# Delete only the current project's data (preserves other projects)
logger.info(f"[MCP] Clearing existing data for project '{project_name}'...")
self.ingestor.delete_project(project_name)

updater = GraphUpdater(
ingestor=self.ingestor,
Expand All @@ -245,7 +358,7 @@ async def index_repository(self) -> str:
)
updater.run()

return f"Successfully indexed repository at {self.project_root}. Knowledge graph has been updated (previous data cleared)."
return f"Successfully indexed repository at {self.project_root}. Project '{project_name}' has been updated."
except Exception as e:
logger.error(f"[MCP] Error indexing repository: {e}")
return f"Error indexing repository: {str(e)}"
Expand Down
37 changes: 31 additions & 6 deletions codebase_rag/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,28 +125,43 @@

{GRAPH_SCHEMA_AND_RULES}

**3. Query Patterns & Examples**
Your goal is to return the `name`, `path`, and `qualified_name` of the found nodes.
**3. Query Optimization Rules**

- **LIMIT Results**: ALWAYS add `LIMIT 50` to queries that list items. This prevents overwhelming responses.
- **Aggregation Queries**: When asked "how many", "count", or "total", return ONLY the count, not all items:
- CORRECT: `MATCH (c:Class) RETURN count(c) AS total`
- WRONG: `MATCH (c:Class) RETURN c.name, c.path, count(c) AS total` (returns all items!)
- **List vs Count**: If asked to "list" or "show", return items with LIMIT. If asked to "count" or "how many", return only the count.

**4. Query Patterns & Examples**
When listing items, return the `name`, `path`, and `qualified_name` with a LIMIT.

**Pattern: Counting Items**
cypher// "How many classes are there?" or "Count all functions"
MATCH (c:Class) RETURN count(c) AS total

**Pattern: Finding Decorated Functions/Methods (e.g., Workflows, Tasks)**
cypher// "Find all prefect flows" or "what are the workflows?" or "show me the tasks"
// Use the 'IN' operator to check the 'decorators' list property.
MATCH (n:Function|Method)
WHERE ANY(d IN n.decorators WHERE toLower(d) IN ['flow', 'task'])
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type
LIMIT 50

**Pattern: Finding Content by Path (Robustly)**
cypher// "what is in the 'workflows/src' directory?" or "list files in workflows"
// Use `STARTS WITH` for path matching.
MATCH (n)
WHERE n.path IS NOT NULL AND n.path STARTS WITH 'workflows'
RETURN n.name AS name, n.path AS path, labels(n) AS type
LIMIT 50

**Pattern: Keyword & Concept Search (Fallback for general terms)**
cypher// "find things related to 'database'"
MATCH (n)
WHERE toLower(n.name) CONTAINS 'database' OR (n.qualified_name IS NOT NULL AND toLower(n.qualified_name) CONTAINS 'database')
RETURN n.name AS name, n.qualified_name AS qualified_name, labels(n) AS type
LIMIT 50

**Pattern: Finding a Specific File**
cypher// "Find the main README.md"
Expand All @@ -173,31 +188,41 @@
- For code nodes (`Class`, `Function`, etc.), return `n.qualified_name AS qualified_name`.
4. **KEEP IT SIMPLE**: Do not try to be clever. A simple query that returns a few relevant nodes is better than a complex one that fails.
5. **CLAUSE ORDER**: You MUST follow the standard Cypher clause order: `MATCH`, `WHERE`, `RETURN`, `LIMIT`.
6. **ALWAYS ADD LIMIT**: For queries that list items, ALWAYS add `LIMIT 50` to prevent overwhelming responses.
7. **AGGREGATION QUERIES**: When asked "how many" or "count", return ONLY the count:
- CORRECT: `MATCH (c:Class) RETURN count(c) AS total`
- WRONG: `MATCH (c:Class) RETURN c.name, count(c) AS total` (returns all items!)

**Examples:**

* **Natural Language:** "How many classes are there?"
* **Cypher Query:**
```cypher
MATCH (c:Class) RETURN count(c) AS total
```

* **Natural Language:** "Find the main README file"
* **Cypher Query:**
```cypher
MATCH (f:File) WHERE toLower(f.name) CONTAINS 'readme' RETURN f.path AS path, f.name AS name, labels(f) AS type
MATCH (f:File) WHERE toLower(f.name) CONTAINS 'readme' RETURN f.path AS path, f.name AS name, labels(f) AS type LIMIT 50
```

* **Natural Language:** "Find all python files"
* **Cypher Query (Note the '.' in extension):**
```cypher
MATCH (f:File) WHERE f.extension = '.py' RETURN f.path AS path, f.name AS name, labels(f) AS type
MATCH (f:File) WHERE f.extension = '.py' RETURN f.path AS path, f.name AS name, labels(f) AS type LIMIT 50
```

* **Natural Language:** "show me the tasks"
* **Cypher Query:**
```cypher
MATCH (n:Function|Method) WHERE 'task' IN n.decorators RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type
MATCH (n:Function|Method) WHERE 'task' IN n.decorators RETURN n.qualified_name AS qualified_name, n.name AS name, labels(n) AS type LIMIT 50
```

* **Natural Language:** "list files in the services folder"
* **Cypher Query:**
```cypher
MATCH (f:File) WHERE f.path STARTS WITH 'services' RETURN f.path AS path, f.name AS name, labels(f) AS type
MATCH (f:File) WHERE f.path STARTS WITH 'services' RETURN f.path AS path, f.name AS name, labels(f) AS type LIMIT 50
```

* **Natural Language:** "Find just one file to test"
Expand Down
34 changes: 34 additions & 0 deletions codebase_rag/services/graph_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,44 @@ def _execute_batch_with_return(
cursor.close()

def clean_database(self) -> None:
    """Wipe the entire database. Use with caution.

    Removes every node and relationship; prefer delete_project() when only
    a single project's data should be removed.
    """
    # Detach-delete drops each node together with all of its relationships.
    wipe_query = "MATCH (n) DETACH DELETE n;"
    logger.info("--- Cleaning database... ---")
    self._execute_query(wipe_query)
    logger.info("--- Database cleaned. ---")

def list_projects(self) -> list[str]:
    """List all indexed projects in the database.

    Returns:
        Project names, sorted alphabetically (ordering is done by the query).
    """
    return [
        record["name"]
        for record in self.fetch_all(
            "MATCH (p:Project) RETURN p.name AS name ORDER BY p.name"
        )
    ]

def delete_project(self, project_name: str) -> None:
    """Delete all nodes associated with a specific project.

    A single detach-delete removes the Project node itself plus every node
    whose qualified_name carries the project's dotted prefix, leaving all
    other projects untouched.

    NOTE(review): nodes keyed only by `path` (without a qualified_name)
    would not match this predicate — confirm no per-project nodes of that
    shape exist.

    Args:
        project_name: Name of the project to delete
    """
    logger.info(f"--- Deleting project: {project_name} ---")

    parameters = {"prefix": f"{project_name}.", "project_name": project_name}
    self._execute_query(
        """
        MATCH (n)
        WHERE n.qualified_name STARTS WITH $prefix
        OR (n:Project AND n.name = $project_name)
        DETACH DELETE n
        """,
        parameters,
    )

    logger.info(f"--- Project {project_name} deleted. ---")

def ensure_constraints(self) -> None:
logger.info("Ensuring constraints...")
for label, prop in self.unique_constraints.items():
Expand Down
38 changes: 20 additions & 18 deletions codebase_rag/tests/test_mcp_query_and_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,10 +297,10 @@ async def test_index_repository_multiple_times(
# Should have been called twice
assert mock_updater.run.call_count == 2

async def test_index_repository_clears_database_first(
async def test_index_repository_clears_project_data_first(
self, mcp_registry: MCPToolsRegistry, temp_project_root: Path
) -> None:
"""Test that database is cleared before indexing."""
"""Test that project data is cleared before indexing."""
with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class:
mock_updater = MagicMock()
mock_updater.run.return_value = None
Expand All @@ -309,25 +309,24 @@ async def test_index_repository_clears_database_first(
# Index repository
result = await mcp_registry.index_repository()

# Verify clean_database was called
mcp_registry.ingestor.clean_database.assert_called_once() # type: ignore[attr-defined]
# Verify delete_project was called with correct project name
project_name = temp_project_root.name
mcp_registry.ingestor.delete_project.assert_called_once_with(project_name) # type: ignore[attr-defined]
assert "Error:" not in result
# Verify message indicates data was cleared
assert "cleared" in result.lower() or "previous data" in result.lower()

async def test_index_repository_clears_before_updater_runs(
async def test_index_repository_deletes_project_before_updater_runs(
self, mcp_registry: MCPToolsRegistry, temp_project_root: Path
) -> None:
"""Test that database clearing happens before GraphUpdater runs."""
"""Test that project deletion happens before GraphUpdater runs."""
call_order: list[str] = []

def mock_clean() -> None:
call_order.append("clean")
def mock_delete(project_name: str) -> None:
call_order.append("delete")

def mock_run() -> None:
call_order.append("run")

mcp_registry.ingestor.clean_database = MagicMock(side_effect=mock_clean) # type: ignore[method-assign]
mcp_registry.ingestor.delete_project = MagicMock(side_effect=mock_delete) # type: ignore[method-assign]

with patch("codebase_rag.mcp.tools.GraphUpdater") as mock_updater_class:
mock_updater = MagicMock()
Expand All @@ -336,13 +335,13 @@ def mock_run() -> None:

await mcp_registry.index_repository()

# Verify clean was called before run
assert call_order == ["clean", "run"]
# Verify delete was called before run
assert call_order == ["delete", "run"]

async def test_sequential_index_clears_previous_repo_data(
async def test_sequential_index_only_clears_own_project_data(
self, tmp_path: Path
) -> None:
"""Test that indexing a second repository clears the first repository's data."""
"""Test that indexing repositories only clears their own project data."""
# Create two mock registries for different projects
mock_ingestor = MagicMock()
mock_cypher = MagicMock()
Expand Down Expand Up @@ -370,11 +369,14 @@ async def test_sequential_index_clears_previous_repo_data(

# Index first repository
await registry1.index_repository()
assert mock_ingestor.clean_database.call_count == 1
mock_ingestor.delete_project.assert_called_with("project1")

# Index second repository - should clear database again
# Index second repository - should only delete project2, not project1
await registry2.index_repository()
assert mock_ingestor.clean_database.call_count == 2
mock_ingestor.delete_project.assert_called_with("project2")

# Total of 2 delete_project calls (one per project)
assert mock_ingestor.delete_project.call_count == 2


class TestQueryAndIndexIntegration:
Expand Down