From c633720792348fa41d897e74d6098ba4ec3a4915 Mon Sep 17 00:00:00 2001 From: prpeh Date: Sat, 29 Mar 2025 00:39:22 +0700 Subject: [PATCH] feat: enhance domain knowledge storage with documentation support and improved metadata handling --- .env.example | 12 +- .eslintrc.js => .eslintrc.cjs | 0 README.md | 307 ++++++++++++++----------- index.ts | 58 ++--- jest.config.js => jest.config.cjs | 0 package-lock.json | 38 --- package.json | 9 +- run-cursor-mcp.sh | 8 + src/core/db-service.ts | 93 +++++--- src/core/knowledge-management-tools.ts | 184 +++++++++++++++ src/legacy-retrieval-server.ts | 113 +++++++++ src/scripts/store-documentation.ts | 65 ++++++ tsconfig.json | 2 +- 13 files changed, 636 insertions(+), 253 deletions(-) rename .eslintrc.js => .eslintrc.cjs (100%) rename jest.config.js => jest.config.cjs (100%) create mode 100755 run-cursor-mcp.sh create mode 100644 src/core/knowledge-management-tools.ts create mode 100644 src/legacy-retrieval-server.ts create mode 100644 src/scripts/store-documentation.ts diff --git a/.env.example b/.env.example index 035b67a..3e1bae1 100644 --- a/.env.example +++ b/.env.example @@ -3,16 +3,16 @@ DATABASE_TYPE=qdrant # Common settings -COLLECTION_NAME=documents +COLLECTION_NAME=your_collection_name HTTP_SERVER=false PORT=3000 # Qdrant settings -QDRANT_URL=http://localhost:6333 -QDRANT_API_KEY= +QDRANT_URL=https://your-qdrant-instance-url.example.com:6333 +QDRANT_API_KEY=your_qdrant_api_key + +# Gemini API for embeddings +GEMINI_API_KEY=your_gemini_api_key # Chroma settings CHROMA_URL=http://localhost:8000 - -# API key for embeddings -GEMINI_API_KEY=your_gemini_api_key_here diff --git a/.eslintrc.js b/.eslintrc.cjs similarity index 100% rename from .eslintrc.js rename to .eslintrc.cjs diff --git a/README.md b/README.md index 77faaa8..a94355f 100644 --- a/README.md +++ b/README.md @@ -1,207 +1,236 @@ -# Qdrant MCP Server for RAG +# Qdrant MCP Server -A Model Context Protocol (MCP) server implementation for RAG (Retrieval-Augmented Generation) using Qdrant vector database. +A server implementation that supports both Qdrant and Chroma vector databases for storing and retrieving domain knowledge. ## Features -* **Vector Search**: Perform semantic search over your vector embeddings stored in Qdrant. -* **Customizable Parameters**: Configure search parameters like limit and score threshold. -* **Ready for LLM Integration**: Seamlessly integrates with Claude Desktop and other MCP-compatible tools. - -## Tools - -* **retrieve_from_qdrant** - * Perform vector similarity search against a Qdrant collection. - * Inputs: - * `query` (string): The search query for retrieval. - * `limit` (number, optional): Number of results to retrieve (default: 3). - * `scoreThreshold` (number, optional): Minimum similarity score threshold (default: 0.7). +- Support for both Qdrant and Chroma vector databases +- Configurable database selection via environment variables +- Domain knowledge storage and retrieval +- Documentation file storage with metadata +- Support for PDF and TXT file formats ## Prerequisites -* Node.js v16+ -* Qdrant instance (local or cloud) -* Optional: OpenAI API key (for production embedding generation) +- Node.js 20.x or later (LTS recommended) +- npm 10.x or later +- Qdrant or Chroma vector database -## Setup - -1. Clone this repository -2. Install dependencies: - ```bash - npm install - ``` -3. Configure environment variables in `.env`: - ``` - QDRANT_URL=http://localhost:6333 - QDRANT_COLLECTION=documents - QDRANT_API_KEY=your_api_key_if_needed - PORT=3000 - ``` -4. Build the project: - ```bash - npm run build - ``` -5. Start the server: - ```bash - npm start - ``` - -## Testing - -Run the test suite with: +## Installation +1. Clone the repository: ```bash -npm test +git clone +cd qdrant-mcp-server ``` -Run tests in watch mode during development: - +2. Install dependencies: ```bash -npm run test:watch +npm install ``` -Run linting checks: - +3. Create a `.env` file in the root directory based on the `.env.example` template: ```bash -npm run lint +cp .env.example .env ``` -## Continuous Integration - -This project uses GitHub Actions for continuous integration: - -- **CI Workflow**: Runs linting, builds the project, and executes tests on Node.js 18.x and 20.x. -- **Docker Workflow**: Builds and pushes Docker images to GitHub Container Registry. +4. Update the `.env` file with your own settings: +```env +DATABASE_TYPE=qdrant +QDRANT_URL=https://your-qdrant-instance.example.com:6333 +QDRANT_API_KEY=your_api_key +COLLECTION_NAME=your_collection_name +GEMINI_API_KEY=your_gemini_api_key +``` -The CI workflows are automatically triggered on: -- Push to main/master branches -- Pull requests to main/master branches -- Tag creation (for Docker image releases) +5. Build the project: +```bash +npm run build +``` -## Docker Deployment +## AI IDE Integration -Build and run with Docker: +### Cursor AI IDE +Create the script `run-cursor-mcp.sh` in the project root: ```bash -# Build the image -docker build -t mcp/qdrant-server . +#!/bin/zsh +cd /path/to/your/project +source ~/.zshrc +nvm use --lts -# Run the container -docker run -p 3000:3000 \ - -e QDRANT_URL=http://your-qdrant-instance:6333 \ - -e QDRANT_COLLECTION=documents \ - -e QDRANT_API_KEY=your_api_key_if_needed \ - mcp/qdrant-server +# Let the app load environment variables from .env file +node dist/index.js ``` -## Usage with Claude Desktop - -Add this to your `claude_desktop_config.json`: +Make the script executable: +```bash +chmod +x run-cursor-mcp.sh +``` +Add this configuration to your `~/.cursor/mcp.json` or `.cursor/mcp.json` file: ```json { "mcpServers": { "qdrant-retrieval": { - "command": "docker", - "args": [ - "run", - "-i", - "--rm", - "-e", "QDRANT_URL", - "-e", "QDRANT_COLLECTION", - "-e", "QDRANT_API_KEY", - "mcp/qdrant-server" - ], - "env": { - "QDRANT_URL": "http://your-qdrant-instance:6333", - "QDRANT_COLLECTION": "documents", - "QDRANT_API_KEY": "your_api_key_if_needed" - } + "command": "/path/to/your/project/run-cursor-mcp.sh", + "args": [] } } } ``` -Alternatively, for NPM usage: - +### Claude Desktop +Add this configuration in Claude's settings: ```json { - "mcpServers": { - "qdrant-retrieval": { - "command": "npx", - "args": [ - "-y", - "qdrant-mcp-server" - ], - "env": { - "QDRANT_URL": "http://your-qdrant-instance:6333", - "QDRANT_COLLECTION": "documents", - "QDRANT_API_KEY": "your_api_key_if_needed" - } + "processes": { + "knowledge_server": { + "command": "/path/to/your/project/run-cursor-mcp.sh", + "args": [] } - } + }, + "tools": [ + { + "name": "store_knowledge", + "description": "Store domain-specific knowledge in a vector database", + "provider": "process", + "process": "knowledge_server" + }, + { + "name": "retrieve_knowledge_context", + "description": "Retrieve relevant domain knowledge from a vector database", + "provider": "process", + "process": "knowledge_server" + } + ] } ``` -## Customization +## Usage + +### Starting the Server + +```bash +npm start +``` + +For development with auto-reload: +```bash +npm run dev +``` + +### Storing Documentation -### Using a Different Embedding Model +The server includes a script to store documentation files (PDF and TXT) with metadata: -To use a different embedding model, modify the `generateEmbedding` function in `embedding-utils.ts`. Replace the mock implementation with your preferred embedding API call. +```bash +npm run store-doc +``` -### Extending the Search Response +Example: +```bash +# Store a PDF file +npm run store-doc docs/manual.pdf -To modify the search response format, update the type definitions in `qdrant-types.ts` and adjust the formatting in the `execute` function of the `retrieveFromQdrant` tool. +# Store a text file +npm run store-doc docs/readme.txt +``` -## Vector Database Support +The script will: +- Extract content from the file (text from PDF or plain text) +- Store the content with metadata including: + - Source: "documentation" + - File name and extension + - File size + - Last modified date + - Creation date + - Content type -This MCP server supports two vector databases: +### API Endpoints -1. **Qdrant** (default) - A high-performance vector database -2. **Chroma** - A simpler, lightweight vector database +#### Store Domain Knowledge -You can switch between these databases by setting the `DATABASE_TYPE` environment variable to either `qdrant` or `chroma`. +```http +POST /api/store +Content-Type: application/json -### Setting up Qdrant +{ + "content": "Your domain knowledge content here", + "source": "your-source", + "metadata": { + "key": "value" + } +} +``` -For Qdrant, you need to: +#### Query Domain Knowledge -1. Run a Qdrant server instance -2. Configure the connection in your .env file: - ``` - DATABASE_TYPE=qdrant - QDRANT_URL=http://localhost:6333 - QDRANT_API_KEY=your_api_key_if_needed - COLLECTION_NAME=your_collection_name - ``` +```http +POST /api/query +Content-Type: application/json -### Setting up Chroma +{ + "query": "Your search query here", + "limit": 5 +} +``` -For Chroma, you need to: +## Development -1. Run a Chroma server instance or use the in-memory version -2. Configure the connection in your .env file: - ``` - DATABASE_TYPE=chroma - CHROMA_URL=http://localhost:8000 - COLLECTION_NAME=your_collection_name - ``` +### Running Tests -### Running Chroma Server +```bash +npm test +``` -You can run a Chroma server using Docker: +### Building the Project ```bash -docker run -p 8000:8000 chromadb/chroma +npm run build ``` -Or install it directly: +### Linting ```bash -pip install chromadb -chromadb run --host 0.0.0.0 --port 8000 +npm run lint ``` +## Project Structure + +``` +src/ +├── core/ +│ ├── db-service.ts # Database service implementation +│ └── embedding-utils.ts # Embedding utilities +├── scripts/ +│ └── store-documentation.ts # Documentation storage script +└── index.ts # Main server file +``` + +## Using with Remote Qdrant + +When using with a remote Qdrant instance (like Qdrant Cloud): + +1. Ensure your `.env` has the correct URL with port number: +``` +QDRANT_URL=https://your-instance-id.region.gcp.cloud.qdrant.io:6333 +``` + +2. Set your API key: +``` +QDRANT_API_KEY=your_qdrant_api_key +``` + +## Troubleshooting + +If you encounter issues: + +1. Make sure you're using Node.js LTS version (`nvm use --lts`) +2. Verify your environment variables are correct +3. Check Qdrant/Chroma connectivity +4. Ensure the GEMINI_API_KEY is valid if using Gemini for embeddings + ## License -This MCP server is licensed under the MIT License. \ No newline at end of file +MIT \ No newline at end of file diff --git a/index.ts b/index.ts index 367124b..6931afa 100644 --- a/index.ts +++ b/index.ts @@ -5,36 +5,22 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js" import { CallToolRequestSchema, ListToolsRequestSchema, - Tool, } from "@modelcontextprotocol/sdk/types.js"; import dotenv from 'dotenv'; import { DatabaseService } from './src/core/db-service'; +import { KnowledgeManagementTools } from './src/core/knowledge-management-tools'; // Load environment variables dotenv.config(); -// Database service initialization +// Service initialization const dbService = new DatabaseService(); - -// Define the retrieval tool -const RETRIEVAL_TOOL: Tool = { - name: 'retrieve_information', - description: 'Retrieve information from vector database based on semantic similarity', - inputSchema: { - type: "object", - properties: { - query: { type: "string", description: "The search query for retrieval" }, - limit: { type: "number", default: 3, description: "Number of results to retrieve" }, - scoreThreshold: { type: "number", default: 0.7, description: "Minimum similarity score threshold (0-1)" }, - }, - required: ["query"], - }, -}; +const knowledgeTools = new KnowledgeManagementTools(dbService); // Server setup const server = new Server( { - name: "vector-retrieval-server", + name: "knowledge-management-server", version: "1.0.0", }, { @@ -46,34 +32,29 @@ const server = new Server( // Request handlers server.setRequestHandler(ListToolsRequestSchema, async () => ({ - tools: [RETRIEVAL_TOOL], + tools: knowledgeTools.getTools(), })); server.setRequestHandler(CallToolRequestSchema, async (request) => { const { name, arguments: args } = request.params; - if (name === 'retrieve_information') { - const { query, limit = 3, scoreThreshold = 0.7 } = args as Record; - - try { - // Search using the database service - const formattedResults = await dbService.search(query, limit, scoreThreshold); - - return { - content: [ - { type: "text", text: JSON.stringify({ results: formattedResults }) } - ], - }; - } catch (error) { - console.error('Error during vector search:', error); + try { + const tool = knowledgeTools.getTools().find(t => t.name === name); + if (!tool) { return { - content: [{ type: "text", text: `Failed to retrieve information: ${error}` }], + content: [{ type: "text", text: `Unknown tool: ${name}` }], isError: true, }; } - } else { + + const result = await tool.handler(args); + return { + content: [{ type: "text", text: JSON.stringify(result) }] + }; + } catch (error) { + console.error(`Error during tool execution (${name}):`, error); return { - content: [{ type: "text", text: `Unknown tool: ${name}` }], + content: [{ type: "text", text: `Failed to execute tool: ${error}` }], isError: true, }; } @@ -87,13 +68,12 @@ async function runServer() { const transport = new StdioServerTransport(); await server.connect(transport); - console.error("Vector Retrieval Server running on stdio"); + console.error("Knowledge Management Server running on stdio"); // Optional HTTP server if (process.env.HTTP_SERVER === "true") { const port = parseInt(process.env.PORT || '3000', 10); - // HTTP server code would go here - console.log(`Vector Retrieval MCP Server running on port ${port}`); + console.log(`Knowledge Management MCP Server running on port ${port}`); } } diff --git a/jest.config.js b/jest.config.cjs similarity index 100% rename from jest.config.js rename to jest.config.cjs diff --git a/package-lock.json b/package-lock.json index 5a4afe8..9e28c90 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,6 @@ "@google/generative-ai": "^0.24.0", "@modelcontextprotocol/sdk": "^1.8.0", "@qdrant/js-client-rest": "^1.13.0", - "axios": "^1.8.4", "chromadb": "^2.1.0", "dotenv": "^16.4.7", "pdf-parse": "^1.1.1", @@ -3481,17 +3480,6 @@ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, - "node_modules/axios": { - "version": "1.8.4", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz", - "integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==", - "license": "MIT", - "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.0", - "proxy-from-env": "^1.1.0" - } - }, "node_modules/b4a": { "version": "1.6.7", "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.7.tgz", @@ -5231,26 +5219,6 @@ "dev": true, "license": "ISC" }, - "node_modules/follow-redirects": { - "version": "1.15.9", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", - "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "license": "MIT", - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, "node_modules/form-data": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz", @@ -7664,12 +7632,6 @@ "node": ">= 0.10" } }, - "node_modules/proxy-from-env": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", - "license": "MIT" - }, "node_modules/pump": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.2.tgz", diff --git a/package.json b/package.json index 0b2c761..5642c09 100644 --- a/package.json +++ b/package.json @@ -6,12 +6,15 @@ "scripts": { "build": "tsc", "start": "node dist/index.js", - "dev": "ts-node index.ts", + "start:legacy": "node dist/legacy-retrieval-server.js", + "dev": "ts-node --esm index.ts", + "dev:legacy": "ts-node --esm src/legacy-retrieval-server.ts", "import": "ts-node src/scripts/data-import.ts", "upload": "ts-node src/scripts/document-uploader.ts", - "test": "jest", + "test": "jest --config jest.config.cjs", "test:watch": "jest --watch", - "lint": "eslint . --ext .ts" + "lint": "eslint . --ext .ts", + "store-doc": "ts-node src/scripts/store-documentation.ts" }, "dependencies": { "@google/generative-ai": "^0.24.0", diff --git a/run-cursor-mcp.sh b/run-cursor-mcp.sh new file mode 100755 index 0000000..43daf94 --- /dev/null +++ b/run-cursor-mcp.sh @@ -0,0 +1,8 @@ +#!/bin/zsh +cd /Users/hadv/vito-mcp +source ~/.zshrc +nvm use --lts + +# Let the app load environment variables from .env file +# No need to export variables here as dotenv will handle that +node dist/index.js \ No newline at end of file diff --git a/src/core/db-service.ts b/src/core/db-service.ts index 417b646..0d0b763 100644 --- a/src/core/db-service.ts +++ b/src/core/db-service.ts @@ -40,12 +40,16 @@ export class DatabaseService { ? DatabaseType.CHROMA : DatabaseType.QDRANT; - this.collectionName = process.env.COLLECTION_NAME || 'documents'; + this.collectionName = process.env.COLLECTION_NAME || 'vito'; this.embeddingFunction = new CustomEmbeddingFunction(); console.log(`Using database type: ${this.dbType}`); } + getDbType(): DatabaseType { + return this.dbType; + } + async initialize(): Promise { if (this.dbType === DatabaseType.QDRANT) { await this.initializeQdrant(); @@ -88,7 +92,6 @@ export class DatabaseService { }); try { - // List collections to check if ours exists const collections = await this.chromaClient.listCollections(); const collectionExists = collections.some((collection: any) => collection.name === this.collectionName); @@ -114,7 +117,6 @@ export class DatabaseService { } async search(query: string, limit: number = 3, scoreThreshold: number = 0.7): Promise { - // Generate embedding for query const queryEmbedding = await generateEmbedding(query); if (this.dbType === DatabaseType.QDRANT) { @@ -141,6 +143,7 @@ export class DatabaseService { metadata: { source: String(result.payload?.source || ''), score: result.score, + ...result.payload }, })); } @@ -156,44 +159,23 @@ export class DatabaseService { include: [IncludeEnum.Documents, IncludeEnum.Metadatas, IncludeEnum.Distances] }); - // Convert results to FormattedResult format const formattedResults: FormattedResult[] = []; if (searchResults.documents && searchResults.documents.length > 0 && searchResults.metadatas && searchResults.distances) { - // First array is for the first query const docs = searchResults.documents[0] || []; const metas = searchResults.metadatas[0] || []; const distances = searchResults.distances[0] || []; for (let i = 0; i < docs.length; i++) { - // In Chroma, lower distance is better, so convert to a similarity score (1 - distance) - // Assuming distances are normalized between 0-1 const similarityScore = 1 - (distances[i] || 0); + const docText = docs[i] !== null && docs[i] !== undefined ? String(docs[i]) : ''; + const metaObj = metas[i] && typeof metas[i] === 'object' ? metas[i] as Record : {}; - // Handle documents - ensure we have a valid string - let docText = ''; - if (docs[i] !== null && docs[i] !== undefined) { - docText = String(docs[i]); - } - - // Handle metadata - ensure we have a valid object - let metaObj: Record = {}; - let source = ''; - - if (metas[i] && typeof metas[i] === 'object') { - metaObj = metas[i] as Record; - // Ensure source is a string - if (metaObj.source !== undefined) { - source = String(metaObj.source); - } - } - - // Construct the formatted result with proper types formattedResults.push({ text: docText, metadata: { - source: source, + source: String(metaObj.source || ''), score: similarityScore, ...metaObj } @@ -203,4 +185,61 @@ export class DatabaseService { return formattedResults; } + + async storeDomainKnowledge( + text: string, + domain: string, + metadata: Record = {} + ): Promise { + const pointId = crypto.randomUUID(); + const timestamp = new Date().toISOString(); + + const enhancedMetadata = { + ...metadata, + domain, + timestamp, + type: 'domain_knowledge', + version: 1 + }; + + if (this.dbType === DatabaseType.QDRANT) { + await this.storeDocumentQdrant(text, enhancedMetadata); + } else { + await this.storeDocumentChroma(text, enhancedMetadata); + } + + return pointId; + } + + private async storeDocumentQdrant(text: string, metadata: Record): Promise { + if (!this.qdrantClient) { + throw new Error('Qdrant client not initialized'); + } + + const embedding = await generateEmbedding(text); + const pointId = crypto.randomUUID(); + + await this.qdrantClient.upsert(this.collectionName, { + points: [{ + id: pointId, + vector: embedding, + payload: { + text, + ...metadata + } + }] + }); + } + + private async storeDocumentChroma(text: string, metadata: Record): Promise { + if (!this.chromaCollection) { + throw new Error('Chroma collection not initialized'); + } + + await this.chromaCollection.add({ + ids: [crypto.randomUUID()], + documents: [text], + metadatas: [metadata] + }); + } } \ No newline at end of file diff --git a/src/core/knowledge-management-tools.ts b/src/core/knowledge-management-tools.ts new file mode 100644 index 0000000..e61d374 --- /dev/null +++ b/src/core/knowledge-management-tools.ts @@ -0,0 +1,184 @@ +import { Tool } from '@modelcontextprotocol/sdk/types.js'; +import { DatabaseService } from './db-service'; + +interface DomainTool extends Tool { + handler: (args?: any) => Promise; +} + +export class KnowledgeManagementTools { + constructor(private dbService: DatabaseService) {} + + getTools(): DomainTool[] { + return [ + { + name: 'store_knowledge', + description: 'Store various types of knowledge, insights, and experiences in a specific domain. This includes but is not limited to: best practices, lessons learned, understandings, experiences, and solutions.', + inputSchema: { + type: "object", + properties: { + content: { + type: "string", + description: "The knowledge content to store (e.g., best practice, lesson learned, insight)" + }, + domain: { + type: "string", + description: "The knowledge domain this belongs to" + }, + knowledgeType: { + type: "string", + enum: ["best_practice", "lesson_learned", "insight", "experience", "solution", "understanding", "pattern", "anti_pattern", "tip", "troubleshooting"], + description: "Type of knowledge being stored" + }, + context: { + type: "object", + description: "Context about this knowledge", + properties: { + situation: { type: "string", description: "The situation or problem this knowledge addresses" }, + impact: { type: "string", description: "The impact or benefit of this knowledge" }, + prerequisites: { type: "array", items: { type: "string" }, description: "Prerequisites or requirements for this knowledge" }, + relatedTopics: { type: "array", items: { type: "string" }, description: "Related topics or concepts" } + } + }, + metadata: { + type: "object", + description: "Additional metadata about the knowledge", + properties: { + source: { type: "string", description: "Source of this knowledge (e.g., 'conversation', 'documentation', 'experience')" }, + timestamp: { type: "string", description: "When this knowledge was captured" }, + confidence: { type: "number", description: "Confidence level in this knowledge (0-1)" }, + verified: { type: "boolean", description: "Whether this knowledge has been verified" } + } + } + }, + required: ["content", "domain", "knowledgeType"] + }, + handler: async ({ content, domain, knowledgeType, context = {}, metadata = {} }) => { + const docId = await this.dbService.storeDomainKnowledge( + content, + domain, + { + ...metadata, + knowledge_type: knowledgeType, + context, + source: metadata.source || 'llm_interaction', + timestamp: metadata.timestamp || new Date().toISOString(), + confidence: metadata.confidence || 0.8, + verified: metadata.verified || false + } + ); + return { + success: true, + documentId: docId, + domain, + knowledgeType, + message: 'Knowledge stored successfully' + }; + } + }, + { + name: 'retrieve_knowledge_context', + description: 'Retrieve relevant knowledge and context for a given task or query. This tool helps LLM access its long-term memory and relevant context for better task execution.', + inputSchema: { + type: "object", + properties: { + query: { + type: "string", + description: "The current task or query to find relevant knowledge for" + }, + domains: { + type: "array", + items: { type: "string" }, + description: "Optional list of domains to search in. If not provided, searches across all domains." + }, + knowledgeTypes: { + type: "array", + items: { + type: "string", + enum: ["best_practice", "lesson_learned", "insight", "experience", "solution", "understanding", "pattern", "anti_pattern", "tip", "troubleshooting"] + }, + description: "Optional list of knowledge types to include" + }, + context: { + type: "object", + description: "Current context to find relevant knowledge", + properties: { + currentSituation: { type: "string", description: "Current situation or problem being addressed" }, + relatedTopics: { type: "array", items: { type: "string" }, description: "Topics related to the current task" }, + constraints: { type: "array", items: { type: "string" }, description: "Any constraints or requirements for the current task" } + } + }, + options: { + type: "object", + description: "Additional retrieval options", + properties: { + maxResults: { type: "number", default: 5, description: "Maximum number of results to return" }, + minConfidence: { type: "number", default: 0.7, description: "Minimum confidence threshold for results" }, + includeContext: { type: "boolean", default: true, description: "Whether to include context information in results" }, + prioritizeRecent: { type: "boolean", default: true, description: "Whether to prioritize more recent knowledge" } + } + } + }, + required: ["query"] + }, + handler: async ({ query, domains, knowledgeTypes, context = {}, options = {} }) => { + const { + maxResults = 5, + minConfidence = 0.7, + includeContext = true, + prioritizeRecent = true + } = options; + + // Search for relevant knowledge + const results = await this.dbService.search(query, maxResults, minConfidence); + + // Filter and format results for LLM context + const formattedResults = results + .filter(result => { + // Apply domain filter if specified + if (domains && domains.length > 0) { + return domains.includes(result.metadata.domain); + } + return true; + }) + .filter(result => { + // Apply knowledge type filter if specified + if (knowledgeTypes && knowledgeTypes.length > 0) { + return knowledgeTypes.includes(result.metadata.knowledge_type); + } + return true; + }) + .map(result => ({ + content: result.text, + type: result.metadata.knowledge_type, + domain: result.metadata.domain, + confidence: result.metadata.score, + timestamp: result.metadata.timestamp, + ...(includeContext && { + context: { + situation: result.metadata.context?.situation, + impact: result.metadata.context?.impact, + prerequisites: result.metadata.context?.prerequisites, + relatedTopics: result.metadata.context?.relatedTopics + } + }) + })) + .sort((a, b) => prioritizeRecent ? + new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime() : + 0 + ); + + return { + relevantKnowledge: formattedResults, + currentContext: context, + metadata: { + totalResults: formattedResults.length, + domains: domains || 'all', + knowledgeTypes: knowledgeTypes || 'all', + confidenceThreshold: minConfidence + } + }; + } + } + ]; + } +} \ No newline at end of file diff --git a/src/legacy-retrieval-server.ts b/src/legacy-retrieval-server.ts new file mode 100644 index 0000000..5f9dd41 --- /dev/null +++ b/src/legacy-retrieval-server.ts @@ -0,0 +1,113 @@ +#!/usr/bin/env node + +import { Server } from "@modelcontextprotocol/sdk/server/index.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { + CallToolRequestSchema, + ListToolsRequestSchema, + Tool, +} from "@modelcontextprotocol/sdk/types.js"; +import dotenv from 'dotenv'; +import { DatabaseService } from './core/db-service'; + +// Load environment variables +dotenv.config(); + +interface DomainTool extends Tool { + handler: (args?: any) => Promise; +} + +class LegacyRetrievalTools { + constructor(private dbService: DatabaseService) {} + + getTools(): DomainTool[] { + return [ + { + name: 'retrieve_information', + description: 'Retrieve information from vector database based on semantic similarity', + inputSchema: { + type: "object", + properties: { + query: { type: "string", description: "The search query for retrieval" }, + limit: { type: "number", default: 3, description: "Number of results to retrieve" }, + scoreThreshold: { type: "number", default: 0.7, description: "Minimum similarity score threshold (0-1)" }, + }, + required: ["query"], + }, + handler: async ({ query, limit = 3, scoreThreshold = 0.7 }) => { + const formattedResults = await this.dbService.search(query, limit, scoreThreshold); + return { results: formattedResults }; + } + } + ]; + } +} + +// Service initialization +const dbService = new DatabaseService(); +const legacyTools = new LegacyRetrievalTools(dbService); + +// Server setup +const server = new Server( + { + name: "legacy-retrieval-server", + version: "1.0.0", + }, + { + capabilities: { + tools: {}, + }, + }, +); + +// Request handlers +server.setRequestHandler(ListToolsRequestSchema, async () => ({ + tools: legacyTools.getTools(), +})); + +server.setRequestHandler(CallToolRequestSchema, async (request) => { + const { name, arguments: args } = request.params; + + try { + const tool = legacyTools.getTools().find(t => t.name === name); + if (!tool) { + return { + content: [{ type: "text", text: `Unknown tool: ${name}` }], + isError: true, + }; + } + + const result = await tool.handler(args); + return { + content: [{ type: "text", text: JSON.stringify(result) }] + }; + } catch (error) { + console.error(`Error during tool execution (${name}):`, error); + return { + content: [{ type: "text", text: `Failed to execute tool: ${error}` }], + isError: true, + }; + } +}); + +// Server startup +async function runServer() { + // Initialize the database service + await dbService.initialize(); + + const transport = new StdioServerTransport(); + await server.connect(transport); + + console.error("Legacy Retrieval Server running on stdio"); + + // Optional HTTP server + if (process.env.HTTP_SERVER === "true") { + const port = parseInt(process.env.PORT || '3000', 10); + console.log(`Legacy Retrieval MCP Server running on port ${port}`); + } +} + +runServer().catch((error) => { + console.error('Fatal error running server:', error); + process.exit(1); +}); \ No newline at end of file diff --git a/src/scripts/store-documentation.ts b/src/scripts/store-documentation.ts new file mode 100644 index 0000000..7b85af9 --- /dev/null +++ b/src/scripts/store-documentation.ts @@ -0,0 +1,65 @@ +import { DatabaseService } from '../core/db-service'; +import fs from 'fs/promises'; +import path from 'path'; +import pdf from 'pdf-parse'; + +async function extractContent(filePath: string, fileExtension: string): Promise { + switch (fileExtension.toLowerCase()) { + case '.pdf': + const dataBuffer = await fs.readFile(filePath); + const pdfData = await pdf(dataBuffer); + return pdfData.text; + case '.txt': + return await fs.readFile(filePath, 'utf-8'); + default: + throw new Error(`Unsupported file type: ${fileExtension}`); + } +} + +async function storeDocumentation(filePath: string) { + try { + const dbService = new DatabaseService(); + await dbService.initialize(); + + // Get file metadata + const stats = await fs.stat(filePath); + const fileName = path.basename(filePath); + const fileExtension = path.extname(filePath); + + // Extract content based on file type + const content = await extractContent(filePath, fileExtension); + + // Prepare metadata + const metadata = { + source: 'documentation', + file_name: fileName, + file_extension: fileExtension, + file_size: stats.size, + last_modified: stats.mtime.toISOString(), + created_at: stats.birthtime.toISOString(), + content_type: fileExtension.toLowerCase() === '.pdf' ? 'application/pdf' : 'text/plain', + }; + + // Store in database + const pointId = await dbService.storeDomainKnowledge( + content, + 'documentation', + metadata + ); + + console.log(`Successfully stored documentation from ${fileName} with ID: ${pointId}`); + console.log(`Content length: ${content.length} characters`); + } catch (error) { + console.error('Error storing documentation:', error); + process.exit(1); + } +} + +// Get file path from command line argument +const filePath = process.argv[2]; +if (!filePath) { + console.error('Please provide a file path as an argument'); + process.exit(1); +} + +storeDocumentation(filePath); \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index 9268db2..f8c15e5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "target": "ES2020", - "module": "commonjs", + "module": "CommonJS", "outDir": "./dist", "strict": true, "esModuleInterop": true,