diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..4ebd8e81
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,15 @@
+# Glean MCP Server Configuration
+# Copy this file to .env and fill in your values
+
+# Your Glean server URL (copy from your Glean admin panel)
+GLEAN_SERVER_URL=https://your-company-be.glean.com/
+
+# Your Glean API token (generate from Glean settings)
+GLEAN_API_TOKEN=your_api_token_here
+
+# Optional: User to impersonate (only valid with global tokens)
+# GLEAN_ACT_AS=user@company.com
+
+# Alternative configuration (legacy):
+# GLEAN_INSTANCE=your-company  # Note: -be is automatically appended
+# GLEAN_BASE_URL=https://your-company-be.glean.com/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 8269f720..e788e335 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ build
 sandbox
 sand\ box
 debug.log
+.env
diff --git a/docs/pagination.md b/docs/pagination.md
new file mode 100644
index 00000000..e6e2ba5d
--- /dev/null
+++ b/docs/pagination.md
@@ -0,0 +1,126 @@
+# Pagination Support in Glean MCP Server
+
+The Glean MCP Server now supports pagination for search results and chat responses, helping to manage large result sets and prevent token limit errors.
+
+## Search Pagination
+
+Both `company_search` and `people_profile_search` tools support pagination through the `cursor` parameter.
+
+### Basic Usage
+
+```json
+// First request
+{
+  "query": "Docker projects",
+  "pageSize": 20
+}
+
+// Response includes pagination info
+{
+  "results": [...],
+  "cursor": "abc123",
+  "hasMoreResults": true,
+  "totalResults": 150
+}
+
+// Next page request
+{
+  "query": "Docker projects",
+  "pageSize": 20,
+  "cursor": "abc123"
+}
+```
+
+### People Search Example
+
+```json
+// Initial search
+{
+  "query": "DevOps engineers",
+  "filters": {
+    "department": "Engineering"
+  },
+  "pageSize": 25
+}
+
+// Continue with cursor from response
+{
+  "query": "DevOps engineers",
+  "filters": {
+    "department": "Engineering"
+  },
+  "pageSize": 25,
+  "cursor": "next-page-cursor"
+}
+```
+
+## Chat Response Chunking
+
+The chat tool automatically chunks large responses that would exceed token limits (~25k tokens).
+
+### Automatic Chunking
+
+When a chat response is too large, it's automatically split into manageable chunks:
+
+```json
+// Initial chat request
+{
+  "message": "Explain all our microservices architecture"
+}
+
+// Response with chunk metadata
+{
+  "content": "... first part of response ...",
+  "_chunkMetadata": {
+    "responseId": "uuid-123",
+    "chunkIndex": 0,
+    "totalChunks": 3,
+    "hasMore": true
+  }
+}
+```
+
+### Continuing Chunked Responses
+
+To get subsequent chunks:
+
+```json
+{
+  "message": "",
+  "continueFrom": {
+    "responseId": "uuid-123",
+    "chunkIndex": 1
+  }
+}
+```
+
+## Implementation Details
+
+### Token Limits
+- Maximum tokens per response: 15,000 (a safe margin below the ~25k cap)
+- Character to token ratio: ~3 characters per token (a conservative estimate)
+
+### Chunking Strategy
+1. Attempts to split at paragraph boundaries (double newlines)
+2. Falls back to sentence boundaries if paragraphs are too large
+3. Force splits at character level for extremely long unbroken text
+
+### Response Format
+All paginated responses include:
+- `cursor` or `_chunkMetadata`: Pagination state
+- `hasMoreResults` or `hasMore`: Boolean indicating more data available
+- `totalResults` or `totalChunks`: Total count when available
+
+## Best Practices
+
+1. **Set appropriate page sizes**: Balance between response size and number of requests
+2. **Handle pagination in loops**: When fetching all results, continue until `hasMoreResults` is false (see the sketch below)
+3. **Store cursors**: Keep track of cursors for user sessions to allow navigation
+4. **Error handling**: Always check for continuation metadata before attempting to continue
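+
+### Example: Fetching All Pages
+
+A minimal TypeScript sketch of the loop described in practice 2. `callTool` is a hypothetical stand-in for whatever tool-invocation API your MCP client exposes; the request and response shapes match the examples above:
+
+```typescript
+// Sketch only: replace `callTool` with your MCP client's invocation API.
+declare function callTool(name: string, args: object): Promise<any>;
+
+async function fetchAllResults(query: string) {
+  const results: any[] = [];
+  let cursor: string | undefined;
+
+  do {
+    // `cursor` is undefined on the first request and echoed back by each response
+    const page = await callTool('company_search', { query, pageSize: 20, cursor });
+    results.push(...(page.results ?? []));
+    cursor = page.hasMoreResults ? page.cursor : undefined;
+  } while (cursor);
+
+  return results;
+}
+```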
+
+## Error Handling
+
+Common errors:
+- Invalid cursor: the server returns an error if the cursor is expired or invalid
+- Invalid chunk index: the chat tool returns an error if the requested chunk doesn't exist
+- Missing continuation data: if no `continueFrom` data is supplied, the request is handled as a normal chat message
\ No newline at end of file
diff --git a/packages/configure-mcp-server/src/index.ts b/packages/configure-mcp-server/src/index.ts
index d9f7dba5..b9cbcc60 100644
--- a/packages/configure-mcp-server/src/index.ts
+++ b/packages/configure-mcp-server/src/index.ts
@@ -33,7 +33,7 @@ import {
   forceRefreshTokens,
   setupMcpRemote,
 } from '@gleanwork/mcp-server-utils/auth';
-import { chat, formatResponse } from '@gleanwork/local-mcp-server/tools/chat';
+import { chat, formatChunkedResponse } from '@gleanwork/local-mcp-server/tools/chat';
 import { VERSION } from './common/version.js';
 import { checkAndOpenLaunchWarning } from '@gleanwork/mcp-server-utils/util';
@@ -277,7 +277,7 @@ connect after configuration.
     case 'auth-test': {
       try {
         const chatResponse = await chat({ message: 'Who am I?' });
-        trace('auth-test search', formatResponse(chatResponse));
+        trace('auth-test search', formatChunkedResponse(chatResponse));
         console.log('Access token accepted.');
       } catch (err: any) {
         error('auth-test error', err);
diff --git a/packages/local-mcp-server/README.md b/packages/local-mcp-server/README.md
index 5bed50c2..04fa2ffb 100644
--- a/packages/local-mcp-server/README.md
+++ b/packages/local-mcp-server/README.md
@@ -9,30 +9,36 @@ The Glean MCP Server is a [Model Context Protocol (MCP)](https://modelcontextpro
 
 ## Features
 
-- **Company Search**: Access Glean's powerful content search capabilities
-- **People Profile Search**: Access Glean's people directory
-- **Chat**: Interact with Glean's AI assistant
+- **Company Search**: Access Glean's powerful content search capabilities with pagination support
+- **People Profile Search**: Access Glean's people directory with pagination support
+- **Chat**: Interact with Glean's AI assistant, with automatic chunking of large responses
 - **Read Documents**: Retrieve documents from Glean by ID or URL
+- **Pagination Support**: Handle large result sets efficiently with cursor-based pagination
+- **Response Chunking**: Automatically splits large chat responses to avoid token limits
 - **MCP Compliant**: Implements the Model Context Protocol specification
 
 ## Tools
 
 - ### company_search
 
-  Search Glean's content index using the Glean Search API. This tool allows you to query Glean's content index with various filtering and configuration options.
+  Search Glean's content index using the Glean Search API. This tool allows you to query Glean's content index with various filtering and configuration options. Supports pagination through the `cursor` parameter for handling large result sets.
 
 - ### chat
 
-  Interact with Glean's AI assistant using the Glean Chat API. This tool allows you to have conversational interactions with Glean's AI, including support for message history, citations, and various configuration options.
+  Interact with Glean's AI assistant using the Glean Chat API. This tool allows you to have conversational interactions with Glean's AI, including support for message history, citations, and various configuration options. Automatically chunks large responses to avoid token limits and provides continuation support.
 
 - ### people_profile_search
 
-  Search Glean's People directory to find employee information.
+  Search Glean's People directory to find employee information. Supports pagination through the `cursor` parameter for handling large result sets.
 
 - ### read_documents
 
   Read documents from Glean by providing document IDs or URLs. This tool allows you to retrieve the full content of specific documents for detailed analysis or reference.
 
+## Pagination
+
+For detailed information about pagination support and examples, see [Pagination Documentation](../../docs/pagination.md).
+
 ## MCP Client Configuration
 
 To configure this MCP server in your MCP client (such as Claude Desktop, Windsurf, Cursor, etc.), run [@gleanwork/configure-mcp-server](https://github.com/gleanwork/mcp-server/tree/main/packages/configure-mcp-server) passing in your client, token and instance.
@@ -58,7 +64,7 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
       "command": "npx",
       "args": ["-y", "@gleanwork/local-mcp-server"],
       "env": {
-        "GLEAN_INSTANCE": "",
+        "GLEAN_SERVER_URL": "",
         "GLEAN_API_TOKEN": ""
       }
     }
@@ -66,7 +72,53 @@ To manually configure an MCP client (such as Claude Desktop, Windsurf, Cursor, e
 }
 ```
 
-Replace the environment variable values with your actual Glean credentials.
+Example values:
+- `GLEAN_SERVER_URL`: `https://acme-corp-be.glean.com/` (copy from your Glean admin panel)
+- `GLEAN_API_TOKEN`: Your API token from Glean settings
+
+Alternative configuration (legacy; note that `-be` is automatically appended):
+```json
+"env": {
+  "GLEAN_INSTANCE": "acme-corp", // becomes https://acme-corp-be.glean.com/
+  "GLEAN_API_TOKEN": ""
+}
+```
+
+### Local Development
+
+For local development, you can use a `.env` file to store your credentials:
+
+1. Copy the example environment file:
+```bash
+cp ../../.env.example ../../.env
+```
+
+2. Edit `.env` with your values:
+```bash
+# .env
+GLEAN_SERVER_URL=https://your-company-be.glean.com/
+GLEAN_API_TOKEN=your_api_token_here
+```
+
+3. Run the server locally:
+```bash
+npm run build
+node build/index.js
+```
+
+4. For use with MCP clients during development:
+```json
+{
+  "mcpServers": {
+    "glean-dev": {
+      "command": "node",
+      "args": ["/path/to/packages/local-mcp-server/build/index.js"]
+    }
+  }
+}
+```
+
+The server will automatically load environment variables from the `.env` file.
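+
+Loading happens through the `dotenv/config` side-effect import added at the top of `src/index.ts` in this change:
+
+```ts
+import 'dotenv/config'; // reads .env into process.env before anything else runs
+```
+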
### Debugging diff --git a/packages/local-mcp-server/src/index.ts b/packages/local-mcp-server/src/index.ts index 32ee8a46..5c48aafe 100644 --- a/packages/local-mcp-server/src/index.ts +++ b/packages/local-mcp-server/src/index.ts @@ -8,6 +8,7 @@ * @module @gleanwork/local-mcp-server */ +import 'dotenv/config'; import meow from 'meow'; import { runServer } from './server.js'; import { Logger, trace, LogLevel } from '@gleanwork/mcp-server-utils/logger'; diff --git a/packages/local-mcp-server/src/server.ts b/packages/local-mcp-server/src/server.ts index 1ce5dbdd..3fd4cf12 100644 --- a/packages/local-mcp-server/src/server.ts +++ b/packages/local-mcp-server/src/server.ts @@ -60,12 +60,20 @@ export async function listToolsHandler() { name: TOOL_NAMES.companySearch, description: `Find relevant company documents and data - Example request: + Example requests: + // Basic search { "query": "What are the company holidays this year?", "datasources": ["drive", "confluence"] } + + // Search with pagination + { + "query": "Docker projects", + "pageSize": 20, + "cursor": "pagination_cursor" // From previous response + } `, inputSchema: zodToJsonSchema(search.ToolSearchSchema), }, @@ -73,8 +81,9 @@ export async function listToolsHandler() { name: TOOL_NAMES.chat, description: `Chat with Glean Assistant using Glean's RAG - Example request: + Example requests: + // Basic chat { "message": "What are the company holidays this year?", "context": [ @@ -82,6 +91,15 @@ export async function listToolsHandler() { "I'm planning my vacation for next year." ] } + + // Continue from chunked response + { + "message": "", + "continueFrom": { + "responseId": "uuid-here", + "chunkIndex": 1 + } + } `, inputSchema: zodToJsonSchema(chat.ToolChatSchema), }, @@ -89,17 +107,24 @@ export async function listToolsHandler() { name: TOOL_NAMES.peopleProfileSearch, description: `Search for people profiles in the company - Example request: + Example requests: + // Basic search { "query": "Find people named John Doe", "filters": { - "department": "Engineering", + "department": "Engineering", "city": "San Francisco" }, "pageSize": 10 } + // Search with pagination + { + "query": "DevOps engineers", + "pageSize": 25, + "cursor": "pagination_cursor" // From previous response + } `, inputSchema: zodToJsonSchema( peopleProfileSearch.ToolPeopleProfileSearchSchema, @@ -152,7 +177,7 @@ export async function callToolHandler( case TOOL_NAMES.chat: { const args = chat.ToolChatSchema.parse(request.params.arguments); const result = await chat.chat(args); - const formattedResults = chat.formatResponse(result); + const formattedResults = chat.formatChunkedResponse(result); return { content: [{ type: 'text', text: formattedResults }], diff --git a/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts b/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts index 11afe3f7..6b74d623 100644 --- a/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts +++ b/packages/local-mcp-server/src/test/formatters/chat-formatter.test.ts @@ -1,12 +1,13 @@ import { describe, it, expect } from 'vitest'; import { formatResponse } from '../../tools/chat.js'; +import { Author, MessageType } from '@gleanwork/api-client/models/components'; describe('Chat Formatter', () => { it('should format chat responses correctly', () => { const mockChatResponse = { messages: [ { - author: 'USER', + author: Author.User, fragments: [ { text: 'What is Glean?', @@ -15,7 +16,7 @@ describe('Chat Formatter', () => { messageId: 'user-msg-1', }, { - author: 
'GLEAN_AI', + author: Author.GleanAi, fragments: [ { text: 'Glean is an AI platform for work that helps organizations find and understand information. It provides enterprise search, AI assistants, and agent capabilities.', @@ -36,7 +37,7 @@ describe('Chat Formatter', () => { }, ], messageId: 'assistant-msg-1', - messageType: 'UPDATE', + messageType: MessageType.Update, stepId: 'RESPOND', }, ], @@ -62,7 +63,7 @@ describe('Chat Formatter', () => { const mockChatResponse = { messages: [ { - author: 'GLEAN_AI', + author: Author.GleanAi, fragments: [ { querySuggestion: { @@ -72,7 +73,7 @@ describe('Chat Formatter', () => { }, ], messageId: 'query-msg-1', - messageType: 'UPDATE', + messageType: MessageType.Update, stepId: 'SEARCH', }, ], @@ -87,7 +88,7 @@ describe('Chat Formatter', () => { const mockChatResponse = { messages: [ { - author: 'GLEAN_AI', + author: Author.GleanAi, fragments: [ { structuredResults: [ @@ -107,7 +108,7 @@ describe('Chat Formatter', () => { }, ], messageId: 'results-msg-1', - messageType: 'UPDATE', + messageType: MessageType.Update, stepId: 'SEARCH', }, ], @@ -143,10 +144,10 @@ describe('Chat Formatter', () => { const messagesWithoutFragments = { messages: [ { - author: 'USER', + author: Author.User, }, { - author: 'GLEAN_AI', + author: Author.GleanAi, citations: [ { sourceDocument: { @@ -155,7 +156,7 @@ describe('Chat Formatter', () => { }, }, ], - messageType: 'CONTENT', + messageType: MessageType.Content, }, ], }; @@ -170,7 +171,7 @@ describe('Chat Formatter', () => { const messagesWithoutCitations = { messages: [ { - author: 'USER', + author: Author.User, fragments: [ { text: 'Hello', @@ -178,13 +179,13 @@ describe('Chat Formatter', () => { ], }, { - author: 'GLEAN_AI', + author: Author.GleanAi, fragments: [ { text: 'Hi there! How can I help you today?', }, ], - messageType: 'CONTENT', + messageType: MessageType.Content, }, ], }; @@ -201,7 +202,7 @@ describe('Chat Formatter', () => { const mixedFragmentsMessage = { messages: [ { - author: 'GLEAN_AI', + author: Author.GleanAi, fragments: [ { text: 'Searching for:', @@ -227,7 +228,7 @@ describe('Chat Formatter', () => { }, ], messageId: 'mixed-msg-1', - messageType: 'UPDATE', + messageType: MessageType.Update, stepId: 'SEARCH', }, ], diff --git a/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts b/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts index f1ab4401..fdde2a10 100644 --- a/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts +++ b/packages/local-mcp-server/src/test/formatters/search-formatter.test.ts @@ -102,7 +102,7 @@ describe('Search Formatter', () => { const formattedResults = formatResponse(emptyResults); expect(formattedResults).toContain( - 'Search results for "nonexistent term" (0 results)', + 'Search results for "nonexistent term" (showing 0 of 0 results)', ); }); diff --git a/packages/local-mcp-server/src/test/server.test.ts b/packages/local-mcp-server/src/test/server.test.ts index 6c5428d5..ece9d25c 100644 --- a/packages/local-mcp-server/src/test/server.test.ts +++ b/packages/local-mcp-server/src/test/server.test.ts @@ -174,7 +174,7 @@ describe('MCP Server Handlers (integration)', () => { { "content": [ { - "text": "Found 1 people: + "text": "Found 1 of 1 people: 1. 
Jane Doe – Software Engineer, Engineering (San Francisco) • jane.doe@example.com", "type": "text", diff --git a/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts new file mode 100644 index 00000000..610a339b --- /dev/null +++ b/packages/local-mcp-server/src/test/tools/chat-response-buffer.test.ts @@ -0,0 +1,191 @@ +import { describe, it, expect, beforeEach } from 'vitest'; +import { ChatResponseBuffer } from '../../tools/chat-response-buffer.js'; + +describe('ChatResponseBuffer', () => { + let buffer: ChatResponseBuffer; + + beforeEach(() => { + buffer = new ChatResponseBuffer(); + }); + + describe('Token Estimation', () => { + it('should estimate tokens correctly', async () => { + const text = 'Hello world!'; // 12 chars = 4 tokens (3 chars/token) + const result = await buffer.processResponse(text); + + expect(result.metadata).toBeUndefined(); // Should not chunk small text + expect(result.content).toBe(text); + }); + + it('should not chunk responses under token limit', async () => { + // 10k chars = ~3.3k tokens, under 15k limit + const smallText = 'a'.repeat(10000); + const result = await buffer.processResponse(smallText); + + expect(result.metadata).toBeUndefined(); + expect(result.content).toBe(smallText); + }); + }); + + describe('Text Chunking Logic', () => { + it('should chunk large responses', async () => { + // 100k chars = ~33k tokens, over 15k limit + const largeText = 'This is a test paragraph.\n\n'.repeat(4000); + const result = await buffer.processResponse(largeText); + + expect(result.metadata).toBeDefined(); + expect(result.metadata!.chunkIndex).toBe(0); + expect(result.metadata!.totalChunks).toBeGreaterThan(1); + expect(result.metadata!.hasMore).toBe(true); + expect(result.metadata!.responseId).toBeTruthy(); + }); + + it('should prefer splitting at paragraph boundaries', async () => { + // Create text with manageable paragraph breaks that will trigger chunking + const paragraph = 'This is a test paragraph with some content.\n\n'; + const largeText = paragraph.repeat(2000); // Creates text large enough to chunk + + const result = await buffer.processResponse(largeText); + + expect(result.metadata).toBeDefined(); + // The content should be chunked and may end with paragraph boundary + expect(result.content.length).toBeGreaterThan(0); + expect(result.content.length).toBeLessThan(largeText.length); + }); + + it('should fall back to sentence boundaries when paragraphs are too large', async () => { + // Create one huge paragraph with sentences + const sentence = 'A'.repeat(1000) + '. 
'; + const hugeParagraph = sentence.repeat(100); // 100k+ chars, no paragraph breaks + + const result = await buffer.processResponse(hugeParagraph); + + expect(result.metadata).toBeDefined(); + expect(result.content).toMatch(/\.\s*$/); // Should end at sentence boundary + }); + + it('should force split when no natural boundaries exist', async () => { + // Create text with no natural boundaries + // Use 50k chars (just over the 45k chunk limit) to test force split + const largeText = 'A'.repeat(50000); + + const result = await buffer.processResponse(largeText); + + expect(result.metadata).toBeDefined(); + expect(result.content.length).toBeLessThan(largeText.length); + expect(result.content.length).toBeGreaterThan(0); + expect(result.metadata!.totalChunks).toBe(2); // Should split into 2 chunks + }); + }); + + describe('Chunk Storage and Retrieval', () => { + it('should store and retrieve chunks correctly', async () => { + const largeText = 'Test paragraph.\n\n'.repeat(5000); + const firstChunk = await buffer.processResponse(largeText); + + expect(firstChunk.metadata).toBeDefined(); + const responseId = firstChunk.metadata!.responseId; + + // Get second chunk + const secondChunk = buffer.getChunk(responseId, 1); + + expect(secondChunk).toBeDefined(); + expect(secondChunk!.metadata!.chunkIndex).toBe(1); + expect(secondChunk!.metadata!.responseId).toBe(responseId); + expect(secondChunk!.content).toBeTruthy(); + }); + + it('should return null for invalid chunk requests', async () => { + const invalidChunk = buffer.getChunk('invalid-id', 0); + expect(invalidChunk).toBeNull(); + + // Create a response first that will definitely chunk + const largeText = 'Test.\n\n'.repeat(10000); // Much larger - 70k chars + const result = await buffer.processResponse(largeText); + const responseId = result.metadata!.responseId; + + // Request chunk beyond available range + const beyondRange = buffer.getChunk(responseId, 999); + expect(beyondRange).toBeNull(); + }); + + it('should handle last chunk correctly', async () => { + const largeText = 'Test paragraph.\n\n'.repeat(10000); // Much larger - 160k chars + const firstChunk = await buffer.processResponse(largeText); + + const responseId = firstChunk.metadata!.responseId; + const totalChunks = firstChunk.metadata!.totalChunks; + + // Get last chunk + const lastChunk = buffer.getChunk(responseId, totalChunks - 1); + + expect(lastChunk).toBeDefined(); + expect(lastChunk!.metadata!.hasMore).toBe(false); + expect(lastChunk!.metadata!.chunkIndex).toBe(totalChunks - 1); + }); + }); + + describe('Chunk Metadata', () => { + it('should provide accurate chunk metadata', async () => { + const largeText = 'Test paragraph.\n\n'.repeat(10000); // Much larger - 160k chars + const result = await buffer.processResponse(largeText); + + expect(result.metadata).toBeDefined(); + expect(result.metadata!.chunkIndex).toBe(0); + expect(result.metadata!.totalChunks).toBeGreaterThan(1); + expect(result.metadata!.responseId).toMatch(/^[a-f0-9-]{36}$/); // UUID format + expect(result.metadata!.hasMore).toBe(true); + }); + + it('should correctly identify when no more chunks exist', async () => { + const largeText = 'Short text that fits in one chunk.'; + const result = await buffer.processResponse(largeText); + + expect(result.metadata).toBeUndefined(); // No chunking needed + }); + }); + + describe('Cleanup', () => { + it('should allow manual cleanup of stored responses', async () => { + const largeText = 'Test.\n\n'.repeat(10000); // Much larger - 70k chars + const result = await 
buffer.processResponse(largeText); + const responseId = result.metadata!.responseId; + + // Should be able to get chunk before cleanup + const chunk = buffer.getChunk(responseId, 1); + expect(chunk).toBeDefined(); + + // Clean up + buffer.cleanup(responseId); + + // Should not be able to get chunk after cleanup + const cleanedChunk = buffer.getChunk(responseId, 1); + expect(cleanedChunk).toBeNull(); + }); + }); + + describe('Edge Cases', () => { + it('should handle empty strings', async () => { + const result = await buffer.processResponse(''); + expect(result.content).toBe(''); + expect(result.metadata).toBeUndefined(); + }); + + it('should handle strings with only whitespace', async () => { + const whitespaceText = ' \n\n \t\t '; + const result = await buffer.processResponse(whitespaceText); + expect(result.content).toBe(whitespaceText); + expect(result.metadata).toBeUndefined(); + }); + + it('should handle text exactly at the token limit boundary', async () => { + // Create text that's close to the limit (15k tokens = 45k chars) + const borderlineText = 'a'.repeat(45000); + const result = await buffer.processResponse(borderlineText); + + // Should just fit in one chunk + expect(result.metadata).toBeUndefined(); + expect(result.content).toBe(borderlineText); + }); + }); +}); \ No newline at end of file diff --git a/packages/local-mcp-server/src/test/tools/chat.test.ts b/packages/local-mcp-server/src/test/tools/chat.test.ts index 8007ed17..9ab270af 100644 --- a/packages/local-mcp-server/src/test/tools/chat.test.ts +++ b/packages/local-mcp-server/src/test/tools/chat.test.ts @@ -21,33 +21,50 @@ describe('Chat Tool', () => { it('generates correct JSON schema', () => { expect(zodToJsonSchema(ToolChatSchema, 'GleanChat')) .toMatchInlineSnapshot(` - { - "$ref": "#/definitions/GleanChat", - "$schema": "http://json-schema.org/draft-07/schema#", - "definitions": { - "GleanChat": { - "additionalProperties": false, - "properties": { - "context": { - "description": "Optional previous messages for context. Will be included in order before the current message.", - "items": { + { + "$ref": "#/definitions/GleanChat", + "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "GleanChat": { + "additionalProperties": false, + "properties": { + "context": { + "description": "Optional previous messages for context. 
Will be included in order before the current message.", + "items": { + "type": "string", + }, + "type": "array", + }, + "continueFrom": { + "additionalProperties": false, + "description": "Continue from a previous chunked response", + "properties": { + "chunkIndex": { + "type": "number", + }, + "responseId": { + "type": "string", + }, + }, + "required": [ + "responseId", + "chunkIndex", + ], + "type": "object", + }, + "message": { + "description": "The user question or message to send to Glean Assistant.", "type": "string", }, - "type": "array", - }, - "message": { - "description": "The user question or message to send to Glean Assistant.", - "type": "string", }, + "required": [ + "message", + ], + "type": "object", }, - "required": [ - "message", - ], - "type": "object", }, - }, - } - `); + } + `); }); }); diff --git a/packages/local-mcp-server/src/test/tools/pagination.test.ts b/packages/local-mcp-server/src/test/tools/pagination.test.ts new file mode 100644 index 00000000..cc169ca8 --- /dev/null +++ b/packages/local-mcp-server/src/test/tools/pagination.test.ts @@ -0,0 +1,149 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { search } from '../../tools/search.js'; +import { peopleProfileSearch } from '../../tools/people_profile_search.js'; +import { chat } from '../../tools/chat.js'; +import { chatResponseBuffer } from '../../tools/chat-response-buffer.js'; +import { getClient } from '../../common/client.js'; + +vi.mock('../../common/client.js'); + +describe('Pagination Tests', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('Search Pagination', () => { + it('should handle pagination cursor in search requests', async () => { + const mockClient = { + search: { + query: vi.fn().mockResolvedValue({ + results: [ + { title: 'Result 1', url: 'http://example1.com' }, + { title: 'Result 2', url: 'http://example2.com' }, + ], + cursor: 'next-page-cursor', + hasMoreResults: true, + totalResults: 50, + metadata: { searchedQuery: 'test query' }, + }), + }, + }; + + vi.mocked(getClient).mockResolvedValue(mockClient as any); + + const result = await search({ + query: 'test query', + pageSize: 2, + cursor: 'initial-cursor', + }); + + expect(mockClient.search.query).toHaveBeenCalledWith({ + query: 'test query', + pageSize: 2, + cursor: 'initial-cursor', + }); + + expect(result.cursor).toBe('next-page-cursor'); + expect(result.hasMoreResults).toBe(true); + }); + }); + + describe('People Search Pagination', () => { + it('should handle pagination cursor in people search requests', async () => { + const mockClient = { + entities: { + list: vi.fn().mockResolvedValue({ + results: [ + { name: 'Person 1', metadata: { email: 'person1@example.com' } }, + { name: 'Person 2', metadata: { email: 'person2@example.com' } }, + ], + cursor: 'people-next-cursor', + hasMoreResults: true, + totalCount: 100, + }), + }, + }; + + vi.mocked(getClient).mockResolvedValue(mockClient as any); + + const result = await peopleProfileSearch({ + query: 'engineers', + pageSize: 2, + cursor: 'people-cursor', + }); + + expect(mockClient.entities.list).toHaveBeenCalledWith( + expect.objectContaining({ + cursor: 'people-cursor', + pageSize: 2, + }) + ); + + expect(result.cursor).toBe('people-next-cursor'); + expect(result.hasMoreResults).toBe(true); + }); + }); + + describe('Chat Response Chunking', () => { + it('should chunk large chat responses', async () => { + // Create a large response that exceeds the token limit + const largeText = 'This is a test paragraph. 
'.repeat(5000); // ~130k chars ≈ ~43k tokens, well over the 15k limit
+
+      const chunked = await chatResponseBuffer.processResponse(largeText);
+
+      expect(chunked.metadata).toBeDefined();
+      expect(chunked.metadata?.totalChunks).toBeGreaterThan(1);
+      expect(chunked.metadata?.hasMore).toBe(true);
+      expect(chunked.content.length).toBeLessThan(largeText.length);
+    });
+
+    it('should retrieve subsequent chunks', async () => {
+      const largeText = 'This is a test paragraph. '.repeat(5000);
+
+      const initial = await chatResponseBuffer.processResponse(largeText);
+      const responseId = initial.metadata!.responseId;
+
+      // Get second chunk
+      const chunk2 = chatResponseBuffer.getChunk(responseId, 1);
+
+      expect(chunk2).toBeDefined();
+      expect(chunk2?.metadata?.chunkIndex).toBe(1);
+      expect(chunk2?.content).toBeTruthy();
+    });
+
+    it('should handle chat continuation requests', async () => {
+      const mockClient = {
+        chat: {
+          create: vi.fn().mockResolvedValue({
+            messages: [{ text: 'Response' }],
+          }),
+        },
+      };
+
+      vi.mocked(getClient).mockResolvedValue(mockClient as any);
+
+      // First, make a regular chat request
+      await chat({
+        message: 'Hello',
+      });
+
+      // Create a large response manually
+      const largeText = 'This is a test paragraph. '.repeat(5000);
+      const chunked = await chatResponseBuffer.processResponse(largeText);
+      const responseId = chunked.metadata!.responseId;
+
+      // Now test continuation
+      const continued = await chat({
+        message: '',
+        continueFrom: {
+          responseId,
+          chunkIndex: 1,
+        },
+      });
+
+      expect(continued).toBeTruthy();
+      expect('content' in continued && continued.content).toBeTruthy();
+      expect('metadata' in continued && continued.metadata?.chunkIndex).toBe(1);
+    });
+  });
+});
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts b/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
index 659611fe..71bb2124 100644
--- a/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
+++ b/packages/local-mcp-server/src/test/tools/people_profile_search.test.ts
@@ -31,6 +31,10 @@ describe('People Profile Search Tool', () => {
       "PeopleProfileSearch": {
         "additionalProperties": false,
         "properties": {
+          "cursor": {
+            "description": "Pagination cursor from previous response to fetch next page",
+            "type": "string",
+          },
           "filters": {
             "additionalProperties": {
               "type": "string",
diff --git a/packages/local-mcp-server/src/test/tools/search.test.ts b/packages/local-mcp-server/src/test/tools/search.test.ts
index d9b22ada..03477cb9 100644
--- a/packages/local-mcp-server/src/test/tools/search.test.ts
+++ b/packages/local-mcp-server/src/test/tools/search.test.ts
@@ -28,6 +28,10 @@ describe('Search Tool', () => {
       "GleanSearch": {
         "additionalProperties": false,
         "properties": {
+          "cursor": {
+            "description": "Pagination cursor from previous response to fetch next page",
+            "type": "string",
+          },
           "datasources": {
             "description": "Optional list of data sources to search in. Examples: "github", "gdrive", "confluence", "jira".",
             "items": {
               "type": "string",
             },
             "type": "array",
           },
+          "pageSize": {
+            "default": 10,
+            "description": "Number of results to return per page (default: 10, max: 100)",
+            "maximum": 100,
+            "minimum": 1,
+            "type": "number",
+          },
           "query": {
             "description": "The search query. This is what you want to search for.",
            "type": "string",
diff --git a/packages/local-mcp-server/src/tools/chat-response-buffer.ts b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
new file mode 100644
index 00000000..c4a6be63
--- /dev/null
+++ b/packages/local-mcp-server/src/tools/chat-response-buffer.ts
@@ -0,0 +1,207 @@
+/**
+ * @fileoverview Chat response buffer for handling large responses that exceed token limits.
+ *
+ * This module provides intelligent chunking of chat responses to stay within token limits
+ * while maintaining readability by splitting at natural boundaries.
+ *
+ * @module tools/chat-response-buffer
+ */
+
+import { randomUUID } from 'crypto';
+
+export interface ChatChunkMetadata {
+  chunkIndex: number;
+  totalChunks: number;
+  responseId: string;
+  hasMore: boolean;
+}
+
+export interface ChunkedResponse {
+  content: string;
+  metadata?: ChatChunkMetadata;
+}
+
+/**
+ * Manages chunking of large chat responses to avoid token limit errors.
+ */
+export class ChatResponseBuffer {
+  private static readonly MAX_TOKENS = 15000; // Safe limit below 25k with buffer
+  private static readonly CHARS_PER_TOKEN = 3; // More conservative estimation
+  private responses = new Map<string, string[]>();
+
+  /**
+   * Process a chat response, chunking it if necessary.
+   *
+   * @param response The full response text
+   * @param responseId Optional ID for continuation support
+   * @returns The first chunk and metadata if chunked
+   */
+  async processResponse(
+    response: string,
+    responseId?: string,
+  ): Promise<ChunkedResponse> {
+    // If response is small enough, return as-is
+    if (this.estimateTokens(response) <= ChatResponseBuffer.MAX_TOKENS) {
+      return { content: response };
+    }
+
+    // Generate responseId if not provided
+    const id = responseId || randomUUID();
+
+    // Split response intelligently
+    const chunks = this.splitResponse(response);
+    this.responses.set(id, chunks);
+
+    return {
+      content: chunks[0],
+      metadata: {
+        chunkIndex: 0,
+        totalChunks: chunks.length,
+        responseId: id,
+        hasMore: chunks.length > 1,
+      },
+    };
+  }
+
+  /**
+   * Get a specific chunk from a previously chunked response.
+   *
+   * @param responseId The response ID
+   * @param chunkIndex The chunk index to retrieve
+   * @returns The requested chunk and metadata
+   */
+  getChunk(responseId: string, chunkIndex: number): ChunkedResponse | null {
+    const chunks = this.responses.get(responseId);
+    if (!chunks || chunkIndex >= chunks.length || chunkIndex < 0) {
+      return null;
+    }
+
+    return {
+      content: chunks[chunkIndex],
+      metadata: {
+        chunkIndex,
+        totalChunks: chunks.length,
+        responseId,
+        hasMore: chunkIndex < chunks.length - 1,
+      },
+    };
+  }
+
+  /**
+   * Split a response into chunks at natural boundaries.
+   *
+   * @param response The full response text
+   * @returns Array of chunks
+   */
+  private splitResponse(response: string): string[] {
+    const chunks: string[] = [];
+    let currentChunk = '';
+    let currentTokens = 0;
+
+    // First try to split by double newlines (paragraphs)
+    const paragraphs = response.split('\n\n');
+
+    for (const paragraph of paragraphs) {
+      const paragraphTokens = this.estimateTokens(paragraph);
+
+      if (currentTokens + paragraphTokens > ChatResponseBuffer.MAX_TOKENS) {
+        if (currentChunk) {
+          chunks.push(currentChunk.trim());
+          currentChunk = paragraph;
+          currentTokens = paragraphTokens;
+        } else {
+          // Single paragraph exceeds limit, split by sentences
+          chunks.push(...this.splitLargeParagraph(paragraph));
+        }
+      } else {
+        currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
+        currentTokens += paragraphTokens;
+      }
+    }
+
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Split a large paragraph by sentences.
+   *
+   * @param paragraph The paragraph to split
+   * @returns Array of chunks
+   */
+  private splitLargeParagraph(paragraph: string): string[] {
+    const chunks: string[] = [];
+    let currentChunk = '';
+    let currentTokens = 0;
+
+    // Split by sentence endings (. ! ?)
+    const sentences = paragraph.match(/[^.!?]+[.!?]+/g) || [paragraph];
+
+    for (const sentence of sentences) {
+      const sentenceTokens = this.estimateTokens(sentence);
+
+      if (currentTokens + sentenceTokens > ChatResponseBuffer.MAX_TOKENS) {
+        if (currentChunk) {
+          chunks.push(currentChunk.trim());
+          currentChunk = sentence;
+          currentTokens = sentenceTokens;
+        } else {
+          // Single sentence exceeds limit, force split
+          chunks.push(...this.forceSplit(sentence));
+        }
+      } else {
+        currentChunk += (currentChunk ? ' ' : '') + sentence;
+        currentTokens += sentenceTokens;
+      }
+    }
+
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Force split text that can't be split naturally.
+   *
+   * @param text The text to force split
+   * @returns Array of chunks
+   */
+  private forceSplit(text: string): string[] {
+    const maxChars = ChatResponseBuffer.MAX_TOKENS * ChatResponseBuffer.CHARS_PER_TOKEN;
+    const chunks: string[] = [];
+
+    for (let i = 0; i < text.length; i += maxChars) {
+      chunks.push(text.slice(i, i + maxChars));
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Estimate the number of tokens in a text.
+   *
+   * @param text The text to estimate
+   * @returns Estimated token count
+   */
+  private estimateTokens(text: string): number {
+    // Rough estimation: 1 token ≈ 3 characters (see CHARS_PER_TOKEN)
+    return Math.ceil(text.length / ChatResponseBuffer.CHARS_PER_TOKEN);
+  }
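+
+  // NOTE: chunked responses are held in memory until cleanup() is called;
+  // callers should clean up once the final chunk has been delivered.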
+
+  /**
+   * Clean up stored chunks for a response.
+   *
+   * @param responseId The response ID to clean up
+   */
+  cleanup(responseId: string): void {
+    this.responses.delete(responseId);
+  }
+}
+
+// Export singleton instance
+export const chatResponseBuffer = new ChatResponseBuffer();
\ No newline at end of file
diff --git a/packages/local-mcp-server/src/tools/chat.ts b/packages/local-mcp-server/src/tools/chat.ts
index d75ab337..182461c4 100644
--- a/packages/local-mcp-server/src/tools/chat.ts
+++ b/packages/local-mcp-server/src/tools/chat.ts
@@ -3,9 +3,49 @@ import { getClient } from '../common/client.js';
 import {
   ChatRequest,
   ChatRequest$inboundSchema as ChatRequestSchema,
+  ChatResponse,
+  ChatMessage,
+  ChatMessageFragment,
+  ChatMessageCitation,
   MessageType,
 } from '@gleanwork/api-client/models/components';
 import { Author } from '@gleanwork/api-client/models/components';
+import { chatResponseBuffer, ChatChunkMetadata } from './chat-response-buffer.js';
+
+/**
+ * Extended ChatResponse with chunking metadata
+ */
+interface ChunkedChatResponse extends ChatResponse {
+  _formatted?: string;
+  _chunkMetadata?: ChatChunkMetadata;
+}
+
+/**
+ * Chat chunk for continuation responses
+ */
+interface ChatChunk {
+  content: string;
+  metadata: ChatChunkMetadata;
+}
+
+/**
+ * Union type for formattable responses
+ */
+type FormattableResponse = ChunkedChatResponse | ChatChunk;
+
+/**
+ * Type guard to check if response is a ChunkedChatResponse
+ */
+function isChunkedChatResponse(response: FormattableResponse): response is ChunkedChatResponse {
+  return 'messages' in response;
+}
+
+/**
+ * Type guard to check if response is a ChatChunk
+ */
+function isChatChunk(response: FormattableResponse): response is ChatChunk {
+  return 'content' in response && 'metadata' in response;
+}
 
 /**
  * Simplified schema for Glean chat requests designed for LLM interaction
@@ -21,6 +61,14 @@ export const ToolChatSchema = z.object({
       'Optional previous messages for context. Will be included in order before the current message.',
     )
     .optional(),
+
+  continueFrom: z
+    .object({
+      responseId: z.string(),
+      chunkIndex: z.number(),
+    })
+    .describe('Continue from a previous chunked response')
+    .optional(),
 });
 
 export type ToolChatRequest = z.infer<typeof ToolChatSchema>;
 
@@ -59,15 +107,44 @@ function convertToAPIChatRequest(input: ToolChatRequest) {
 /**
  * Initiates or continues a chat conversation with Glean's AI.
  *
  * @param params The chat parameters using the simplified schema
- * @returns The chat response
+ * @returns The chat response with automatic chunking if needed
  * @throws If the chat request fails
  */
-export async function chat(params: ToolChatRequest) {
+export async function chat(params: ToolChatRequest): Promise<ChunkedChatResponse | ChatChunk> {
+  // Handle continuation requests
+  if (params.continueFrom) {
+    const chunk = chatResponseBuffer.getChunk(
+      params.continueFrom.responseId,
+      params.continueFrom.chunkIndex
+    );
+
+    if (!chunk) {
+      throw new Error('Invalid continuation request: chunk not found');
+    }
+
+    // The chunk from buffer already matches ChatChunk interface
+    return chunk as ChatChunk;
+  }
+
+  // Normal chat request
   const mappedParams = convertToAPIChatRequest(params);
   const parsedParams = ChatRequestSchema.parse(mappedParams);
 
   const client = await getClient();
-  return await client.chat.create(parsedParams);
+  const response = await client.chat.create(parsedParams);
+
+  // Format and chunk the response if needed
+  const formattedResponse = formatResponse(response);
+  const chunked = await chatResponseBuffer.processResponse(formattedResponse);
+
+  // Return the response with chunk metadata if applicable
+  const result: ChunkedChatResponse = {
+    ...response,
+    _formatted: chunked.content,
+    _chunkMetadata: chunked.metadata,
+  };
+
+  return result;
 }
 
 /**
@@ -76,7 +153,7 @@
  * @param chatResponse The raw chat response from Glean API
  * @returns Formatted chat response as text
  */
-export function formatResponse(chatResponse: any): string {
+export function formatResponse(chatResponse: ChatResponse): string {
   if (
     !chatResponse ||
     !chatResponse.messages ||
@@ -87,14 +164,14 @@ export function formatResponse(chatResponse: any): string {
   }
 
   const formattedMessages = chatResponse.messages
-    .map((message: any) => {
+    .map((message: ChatMessage) => {
       const author = message.author || 'Unknown';
 
       let messageText = '';
 
       if (message.fragments && Array.isArray(message.fragments)) {
         messageText = message.fragments
-          .map((fragment: any) => {
+          .map((fragment: ChatMessageFragment) => {
            if (fragment.text) {
              return fragment.text;
            } else if (fragment.querySuggestion) {
@@ -104,7 +181,7 @@
              Array.isArray(fragment.structuredResults)
            ) {
              return fragment.structuredResults
-                .map((result: any) => {
+                .map((result) => {
                  if (result.document) {
                    const doc = result.document;
 
@@ -134,7 +211,7 @@
      citationsText =
        '\n\nSources:\n' +
        message.citations
-          .map((citation: any, index: number) => {
+          .map((citation: ChatMessageCitation, index: number) => {
            const sourceDoc = citation.sourceDocument || {};
            const title = sourceDoc.title || 'Unknown source';
            const url = sourceDoc.url || '';
@@ -146,7 +223,7 @@
      const messageType = message.messageType
        ? ` (${message.messageType})`
        : '';
-      const stepId = message.stepId ? ` [Step: ${message.stepId}]` : '';
+      const stepId = (message as any).stepId ? ` [Step: ${(message as any).stepId}]` : '';
 
      return `${author}${messageType}${stepId}: ${messageText}${citationsText}`;
    })
@@ -154,3 +231,47 @@
 
   return formattedMessages;
 }
+
+/**
+ * Formats a chunked response for display, including metadata about chunks.
+ * + * @param response The response object with potential chunk metadata + * @returns Formatted response with chunk information if applicable + */ +export function formatChunkedResponse(response: FormattableResponse): string { + // Handle continuation chunks + if (isChatChunk(response)) { + const { chunkIndex, totalChunks, hasMore } = response.metadata; + let result = response.content; + + if (hasMore) { + result += `\n\n---\n[Chunk ${chunkIndex + 1} of ${totalChunks}] `; + result += `To continue, use continueFrom: { responseId: "${response.metadata.responseId}", chunkIndex: ${chunkIndex + 1} }`; + } + + return result; + } + + // Handle initial chunked response + if (isChunkedChatResponse(response)) { + if (response._formatted) { + let result = response._formatted; + + if (response._chunkMetadata) { + const { totalChunks, hasMore, responseId } = response._chunkMetadata; + if (hasMore) { + result += `\n\n---\n[Chunk 1 of ${totalChunks}] `; + result += `To continue, use continueFrom: { responseId: "${responseId}", chunkIndex: 1 }`; + } + } + + return result; + } + + // Fall back to standard formatting + return formatResponse(response); + } + + // This should never happen with proper types + throw new Error('Unknown response type'); +} diff --git a/packages/local-mcp-server/src/tools/people_profile_search.ts b/packages/local-mcp-server/src/tools/people_profile_search.ts index bce1185e..9abd1c1a 100644 --- a/packages/local-mcp-server/src/tools/people_profile_search.ts +++ b/packages/local-mcp-server/src/tools/people_profile_search.ts @@ -59,6 +59,11 @@ export const ToolPeopleProfileSearchSchema = z 'Hint to the server for how many people to return (1-100, default 10).', ) .optional(), + + cursor: z + .string() + .describe('Pagination cursor from previous response to fetch next page') + .optional(), }) .refine( (val) => val.query || (val.filters && Object.keys(val.filters).length > 0), @@ -79,7 +84,7 @@ export type ToolPeopleProfileSearchRequest = z.infer< * @returns The Glean API compatible request */ function convertToAPIEntitiesRequest(input: ToolPeopleProfileSearchRequest) { - const { query, filters = {}, pageSize } = input; + const { query, filters = {}, pageSize, cursor } = input; const request: ListEntitiesRequest = { entityType: ListEntitiesRequestEntityType.People, @@ -90,6 +95,11 @@ function convertToAPIEntitiesRequest(input: ToolPeopleProfileSearchRequest) { request.query = query; } + // Add pagination cursor if provided + if (cursor) { + request.cursor = cursor; + } + const filterKeys = Object.keys(filters) as Array; if (filterKeys.length > 0) { request.filter = filterKeys.map((fieldName) => { @@ -175,6 +185,18 @@ export function formatResponse(searchResults: any): string { typeof searchResults.totalCount === 'number' ? searchResults.totalCount : searchResults.results.length; + const resultsShown = searchResults.results.length; + + // Add pagination info to response + let paginationInfo = ''; + if (searchResults.hasMoreResults) { + paginationInfo = '\n\n---\nMore results available. 
';
+    if (searchResults.cursor) {
+      paginationInfo += `Use cursor="${searchResults.cursor}" to fetch the next page.`;
+    } else {
+      paginationInfo += 'Additional pages may be available.';
+    }
+  }
 
-  return `Found ${total} people:\n\n${formatted}`;
+  return `Found ${resultsShown} of ${total} people:\n\n${formatted}${paginationInfo}`;
 }
diff --git a/packages/local-mcp-server/src/tools/search.ts b/packages/local-mcp-server/src/tools/search.ts
index fa0d6826..a3dce1c2 100644
--- a/packages/local-mcp-server/src/tools/search.ts
+++ b/packages/local-mcp-server/src/tools/search.ts
@@ -29,6 +29,19 @@ export const ToolSearchSchema = z.object({
       'Optional list of data sources to search in. Examples: "github", "gdrive", "confluence", "jira".',
     )
     .optional(),
+
+  pageSize: z
+    .number()
+    .min(1)
+    .max(100)
+    .default(10)
+    .describe('Number of results to return per page (default: 10, max: 100)')
+    .optional(),
+
+  cursor: z
+    .string()
+    .describe('Pagination cursor from previous response to fetch next page')
+    .optional(),
 });
 
 export type ToolSearchRequest = z.infer<typeof ToolSearchSchema>;
 
@@ -40,13 +53,18 @@ export type ToolSearchRequest = z.infer<typeof ToolSearchSchema>;
 /**
  * @returns Glean API compatible search request
  */
 function convertToAPISearchRequest(input: ToolSearchRequest) {
-  const { query, datasources } = input;
+  const { query, datasources, pageSize, cursor } = input;
 
   const searchRequest: SearchRequest = {
     query,
-    pageSize: 10,
+    pageSize: pageSize || 10,
   };
 
+  // Add pagination cursor if provided
+  if (cursor) {
+    searchRequest.cursor = cursor;
+  }
+
   if (datasources && datasources.length > 0) {
     searchRequest.requestOptions = {
       datasourcesFilter: datasources,
@@ -120,6 +138,18 @@ export function formatResponse(searchResults: any): string {
   const totalResults =
     searchResults.totalResults || searchResults.results.length;
   const query = searchResults.metadata.searchedQuery || 'your query';
+  const resultsShown = searchResults.results.length;
+
+  // Add pagination info to response
+  let paginationInfo = '';
+  if (searchResults.hasMoreResults) {
+    paginationInfo = '\n\n---\nMore results available. ';
+    if (searchResults.cursor) {
+      paginationInfo += `Use cursor="${searchResults.cursor}" to fetch the next page.`;
+    } else {
+      paginationInfo += 'Additional pages may be available.';
+    }
+  }
 
-  return `Search results for "${query}" (${totalResults} results):\n\n${formattedResults}`;
+  return `Search results for "${query}" (showing ${resultsShown} of ${totalResults} results):\n\n${formattedResults}${paginationInfo}`;
 }
diff --git a/packages/mcp-server-utils/src/config/index.ts b/packages/mcp-server-utils/src/config/index.ts
index 47e32c30..15e5c152 100644
--- a/packages/mcp-server-utils/src/config/index.ts
+++ b/packages/mcp-server-utils/src/config/index.ts
@@ -135,7 +135,7 @@ export async function getConfig(
 function getLocalConfig(): GleanConfig {
   const instance = process.env.GLEAN_INSTANCE || process.env.GLEAN_SUBDOMAIN;
-  const baseUrl = process.env.GLEAN_BASE_URL;
+  const baseUrl = process.env.GLEAN_BASE_URL || process.env.GLEAN_SERVER_URL;
   const token = process.env.GLEAN_API_TOKEN;
   const actAs = process.env.GLEAN_ACT_AS;
   const issuer = process.env.GLEAN_OAUTH_ISSUER;