diff --git a/docs/docs/tutorials/sdk/sessions.md b/docs/docs/tutorials/sdk/sessions.md index d4e1fc434..1272c713e 100644 --- a/docs/docs/tutorials/sdk/sessions.md +++ b/docs/docs/tutorials/sdk/sessions.md @@ -153,6 +153,41 @@ await agent.deleteSession('demo'); // Session no longer exists ``` +## Compacting Sessions + +Sessions are Dexto's working memory layer. When a transcript gets large, you can +compact older turns into a persisted compaction artifact and then decide how +to continue: + +```typescript +const compaction = await agent.compactSession({ + sessionId: session.id, + mode: 'continue-in-child', + trigger: 'manual', +}); + +const nextSessionId = compaction?.targetSessionId ?? session.id; +``` + +`compactSession()` supports three modes: + +- `artifact-only`: create the artifact without changing the active session +- `continue-in-place`: compact the current session and keep using the same ID +- `continue-in-child`: create a new child session seeded with the compacted continuation + +You can also retrieve a stored artifact later by its ID. The compaction result is `null` when the session was too short or nothing needed compaction, so check it first: + +```typescript +if (compaction) { + const artifact = await agent.getSessionCompaction(compaction.id); + console.log(artifact?.summaryMessages); +} +``` + +Compaction is still session lifecycle and working-memory management, not long-term memory. Long-term memory +systems, semantic extraction, and user preference storage should build on top of +these artifacts rather than replacing them. 
+ ## When to Create Sessions **One session per conversation thread.** Here are common patterns: diff --git a/docs/static/openapi/openapi.json b/docs/static/openapi/openapi.json index dd36b7a5e..8a2854ac3 100644 --- a/docs/static/openapi/openapi.json +++ b/docs/static/openapi/openapi.json @@ -2,7 +2,7 @@ "openapi": "3.0.0", "info": { "title": "Dexto API", - "version": "1.6.14", + "version": "1.6.17", "description": "OpenAPI spec for the Dexto REST API server" }, "servers": [ @@ -8838,6 +8838,1997 @@ } } }, + "/api/sessions/{sessionId}/compact": { + "post": { + "summary": "Compact Session", + "description": "Generates a persisted compaction artifact for a session and can optionally apply it in place or seed a new child session.", + "tags": [ + "sessions" + ], + "parameters": [ + { + "schema": { + "type": "string", + "description": "Source session identifier" + }, + "required": true, + "description": "Source session identifier", + "name": "sessionId", + "in": "path" + } + ], + "requestBody": { + "required": false, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": [ + "artifact-only", + "continue-in-place", + "continue-in-child" + ], + "description": "Whether to persist the artifact only, update the current session in place, or seed a new child session" + }, + "childTitle": { + "type": "string", + "description": "Optional title for a continuation child session" + } + }, + "additionalProperties": false, + "description": "Request body for compacting a session" + } + } + } + }, + "responses": { + "200": { + "description": "Compaction result. 
Returns null when the session was too short or nothing needed compaction.", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "compaction": { + "type": "object", + "nullable": true, + "properties": { + "id": { + "type": "string", + "description": "Unique compaction artifact identifier" + }, + "sourceSessionId": { + "type": "string", + "description": "Source session identifier" + }, + "targetSessionId": { + "type": "string", + "nullable": true, + "description": "Target child session when continuation was applied into a new session" + }, + "createdAt": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "strategy": { + "type": "string", + "description": "Compaction strategy name used to produce the artifact" + }, + "mode": { + "type": "string", + "enum": [ + "artifact-only", + "continue-in-place", + "continue-in-child" + ], + "description": "How the compaction artifact should be applied" + }, + "trigger": { + "type": "string", + "enum": [ + "manual", + "api", + "scheduled", + "overflow" + ], + "description": "Why the compaction was triggered" + }, + "originalTokens": { + "type": "integer", + "minimum": 0, + "description": "Estimated tokens before compaction" + }, + "compactedTokens": { + "type": "integer", + "minimum": 0, + "description": "Estimated tokens after compaction" + }, + "originalMessages": { + "type": "integer", + "minimum": 0, + "description": "Prepared message count before compaction" + }, + "compactedMessages": { + "type": "integer", + "minimum": 0, + "description": "Prepared message count after compaction" + }, + "summaryMessages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid", + "description": "Unique message identifier (UUID)" + }, + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant", + "tool" + ], + "description": "Role of the message 
sender" + }, + "timestamp": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "nullable": true + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ], + "description": "Part type: text" + }, + "text": { + "type": "string", + "description": "Text content" + } + }, + "required": [ + "type", + "text" + ], + "additionalProperties": false, + "description": "Text content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "image" + ], + "description": "Part type: image" + }, + "image": { + "type": "string", + "description": "Base64-encoded image data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the image" + } + }, + "required": [ + "type", + "image" + ], + "additionalProperties": false, + "description": "Image content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ], + "description": "Part type: file" + }, + "data": { + "type": "string", + "description": "Base64-encoded file data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the file" + }, + "filename": { + "type": "string", + "description": "Optional filename" + } + }, + "required": [ + "type", + "data", + "mimeType" + ], + "additionalProperties": false, + "description": "File content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ui-resource" + ], + "description": "Part type: ui-resource" + }, + "uri": { + "type": "string", + "description": "URI identifying the UI resource (must start with ui://)" + }, + "mimeType": { + "type": "string", + "description": "MIME type: text/html, text/uri-list, or application/vnd.mcp-ui.remote-dom" + }, + "content": { + "type": "string", + 
"description": "Inline HTML content or URL" + }, + "blob": { + "type": "string", + "description": "Base64-encoded content (alternative to content)" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Display title for the UI resource" + }, + "preferredSize": { + "type": "object", + "properties": { + "width": { + "type": "number", + "description": "Preferred width in pixels" + }, + "height": { + "type": "number", + "description": "Preferred height in pixels" + } + }, + "required": [ + "width", + "height" + ], + "additionalProperties": false, + "description": "Preferred rendering size" + } + }, + "additionalProperties": false, + "description": "Optional metadata for the UI resource" + } + }, + "required": [ + "type", + "uri", + "mimeType" + ], + "additionalProperties": false, + "description": "UI Resource content part for MCP-UI interactive components" + } + ], + "description": "Message content part (text, image, file, or UI resource)" + } + }, + { + "nullable": true + } + ], + "description": "Message content (string, null, or array of parts)" + }, + "reasoning": { + "type": "string", + "description": "Optional model reasoning text" + }, + "tokenUsage": { + "type": "object", + "properties": { + "inputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of input tokens" + }, + "outputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of output tokens" + }, + "reasoningTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of reasoning tokens" + }, + "cacheReadTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache read tokens" + }, + "cacheWriteTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache write tokens" + }, + "totalTokens": { + "type": "integer", + "minimum": 0, + "description": "Total tokens used" + } + }, + "additionalProperties": false, + "description": "Optional token usage accounting" 
+ }, + "estimatedCost": { + "type": "number", + "minimum": 0, + "description": "Estimated cost in USD for this response" + }, + "pricingStatus": { + "type": "string", + "enum": [ + "estimated", + "unpriced" + ], + "description": "Whether pricing was resolved for this response" + }, + "usageScopeId": { + "type": "string", + "description": "Optional usage scope identifier for runtime-scoped metering" + }, + "model": { + "type": "string", + "description": "Model identifier for assistant messages" + }, + "provider": { + "type": "string", + "enum": [ + "openai", + "openai-compatible", + "anthropic", + "google", + "groq", + "xai", + "cohere", + "minimax", + "glm", + "openrouter", + "litellm", + "glama", + "vertex", + "bedrock", + "local", + "ollama", + "dexto-nova" + ], + "description": "Provider identifier for assistant messages" + }, + "toolCalls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this tool call" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "description": "Tool call type (currently only function is supported)" + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the function to call" + }, + "arguments": { + "type": "string", + "description": "Arguments for the function in JSON string format" + } + }, + "required": [ + "name", + "arguments" + ], + "additionalProperties": false, + "description": "Function call details" + } + }, + "required": [ + "id", + "type", + "function" + ], + "additionalProperties": false, + "description": "Tool call made by the assistant" + }, + "description": "Tool calls made by the assistant" + }, + "toolCallId": { + "type": "string", + "description": "ID of the tool call this message responds to" + }, + "name": { + "type": "string", + "description": "Name of the tool that produced this result" + }, + "success": { + "type": "boolean", + "description": 
"Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" + } + }, + "required": [ + "role", + "content" + ], + "additionalProperties": false, + "description": "Internal message representation" + }, + "description": "Generated summary messages returned by the compaction strategy" + }, + "continuationMessages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid", + "description": "Unique message identifier (UUID)" + }, + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant", + "tool" + ], + "description": "Role of the message sender" + }, + "timestamp": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "nullable": true + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ], + "description": "Part type: text" + }, + "text": { + "type": "string", + 
"description": "Text content" + } + }, + "required": [ + "type", + "text" + ], + "additionalProperties": false, + "description": "Text content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "image" + ], + "description": "Part type: image" + }, + "image": { + "type": "string", + "description": "Base64-encoded image data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the image" + } + }, + "required": [ + "type", + "image" + ], + "additionalProperties": false, + "description": "Image content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ], + "description": "Part type: file" + }, + "data": { + "type": "string", + "description": "Base64-encoded file data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the file" + }, + "filename": { + "type": "string", + "description": "Optional filename" + } + }, + "required": [ + "type", + "data", + "mimeType" + ], + "additionalProperties": false, + "description": "File content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ui-resource" + ], + "description": "Part type: ui-resource" + }, + "uri": { + "type": "string", + "description": "URI identifying the UI resource (must start with ui://)" + }, + "mimeType": { + "type": "string", + "description": "MIME type: text/html, text/uri-list, or application/vnd.mcp-ui.remote-dom" + }, + "content": { + "type": "string", + "description": "Inline HTML content or URL" + }, + "blob": { + "type": "string", + "description": "Base64-encoded content (alternative to content)" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Display title for the UI resource" + }, + "preferredSize": { + "type": "object", + "properties": { + "width": { + "type": "number", + "description": "Preferred width in pixels" + }, + "height": { + "type": 
"number", + "description": "Preferred height in pixels" + } + }, + "required": [ + "width", + "height" + ], + "additionalProperties": false, + "description": "Preferred rendering size" + } + }, + "additionalProperties": false, + "description": "Optional metadata for the UI resource" + } + }, + "required": [ + "type", + "uri", + "mimeType" + ], + "additionalProperties": false, + "description": "UI Resource content part for MCP-UI interactive components" + } + ], + "description": "Message content part (text, image, file, or UI resource)" + } + }, + { + "nullable": true + } + ], + "description": "Message content (string, null, or array of parts)" + }, + "reasoning": { + "type": "string", + "description": "Optional model reasoning text" + }, + "tokenUsage": { + "type": "object", + "properties": { + "inputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of input tokens" + }, + "outputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of output tokens" + }, + "reasoningTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of reasoning tokens" + }, + "cacheReadTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache read tokens" + }, + "cacheWriteTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache write tokens" + }, + "totalTokens": { + "type": "integer", + "minimum": 0, + "description": "Total tokens used" + } + }, + "additionalProperties": false, + "description": "Optional token usage accounting" + }, + "estimatedCost": { + "type": "number", + "minimum": 0, + "description": "Estimated cost in USD for this response" + }, + "pricingStatus": { + "type": "string", + "enum": [ + "estimated", + "unpriced" + ], + "description": "Whether pricing was resolved for this response" + }, + "usageScopeId": { + "type": "string", + "description": "Optional usage scope identifier for runtime-scoped metering" + }, + "model": { + "type": "string", + "description": "Model 
identifier for assistant messages" + }, + "provider": { + "type": "string", + "enum": [ + "openai", + "openai-compatible", + "anthropic", + "google", + "groq", + "xai", + "cohere", + "minimax", + "glm", + "openrouter", + "litellm", + "glama", + "vertex", + "bedrock", + "local", + "ollama", + "dexto-nova" + ], + "description": "Provider identifier for assistant messages" + }, + "toolCalls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this tool call" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "description": "Tool call type (currently only function is supported)" + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the function to call" + }, + "arguments": { + "type": "string", + "description": "Arguments for the function in JSON string format" + } + }, + "required": [ + "name", + "arguments" + ], + "additionalProperties": false, + "description": "Function call details" + } + }, + "required": [ + "id", + "type", + "function" + ], + "additionalProperties": false, + "description": "Tool call made by the assistant" + }, + "description": "Tool calls made by the assistant" + }, + "toolCallId": { + "type": "string", + "description": "ID of the tool call this message responds to" + }, + "name": { + "type": "string", + "description": "Name of the tool that produced this result" + }, + "success": { + "type": "boolean", + "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was 
produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" + } + }, + "required": [ + "role", + "content" + ], + "additionalProperties": false, + "description": "Internal message representation" + }, + "description": "Messages that can seed a continued session after compaction" + } + }, + "required": [ + "id", + "sourceSessionId", + "createdAt", + "strategy", + "mode", + "trigger", + "originalTokens", + "compactedTokens", + "originalMessages", + "compactedMessages", + "summaryMessages", + "continuationMessages" + ], + "additionalProperties": false, + "description": "Persisted session compaction artifact" + } + }, + "required": [ + "compaction" + ], + "additionalProperties": false + } + } + } + }, + "400": { + "description": "Invalid compaction request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Error code" + }, + "message": { + "type": "string", + "description": "Error message" + }, + "scope": { + "type": "string", + "description": "Error scope" + }, + "type": { + "type": "string", + "description": "Error type" + }, + "context": { + "nullable": true, + "description": "Error context" + }, + "recovery": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Recovery guidance" + }, + "traceId": { + "type": "string", + "description": "Trace identifier" + }, + "endpoint": { + "type": "string", + "description": "Request endpoint" + }, + "method": { + "type": 
"string", + "description": "HTTP method" + } + }, + "required": [ + "code", + "message", + "scope", + "type", + "traceId", + "endpoint", + "method" + ], + "additionalProperties": false, + "description": "Standard API error envelope" + } + } + } + }, + "404": { + "description": "Source session not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Error code" + }, + "message": { + "type": "string", + "description": "Error message" + }, + "scope": { + "type": "string", + "description": "Error scope" + }, + "type": { + "type": "string", + "description": "Error type" + }, + "context": { + "nullable": true, + "description": "Error context" + }, + "recovery": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Recovery guidance" + }, + "traceId": { + "type": "string", + "description": "Trace identifier" + }, + "endpoint": { + "type": "string", + "description": "Request endpoint" + }, + "method": { + "type": "string", + "description": "HTTP method" + } + }, + "required": [ + "code", + "message", + "scope", + "type", + "traceId", + "endpoint", + "method" + ], + "additionalProperties": false, + "description": "Standard API error envelope" + } + } + } + } + } + } + }, + "/api/sessions/compactions/{compactionId}": { + "get": { + "summary": "Get Session Compaction", + "description": "Retrieves a previously persisted session compaction artifact.", + "tags": [ + "sessions" + ], + "parameters": [ + { + "schema": { + "type": "string", + "description": "Compaction artifact identifier" + }, + "required": true, + "description": "Compaction artifact identifier", + "name": "compactionId", + "in": "path" + } + ], + "responses": { + "200": { + "description": "Persisted session compaction artifact", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "compaction": { + "type": 
"object", + "properties": { + "id": { + "type": "string", + "description": "Unique compaction artifact identifier" + }, + "sourceSessionId": { + "type": "string", + "description": "Source session identifier" + }, + "targetSessionId": { + "type": "string", + "nullable": true, + "description": "Target child session when continuation was applied into a new session" + }, + "createdAt": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "strategy": { + "type": "string", + "description": "Compaction strategy name used to produce the artifact" + }, + "mode": { + "type": "string", + "enum": [ + "artifact-only", + "continue-in-place", + "continue-in-child" + ], + "description": "How the compaction artifact should be applied" + }, + "trigger": { + "type": "string", + "enum": [ + "manual", + "api", + "scheduled", + "overflow" + ], + "description": "Why the compaction was triggered" + }, + "originalTokens": { + "type": "integer", + "minimum": 0, + "description": "Estimated tokens before compaction" + }, + "compactedTokens": { + "type": "integer", + "minimum": 0, + "description": "Estimated tokens after compaction" + }, + "originalMessages": { + "type": "integer", + "minimum": 0, + "description": "Prepared message count before compaction" + }, + "compactedMessages": { + "type": "integer", + "minimum": 0, + "description": "Prepared message count after compaction" + }, + "summaryMessages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid", + "description": "Unique message identifier (UUID)" + }, + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant", + "tool" + ], + "description": "Role of the message sender" + }, + "timestamp": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + 
"nullable": true + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ], + "description": "Part type: text" + }, + "text": { + "type": "string", + "description": "Text content" + } + }, + "required": [ + "type", + "text" + ], + "additionalProperties": false, + "description": "Text content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "image" + ], + "description": "Part type: image" + }, + "image": { + "type": "string", + "description": "Base64-encoded image data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the image" + } + }, + "required": [ + "type", + "image" + ], + "additionalProperties": false, + "description": "Image content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ], + "description": "Part type: file" + }, + "data": { + "type": "string", + "description": "Base64-encoded file data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the file" + }, + "filename": { + "type": "string", + "description": "Optional filename" + } + }, + "required": [ + "type", + "data", + "mimeType" + ], + "additionalProperties": false, + "description": "File content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ui-resource" + ], + "description": "Part type: ui-resource" + }, + "uri": { + "type": "string", + "description": "URI identifying the UI resource (must start with ui://)" + }, + "mimeType": { + "type": "string", + "description": "MIME type: text/html, text/uri-list, or application/vnd.mcp-ui.remote-dom" + }, + "content": { + "type": "string", + "description": "Inline HTML content or URL" + }, + "blob": { + "type": "string", + "description": "Base64-encoded content (alternative to content)" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + 
"type": "string", + "description": "Display title for the UI resource" + }, + "preferredSize": { + "type": "object", + "properties": { + "width": { + "type": "number", + "description": "Preferred width in pixels" + }, + "height": { + "type": "number", + "description": "Preferred height in pixels" + } + }, + "required": [ + "width", + "height" + ], + "additionalProperties": false, + "description": "Preferred rendering size" + } + }, + "additionalProperties": false, + "description": "Optional metadata for the UI resource" + } + }, + "required": [ + "type", + "uri", + "mimeType" + ], + "additionalProperties": false, + "description": "UI Resource content part for MCP-UI interactive components" + } + ], + "description": "Message content part (text, image, file, or UI resource)" + } + }, + { + "nullable": true + } + ], + "description": "Message content (string, null, or array of parts)" + }, + "reasoning": { + "type": "string", + "description": "Optional model reasoning text" + }, + "tokenUsage": { + "type": "object", + "properties": { + "inputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of input tokens" + }, + "outputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of output tokens" + }, + "reasoningTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of reasoning tokens" + }, + "cacheReadTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache read tokens" + }, + "cacheWriteTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache write tokens" + }, + "totalTokens": { + "type": "integer", + "minimum": 0, + "description": "Total tokens used" + } + }, + "additionalProperties": false, + "description": "Optional token usage accounting" + }, + "estimatedCost": { + "type": "number", + "minimum": 0, + "description": "Estimated cost in USD for this response" + }, + "pricingStatus": { + "type": "string", + "enum": [ + "estimated", + "unpriced" + ], + 
"description": "Whether pricing was resolved for this response" + }, + "usageScopeId": { + "type": "string", + "description": "Optional usage scope identifier for runtime-scoped metering" + }, + "model": { + "type": "string", + "description": "Model identifier for assistant messages" + }, + "provider": { + "type": "string", + "enum": [ + "openai", + "openai-compatible", + "anthropic", + "google", + "groq", + "xai", + "cohere", + "minimax", + "glm", + "openrouter", + "litellm", + "glama", + "vertex", + "bedrock", + "local", + "ollama", + "dexto-nova" + ], + "description": "Provider identifier for assistant messages" + }, + "toolCalls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this tool call" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "description": "Tool call type (currently only function is supported)" + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the function to call" + }, + "arguments": { + "type": "string", + "description": "Arguments for the function in JSON string format" + } + }, + "required": [ + "name", + "arguments" + ], + "additionalProperties": false, + "description": "Function call details" + } + }, + "required": [ + "id", + "type", + "function" + ], + "additionalProperties": false, + "description": "Tool call made by the assistant" + }, + "description": "Tool calls made by the assistant" + }, + "toolCallId": { + "type": "string", + "description": "ID of the tool call this message responds to" + }, + "name": { + "type": "string", + "description": "Name of the tool that produced this result" + }, + "success": { + "type": "boolean", + "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a 
compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" + } + }, + "required": [ + "role", + "content" + ], + "additionalProperties": false, + "description": "Internal message representation" + }, + "description": "Generated summary messages returned by the compaction strategy" + }, + "continuationMessages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid", + "description": "Unique message identifier (UUID)" + }, + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant", + "tool" + ], + "description": "Role of the message sender" + }, + "timestamp": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "Creation timestamp (Unix ms)" + }, + "content": { + "anyOf": [ + { + "type": "string" + }, + { + "nullable": true + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ], + "description": "Part type: text" + }, + "text": { + "type": "string", + "description": "Text content" + } + }, + "required": [ + "type", + "text" + ], + "additionalProperties": false, + "description": "Text content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": 
"string", + "enum": [ + "image" + ], + "description": "Part type: image" + }, + "image": { + "type": "string", + "description": "Base64-encoded image data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the image" + } + }, + "required": [ + "type", + "image" + ], + "additionalProperties": false, + "description": "Image content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "file" + ], + "description": "Part type: file" + }, + "data": { + "type": "string", + "description": "Base64-encoded file data" + }, + "mimeType": { + "type": "string", + "description": "MIME type of the file" + }, + "filename": { + "type": "string", + "description": "Optional filename" + } + }, + "required": [ + "type", + "data", + "mimeType" + ], + "additionalProperties": false, + "description": "File content part" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ui-resource" + ], + "description": "Part type: ui-resource" + }, + "uri": { + "type": "string", + "description": "URI identifying the UI resource (must start with ui://)" + }, + "mimeType": { + "type": "string", + "description": "MIME type: text/html, text/uri-list, or application/vnd.mcp-ui.remote-dom" + }, + "content": { + "type": "string", + "description": "Inline HTML content or URL" + }, + "blob": { + "type": "string", + "description": "Base64-encoded content (alternative to content)" + }, + "metadata": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Display title for the UI resource" + }, + "preferredSize": { + "type": "object", + "properties": { + "width": { + "type": "number", + "description": "Preferred width in pixels" + }, + "height": { + "type": "number", + "description": "Preferred height in pixels" + } + }, + "required": [ + "width", + "height" + ], + "additionalProperties": false, + "description": "Preferred rendering size" + } + }, + "additionalProperties": 
false, + "description": "Optional metadata for the UI resource" + } + }, + "required": [ + "type", + "uri", + "mimeType" + ], + "additionalProperties": false, + "description": "UI Resource content part for MCP-UI interactive components" + } + ], + "description": "Message content part (text, image, file, or UI resource)" + } + }, + { + "nullable": true + } + ], + "description": "Message content (string, null, or array of parts)" + }, + "reasoning": { + "type": "string", + "description": "Optional model reasoning text" + }, + "tokenUsage": { + "type": "object", + "properties": { + "inputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of input tokens" + }, + "outputTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of output tokens" + }, + "reasoningTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of reasoning tokens" + }, + "cacheReadTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache read tokens" + }, + "cacheWriteTokens": { + "type": "integer", + "minimum": 0, + "description": "Number of cache write tokens" + }, + "totalTokens": { + "type": "integer", + "minimum": 0, + "description": "Total tokens used" + } + }, + "additionalProperties": false, + "description": "Optional token usage accounting" + }, + "estimatedCost": { + "type": "number", + "minimum": 0, + "description": "Estimated cost in USD for this response" + }, + "pricingStatus": { + "type": "string", + "enum": [ + "estimated", + "unpriced" + ], + "description": "Whether pricing was resolved for this response" + }, + "usageScopeId": { + "type": "string", + "description": "Optional usage scope identifier for runtime-scoped metering" + }, + "model": { + "type": "string", + "description": "Model identifier for assistant messages" + }, + "provider": { + "type": "string", + "enum": [ + "openai", + "openai-compatible", + "anthropic", + "google", + "groq", + "xai", + "cohere", + "minimax", + "glm", + "openrouter", + 
"litellm", + "glama", + "vertex", + "bedrock", + "local", + "ollama", + "dexto-nova" + ], + "description": "Provider identifier for assistant messages" + }, + "toolCalls": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for this tool call" + }, + "type": { + "type": "string", + "enum": [ + "function" + ], + "description": "Tool call type (currently only function is supported)" + }, + "function": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the function to call" + }, + "arguments": { + "type": "string", + "description": "Arguments for the function in JSON string format" + } + }, + "required": [ + "name", + "arguments" + ], + "additionalProperties": false, + "description": "Function call details" + } + }, + "required": [ + "id", + "type", + "function" + ], + "additionalProperties": false, + "description": "Tool call made by the assistant" + }, + "description": "Tool calls made by the assistant" + }, + "toolCallId": { + "type": "string", + "description": "ID of the tool call this message responds to" + }, + "name": { + "type": "string", + "description": "Name of the tool that produced this result" + }, + "success": { + "type": "boolean", + "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + 
"type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" + } + }, + "required": [ + "role", + "content" + ], + "additionalProperties": false, + "description": "Internal message representation" + }, + "description": "Messages that can seed a continued session after compaction" + } + }, + "required": [ + "id", + "sourceSessionId", + "createdAt", + "strategy", + "mode", + "trigger", + "originalTokens", + "compactedTokens", + "originalMessages", + "compactedMessages", + "summaryMessages", + "continuationMessages" + ], + "additionalProperties": false, + "description": "Persisted session compaction artifact" + } + }, + "required": [ + "compaction" + ], + "additionalProperties": false + } + } + } + }, + "404": { + "description": "Compaction artifact not found", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Error code" + }, + "message": { + "type": "string", + "description": "Error message" + }, + "scope": { + "type": "string", + "description": "Error scope" + }, + "type": { + "type": "string", + "description": "Error type" + }, + "context": { + "nullable": true, + "description": "Error context" + }, + "recovery": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Recovery guidance" + }, + "traceId": { + "type": "string", + "description": "Trace identifier" + }, + "endpoint": { + "type": "string", + "description": "Request endpoint" + }, + "method": { + "type": "string", + "description": "HTTP method" + } + }, + "required": [ + "code", + "message", + "scope", + "type", + "traceId", + "endpoint", + "method" + ], + "additionalProperties": false, + "description": "Standard API error 
envelope" + } + } + } + } + } + } + }, "/api/sessions/{sessionId}": { "get": { "summary": "Get Session Details", @@ -9740,6 +11731,39 @@ "success": { "type": "boolean", "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" } }, "required": [ @@ -10907,6 +12931,39 @@ "success": { "type": "boolean", "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + 
"description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" } }, "required": [ @@ -11352,6 +13409,39 @@ "success": { "type": "boolean", "description": "Whether tool execution succeeded (present for role=tool messages)" + }, + "metadata": { + "type": "object", + "properties": { + "isSummary": { + "type": "boolean", + "description": "Whether this message marks a compaction summary boundary" + }, + "isSessionSummary": { + "type": "boolean", + "description": "Whether this message marks a session-level summary boundary" + }, + "isRecompaction": { + "type": "boolean", + "description": "Whether this summary was produced from already-compacted history" + }, + "originalMessageCount": { + "type": "integer", + "minimum": 0, + "description": "How many original messages were summarized by this boundary" + }, + "preservedMessageIds": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Stable message IDs preserved in the continuation window when this summary supersedes older working memory" + } + }, + "additionalProperties": { + "nullable": true + }, + "description": "Optional message metadata" } }, "required": [ diff --git a/packages/core/src/agent/DextoAgent.session-compaction.integration.test.ts b/packages/core/src/agent/DextoAgent.session-compaction.integration.test.ts new file mode 100644 index 000000000..7aa4c63ec --- /dev/null +++ b/packages/core/src/agent/DextoAgent.session-compaction.integration.test.ts @@ -0,0 +1,524 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { DextoAgent } from './DextoAgent.js'; +import type { AgentRuntimeSettings } from './runtime-config.js'; +import { SystemPromptConfigSchema } from '../systemPrompt/schemas.js'; +import { LLMConfigSchema } from '../llm/schemas.js'; +import { LoggerConfigSchema } from 
'../logger/index.js'; +import { SessionConfigSchema } from '../session/schemas.js'; +import { PermissionsConfigSchema, ElicitationConfigSchema } from '../tools/schemas.js'; +import { ResourcesConfigSchema } from '../resources/schemas.js'; +import { PromptsSchema } from '../prompts/schemas.js'; +import { createLogger } from '../logger/factory.js'; +import { ServersConfigSchema } from '../mcp/schemas.js'; +import { + createInMemoryBlobStore, + createInMemoryCache, + createInMemoryDatabase, +} from '../test-utils/in-memory-storage.js'; +import type { CompactionStrategy } from '../context/compaction/types.js'; +import { filterCompacted } from '../context/utils.js'; +import type { InternalMessage } from '../context/types.js'; + +const testCompactionStrategy: CompactionStrategy = { + name: 'test-session-compaction', + getSettings: () => ({ + enabled: true, + thresholdPercent: 0.9, + }), + getModelLimits: (modelContextWindow: number) => ({ + contextWindow: modelContextWindow, + }), + shouldCompact: () => false, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 'text', text: 'Compacted summary' }], + timestamp: Date.now(), + }, + ], + preserveFromWorkingIndex: 2, + }), +}; + +const testSettings: AgentRuntimeSettings = { + systemPrompt: SystemPromptConfigSchema.parse('You are a helpful assistant.'), + llm: LLMConfigSchema.parse({ + provider: 'openai', + model: 'gpt-5-mini', + apiKey: 'test-key-123', + }), + agentId: 'session-compaction-test-agent', + mcpServers: ServersConfigSchema.parse({}), + sessions: SessionConfigSchema.parse({ + maxSessions: 10, + sessionTTL: 1000, + }), + permissions: PermissionsConfigSchema.parse({ + mode: 'auto-approve', + timeout: 120000, + }), + elicitation: ElicitationConfigSchema.parse({ + enabled: false, + timeout: 120000, + }), + resources: ResourcesConfigSchema.parse([]), + prompts: PromptsSchema.parse([]), +}; + +async function addSeedHistory(agent: DextoAgent, sessionId: string): Promise { + const 
session = await agent.createSession(sessionId); + const contextManager = session.getContextManager(); + + const messages: InternalMessage[] = [ + { + role: 'user', + content: [{ type: 'text', text: 'old request' }], + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'old response' }], + }, + { + role: 'user', + content: [{ type: 'text', text: 'keep this request' }], + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'keep this response' }], + }, + ]; + + for (const message of messages) { + await contextManager.addMessage(message); + } +} + +async function addShortSeedHistory(agent: DextoAgent, sessionId: string): Promise { + const session = await agent.createSession(sessionId); + const contextManager = session.getContextManager(); + + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'brief request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'brief response' }], + }); +} + +describe('DextoAgent session compaction integration', () => { + let agent: DextoAgent; + + beforeEach(async () => { + const loggerConfig = LoggerConfigSchema.parse({ + level: 'warn', + transports: [{ type: 'console', colorize: false }], + }); + const logger = createLogger({ config: loggerConfig, agentId: testSettings.agentId }); + + agent = new DextoAgent({ + ...testSettings, + logger, + storage: { + blob: createInMemoryBlobStore(), + database: createInMemoryDatabase(), + cache: createInMemoryCache(), + }, + compaction: testCompactionStrategy, + tools: [], + hooks: [], + }); + await agent.start(); + }); + + afterEach(async () => { + if (agent.isStarted()) { + await agent.stop(); + } + }); + + it('continues in place and persists a compaction artifact', async () => { + const sessionId = 'compact-in-place'; + await addSeedHistory(agent, sessionId); + + const compaction = await agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }); + + 
expect(compaction).not.toBeNull(); + expect(compaction?.targetSessionId).toBeUndefined(); + expect(compaction?.mode).toBe('continue-in-place'); + expect(compaction?.summaryMessages).toHaveLength(1); + expect(compaction?.continuationMessages).toHaveLength(3); + + const stored = await agent.getSessionCompaction(compaction!.id); + expect(stored?.id).toBe(compaction?.id); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(5); + + const filtered = filterCompacted(history); + expect(filtered).toHaveLength(3); + expect(filtered[0]?.metadata?.isSummary).toBe(true); + }); + + it('creates a seeded child session from the persisted continuation artifact', async () => { + const sessionId = 'compact-in-child'; + await addSeedHistory(agent, sessionId); + + const compaction = await agent.compactSession({ + sessionId, + mode: 'continue-in-child', + trigger: 'api', + }); + + expect(compaction).not.toBeNull(); + expect(compaction?.targetSessionId).toBeDefined(); + expect(compaction?.mode).toBe('continue-in-child'); + + const sourceHistory = await agent.getSessionHistory(sessionId); + expect(sourceHistory).toHaveLength(4); + + const childSessionId = compaction!.targetSessionId!; + const childHistory = await agent.getSessionHistory(childSessionId); + expect(childHistory).toHaveLength(compaction!.continuationMessages.length); + expect(childHistory[0]?.metadata?.isSummary).toBe(true); + + const childMetadata = await agent.getSessionMetadata(childSessionId); + expect(childMetadata?.parentSessionId).toBe(sessionId); + expect(childMetadata?.messageCount).toBe(compaction?.continuationMessages.length); + + const stored = await agent.getSessionCompaction(compaction!.id); + expect(stored?.targetSessionId).toBe(childSessionId); + }); + + it('persists an artifact without mutating the source session when using artifact-only mode', async () => { + const sessionId = 'compact-artifact-only'; + await addSeedHistory(agent, sessionId); + + const compaction = 
await agent.compactSession({ + sessionId, + mode: 'artifact-only', + trigger: 'scheduled', + }); + + expect(compaction).not.toBeNull(); + expect(compaction?.mode).toBe('artifact-only'); + expect(compaction?.trigger).toBe('scheduled'); + expect(compaction?.targetSessionId).toBeUndefined(); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(filterCompacted(history)).toHaveLength(4); + + const stored = await agent.getSessionCompaction(compaction!.id); + expect(stored?.id).toBe(compaction?.id); + expect(stored?.mode).toBe('artifact-only'); + expect(stored?.continuationMessages).toHaveLength(3); + }); + + it('allows session compaction for short histories when the strategy returns a summary', async () => { + const sessionId = 'compact-short-history'; + await addShortSeedHistory(agent, sessionId); + + const compaction = await agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }); + + expect(compaction).not.toBeNull(); + expect(compaction?.summaryMessages).toHaveLength(1); + expect(compaction?.continuationMessages).toHaveLength(1); + }); + + it('validates the input object before reading sessionId', async () => { + await expect(Reflect.apply(agent.compactSession, agent, [undefined])).rejects.toThrow( + 'input is required and must be an object' + ); + }); + + it('rejects childTitle outside continue-in-child mode for direct SDK consumers', async () => { + const sessionId = 'compact-child-title-invalid'; + await addSeedHistory(agent, sessionId); + + await expect( + agent.compactSession({ + sessionId, + mode: 'artifact-only', + childTitle: 'Should not be allowed', + }) + ).rejects.toThrow('childTitle is only supported when mode is "continue-in-child"'); + }); + + it('rejects non-string childTitle values for direct SDK consumers', async () => { + const sessionId = 'compact-child-title-type-invalid'; + await addSeedHistory(agent, sessionId); + + await expect( + 
Reflect.apply(agent.compactSession, agent, [ + { + sessionId, + mode: 'continue-in-child', + childTitle: 123, + }, + ]) + ).rejects.toThrow('childTitle must be a string when provided'); + }); + + it('does not mutate the source session when artifact persistence fails in continue-in-place mode', async () => { + const sessionId = 'compact-in-place-save-failure'; + await addSeedHistory(agent, sessionId); + + const saveSpy = vi + .spyOn(agent.sessionManager, 'saveSessionCompaction') + .mockRejectedValueOnce(new Error('persist failed')); + + await expect( + agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }) + ).rejects.toThrow('persist failed'); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(filterCompacted(history)).toHaveLength(4); + + saveSpy.mockRestore(); + }); + + it('rolls back the persisted artifact when in-place compaction apply fails', async () => { + const sessionId = 'compact-in-place-apply-failure'; + await addSeedHistory(agent, sessionId); + + const session = await agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + const originalAddMessage = contextManager.addMessage.bind(contextManager); + const addMessageSpy = vi + .spyOn(contextManager, 'addMessage') + .mockImplementation(async (message) => { + if ( + message.metadata?.isSummary === true || + message.metadata?.isSessionSummary === true + ) { + throw new Error('apply failed'); + } + + await originalAddMessage(message); + }); + + const originalSaveSessionCompaction = agent.sessionManager.saveSessionCompaction.bind( + agent.sessionManager + ); + let persistedCompactionId: string | undefined; + const saveSpy = vi + .spyOn(agent.sessionManager, 'saveSessionCompaction') + .mockImplementation(async (compaction) => { + persistedCompactionId = compaction.id; + await 
originalSaveSessionCompaction(compaction); + }); + + await expect( + agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }) + ).rejects.toThrow('apply failed'); + + if (!persistedCompactionId) { + throw new Error('Expected compaction artifact to be persisted before apply failure'); + } + + expect(await agent.getSessionCompaction(persistedCompactionId)).toBeUndefined(); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(filterCompacted(history)).toHaveLength(4); + + saveSpy.mockRestore(); + addMessageSpy.mockRestore(); + }); + + it('rolls back the child session when artifact persistence fails in continue-in-child mode', async () => { + const sessionId = 'compact-in-child-save-failure'; + await addSeedHistory(agent, sessionId); + + const originalCreateSeededChildSession = agent.sessionManager.createSeededChildSession.bind( + agent.sessionManager + ); + let childSessionId: string | undefined; + + const createSpy = vi + .spyOn(agent.sessionManager, 'createSeededChildSession') + .mockImplementation(async (parentSessionId, options) => { + const childSession = await originalCreateSeededChildSession( + parentSessionId, + options + ); + childSessionId = childSession.id; + return childSession; + }); + const saveSpy = vi + .spyOn(agent.sessionManager, 'saveSessionCompaction') + .mockRejectedValueOnce(new Error('persist failed')); + + await expect( + agent.compactSession({ + sessionId, + mode: 'continue-in-child', + trigger: 'api', + }) + ).rejects.toThrow('persist failed'); + + if (!childSessionId) { + throw new Error('Expected child session to be created before persistence failure'); + } + + const sourceHistory = await agent.getSessionHistory(sessionId); + expect(sourceHistory).toHaveLength(4); + expect(await agent.getSessionMetadata(childSessionId)).toBeUndefined(); + + saveSpy.mockRestore(); + createSpy.mockRestore(); + }); + + it('rejects strategies that return multiple summary 
messages for session compaction', async () => { + const sessionId = 'compact-multi-summary-invalid'; + await addSeedHistory(agent, sessionId); + + const session = await agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const multiSummaryStrategy: CompactionStrategy = { + ...testCompactionStrategy, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 'text', text: 'Summary one' }], + }, + { + role: 'assistant', + content: [{ type: 'text', text: 'Summary two' }], + }, + ], + preserveFromWorkingIndex: 2, + }), + }; + + const getCompactionStrategySpy = vi + .spyOn(session.getLLMService(), 'getCompactionStrategy') + .mockReturnValue(multiSummaryStrategy); + + await expect( + agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }) + ).rejects.toThrow('must return exactly one summary message'); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(history.some((message) => message.metadata?.isSummary === true)).toBe(false); + + getCompactionStrategySpy.mockRestore(); + }); + + it('rejects preserveFromWorkingIndex values outside the working-history bounds', async () => { + const sessionId = 'compact-working-boundary-invalid'; + await addSeedHistory(agent, sessionId); + + const session = await agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const invalidBoundaryStrategy: CompactionStrategy = { + ...testCompactionStrategy, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 'text', text: 'Bad summary' }], + }, + ], + preserveFromWorkingIndex: 999, + }), + }; + + const getCompactionStrategySpy = vi + .spyOn(session.getLLMService(), 'getCompactionStrategy') + .mockReturnValue(invalidBoundaryStrategy); + + await expect( + agent.compactSession({ + sessionId, + mode: 
'artifact-only', + trigger: 'scheduled', + }) + ).rejects.toThrow( + 'must provide a valid preserveFromWorkingIndex within the current working history bounds' + ); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(history.filter((message) => message.metadata?.isSummary === true)).toHaveLength(0); + + getCompactionStrategySpy.mockRestore(); + }); + + it('rejects continue-in-place compaction when preserved source messages lack stable ids', async () => { + const sessionId = 'compact-in-place-missing-preserved-id'; + await addSeedHistory(agent, sessionId); + + const session = await agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + const originalGetHistory = contextManager.getHistory.bind(contextManager); + const getHistorySpy = vi + .spyOn(contextManager, 'getHistory') + .mockImplementation(async () => { + const history = await originalGetHistory(); + return history.map((message, index) => + index >= 2 + ? 
(() => { + const cloned = structuredClone(message); + delete cloned.id; + return cloned; + })() + : message + ); + }); + + await expect( + agent.compactSession({ + sessionId, + mode: 'continue-in-place', + trigger: 'manual', + }) + ).rejects.toThrow('produced continuation messages without stable IDs'); + + const history = await agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(history.filter((message) => message.metadata?.isSummary === true)).toHaveLength(0); + + getHistorySpy.mockRestore(); + }); +}); diff --git a/packages/core/src/agent/DextoAgent.ts b/packages/core/src/agent/DextoAgent.ts index a27f35791..e1511a43e 100644 --- a/packages/core/src/agent/DextoAgent.ts +++ b/packages/core/src/agent/DextoAgent.ts @@ -12,8 +12,18 @@ import type { InternalMessage } from '../context/types.js'; import { PromptManager } from '../prompts/index.js'; import type { PromptsConfig } from '../prompts/schemas.js'; import { AgentStateManager } from './state-manager.js'; -import { SessionManager, ChatSession, SessionError } from '../session/index.js'; -import type { SessionMetadata } from '../session/index.js'; +import { + SessionManager, + ChatSession, + SessionError, + SESSION_COMPACTION_MODES, + SESSION_COMPACTION_TRIGGERS, +} from '../session/index.js'; +import type { + SessionMetadata, + SessionCompactionInput, + SessionCompactionRecord, +} from '../session/index.js'; import { AgentServices, type InitializeServicesOptions, @@ -74,6 +84,10 @@ import type { ApprovalHandler } from '../approval/types.js'; import type { DextoAgentOptions } from './agent-options.js'; import type { WorkspaceManager } from '../workspace/manager.js'; import type { SetWorkspaceInput, WorkspaceContext } from '../workspace/types.js'; +import { + createAgentSessionCompactionEventSink, + runSessionCompaction, +} from '../session/compaction-service.js'; const requiredServices: (keyof AgentServices)[] = [ 'mcpManager', @@ -87,6 +101,9 @@ const requiredServices: (keyof 
AgentServices)[] = [ 'memoryManager', ]; +const sessionCompactionModes = new Set(SESSION_COMPACTION_MODES); +const sessionCompactionTriggers = new Set(SESSION_COMPACTION_TRIGGERS); + /** * Interface for objects that can subscribe to the agent's event bus. * Typically used by API layer subscribers (SSE, Webhooks, etc.) @@ -1853,134 +1870,137 @@ export class DextoAgent { } /** - * Manually compact the context for a session. - * - * Compaction generates a summary of older messages and adds it to the conversation history. - * When the context is loaded, filterCompacted() will exclude messages before the summary, - * effectively reducing the context window while preserving the full history in storage. + * Resolve a persisted compaction artifact by its ID. + */ + public async getSessionCompaction( + compactionId: string + ): Promise { + this.ensureStarted(); + + if (!compactionId || typeof compactionId !== 'string') { + throw AgentError.apiValidationError( + 'compactionId is required and must be a non-empty string' + ); + } + + return await this.sessionManager.getSessionCompaction(compactionId); + } + + /** + * Compact a session into a persisted artifact and optionally apply it. 
* - * @param sessionId Session ID of the session to compact (required) - * @returns Compaction result with stats, or null if compaction was skipped + * This turns the current working memory into a reusable compaction artifact + * that can: + * - remain as an artifact only + * - update the current session in place + * - seed a new child session for continuation */ - public async compactContext(sessionId: string): Promise<{ - /** The session that was compacted */ - sessionId: string; - /** Estimated tokens in context after compaction (includes system prompt, tools, and messages) */ - compactedContextTokens: number; - /** Number of messages before compaction */ - originalMessages: number; - /** Number of messages after compaction (summary + preserved) */ - compactedMessages: number; - } | null> { + public async compactSession( + input: SessionCompactionInput + ): Promise { this.ensureStarted(); + if (!input || typeof input !== 'object' || Array.isArray(input)) { + throw AgentError.apiValidationError('input is required and must be an object'); + } + + const sessionId = input.sessionId; if (!sessionId || typeof sessionId !== 'string') { throw AgentError.apiValidationError( 'sessionId is required and must be a non-empty string' ); } + const mode = input.mode ?? 'continue-in-place'; + if (!sessionCompactionModes.has(mode)) { + throw AgentError.apiValidationError( + `mode must be one of: ${SESSION_COMPACTION_MODES.join(', ')}` + ); + } + + if (input.childTitle !== undefined && typeof input.childTitle !== 'string') { + throw AgentError.apiValidationError('childTitle must be a string when provided'); + } + + if (input.childTitle !== undefined && mode !== 'continue-in-child') { + throw AgentError.apiValidationError( + 'childTitle is only supported when mode is "continue-in-child"' + ); + } + + const trigger = input.trigger ?? 
'manual'; + if (!sessionCompactionTriggers.has(trigger)) { + throw AgentError.apiValidationError( + `trigger must be one of: ${SESSION_COMPACTION_TRIGGERS.join(', ')}` + ); + } + const session = await this.sessionManager.getSession(sessionId); if (!session) { throw SessionError.notFound(sessionId); } - // Get compaction strategy from the session's LLM service const llmService = session.getLLMService(); const compactionStrategy = llmService.getCompactionStrategy(); if (!compactionStrategy) { this.logger.warn( - `Compaction strategy not configured for session ${sessionId} - skipping manual compaction` + `Compaction strategy not configured for session ${sessionId} - skipping compaction` ); return null; } - // Get history and generate summary const contextManager = session.getContextManager(); - const history = await contextManager.getHistory(); - - if (history.length < 4) { - this.logger.debug(`Compaction skipped for session ${sessionId} - history too short`); - return null; - } - // Get full context estimate BEFORE compaction (includes system prompt, tools, messages) - // This uses the same calculation as /context command for consistency const contributorContext = await this.toolManager.buildContributorContext({ sessionId }); const tools = await llmService.getEnabledTools(); - const beforeEstimate = await contextManager.getContextTokenEstimate( - contributorContext, - tools - ); - const originalTokens = beforeEstimate.estimated; - const originalMessages = beforeEstimate.stats.filteredMessageCount; - - // Emit compacting event - this.agentEventBus.emit('context:compacting', { - estimatedTokens: originalTokens, + return await runSessionCompaction({ sessionId, + mode, + trigger, + ...(input.childTitle !== undefined && { childTitle: input.childTitle }), + languageModel: llmService.getLanguageModel(), + logger: session.logger, + contextManager, + compactionStrategy, + contributorContext, + tools, + persistence: this.sessionManager, + eventSink: 
createAgentSessionCompactionEventSink(this.agentEventBus, sessionId), }); + } - // Generate summary message(s) - const summaryMessages = await compactionStrategy.compact(history, { + /** + * Manually compact the context for a session. + * + * Compatibility wrapper over the new compaction primitive that preserves + * the previous public API shape. + */ + public async compactContext(sessionId: string): Promise<{ + /** The session that was compacted */ + sessionId: string; + /** Estimated tokens in context after compaction (includes system prompt, tools, and messages) */ + compactedContextTokens: number; + /** Number of messages before compaction */ + originalMessages: number; + /** Number of messages after compaction (summary + preserved) */ + compactedMessages: number; + } | null> { + const compaction = await this.compactSession({ sessionId, - model: llmService.getLanguageModel(), - logger: session.logger, + mode: 'continue-in-place', + trigger: 'manual', }); - if (summaryMessages.length === 0) { - this.logger.debug(`Compaction skipped for session ${sessionId} - nothing to compact`); - this.agentEventBus.emit('context:compacted', { - originalTokens, - compactedTokens: originalTokens, - originalMessages, - compactedMessages: originalMessages, - strategy: compactionStrategy.name, - reason: 'manual', - sessionId, - }); + if (!compaction) { return null; } - // Add summary to history - filterCompacted() will exclude pre-summary messages at read-time - for (const summary of summaryMessages) { - await contextManager.addMessage(summary); - } - - // Reset actual token tracking since context has fundamentally changed - // The formula (lastInput + lastOutput + newEstimate) is no longer valid after compaction - contextManager.resetActualTokenTracking(); - - // Get full context estimate AFTER compaction (uses pure estimation since actuals were reset) - // This ensures /context will show the same value - const afterEstimate = await contextManager.getContextTokenEstimate( - 
contributorContext, - tools - ); - const compactedTokens = afterEstimate.estimated; - const compactedMessages = afterEstimate.stats.filteredMessageCount; - - this.agentEventBus.emit('context:compacted', { - originalTokens, - compactedTokens, - originalMessages, - compactedMessages, - strategy: compactionStrategy.name, - reason: 'manual', - sessionId, - }); - - this.logger.info( - `Compaction complete for session ${sessionId}: ` + - `${originalMessages} messages → ${compactedMessages} messages (~${compactedTokens} tokens)` - ); - return { sessionId, - compactedContextTokens: compactedTokens, - originalMessages, - compactedMessages, + compactedContextTokens: compaction.compactedTokens, + originalMessages: compaction.originalMessages, + compactedMessages: compaction.compactedMessages, }; } diff --git a/packages/core/src/context/compaction/compaction.integration.test.ts b/packages/core/src/context/compaction/compaction.integration.test.ts index 34c72686e..17f4895f6 100644 --- a/packages/core/src/context/compaction/compaction.integration.test.ts +++ b/packages/core/src/context/compaction/compaction.integration.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import { ContextManager } from '../manager.js'; import { filterCompacted } from '../utils.js'; import { ReactiveOverflowCompactionStrategy } from './strategies/reactive-overflow-compaction.js'; +import { buildCompactionWindow } from './window.js'; import { VercelMessageFormatter } from '../../llm/formatters/vercel.js'; import { SystemPromptManager } from '../../systemPrompt/manager.js'; import { SystemPromptConfigSchema } from '../../systemPrompt/schemas.js'; @@ -152,17 +153,33 @@ describe('Context Compaction Integration Tests', () => { */ async function runCompaction(): Promise { const history = await contextManager.getHistory(); - const summaryMessages = await compactionStrategy.compact(history, { + const compactionWindow = buildCompactionWindow(history); + const 
result = await compactionStrategy.compact(compactionWindow, { sessionId, model: createMockModel(), logger, }); - if (summaryMessages.length === 0) { + if (!result || result.summaryMessages.length === 0) { return null; } - const summary = summaryMessages[0]!; + const preservedMessageIds = compactionWindow.workingHistory + .slice(result.preserveFromWorkingIndex) + .map((message) => { + if (!message.id) { + throw new Error('Expected preserved working message to have a stable id'); + } + return message.id; + }); + const summary = { + ...result.summaryMessages[0]!, + metadata: { + ...(result.summaryMessages[0]!.metadata ?? {}), + isSummary: true, + preservedMessageIds, + }, + }; await contextManager.addMessage(summary); return summary; } @@ -283,32 +300,33 @@ describe('Context Compaction Integration Tests', () => { expect(nonSummaryMessages.length).toBeLessThan(15); }); - it('should correctly calculate originalMessageCount for each compaction', async () => { + it('should preserve the continuation window across repeated compactions', async () => { // === FIRST COMPACTION === await addMessages(10); const summary1 = await runCompaction(); expect(summary1).not.toBeNull(); - // First compaction: originalMessageCount should be the number of summarized messages - const originalCount1 = summary1?.metadata?.originalMessageCount; - expect(typeof originalCount1).toBe('number'); - expect(originalCount1).toBeLessThan(20); // Less than total, some were preserved + const preservedIds1 = summary1?.metadata?.preservedMessageIds; + expect(Array.isArray(preservedIds1)).toBe(true); + if (!Array.isArray(preservedIds1)) { + throw new Error('Expected preservedMessageIds to be an array'); + } + expect(preservedIds1.length).toBeGreaterThan(0); // === SECOND COMPACTION === await addMessages(10); - const historyBefore2 = await contextManager.getHistory(); - const summary1Index = historyBefore2.findIndex((m) => m === summary1); - const summary2 = await runCompaction(); 
expect(summary2).not.toBeNull(); - // Second compaction: originalMessageCount should be ABSOLUTE - // It should be > summary1Index (pointing past the first summary) - const originalCount2 = summary2?.metadata?.originalMessageCount; - expect(typeof originalCount2).toBe('number'); - expect(originalCount2).toBeGreaterThan(summary1Index); + const preservedIds2 = summary2?.metadata?.preservedMessageIds; + expect(Array.isArray(preservedIds2)).toBe(true); + if (!Array.isArray(preservedIds2)) { + throw new Error('Expected preservedMessageIds to be an array'); + } + expect(preservedIds2.length).toBeGreaterThan(0); + expect(preservedIds2).not.toContain(summary1?.id); - // Verify filterCompacted works with this absolute count + // Verify filterCompacted works with the preserved ID window const historyAfter2 = await contextManager.getHistory(); const filtered2 = filterCompacted(historyAfter2); @@ -328,17 +346,18 @@ describe('Context Compaction Integration Tests', () => { expect(summary).toBeNull(); }); - it('should not re-compact if few messages after existing summary', async () => { + it('should re-compact when a working-memory prefix is still eligible', async () => { // First compaction await addMessages(10); await runCompaction(); - // Add only 2 messages (4 messages = 2 turns, below threshold) + // Add only 2 turns. The strategy can still compact the carried-forward + // working-memory prefix when manual/session compaction is invoked again. 
await addMessages(2); - // Should skip re-compaction const summary2 = await runCompaction(); - expect(summary2).toBeNull(); + expect(summary2).not.toBeNull(); + expect(summary2?.metadata?.isRecompaction).toBe(true); }); it('should handle compaction through prepareHistory flow', async () => { diff --git a/packages/core/src/context/compaction/index.ts b/packages/core/src/context/compaction/index.ts index 8dce869d6..fdb9e4ebf 100644 --- a/packages/core/src/context/compaction/index.ts +++ b/packages/core/src/context/compaction/index.ts @@ -1,4 +1,5 @@ export * from './types.js'; +export * from './window.js'; export * from './strategies/reactive-overflow-compaction.js'; export * from './strategies/noop.js'; diff --git a/packages/core/src/context/compaction/strategies/noop.ts b/packages/core/src/context/compaction/strategies/noop.ts index 16e93d207..02e915959 100644 --- a/packages/core/src/context/compaction/strategies/noop.ts +++ b/packages/core/src/context/compaction/strategies/noop.ts @@ -1,6 +1,10 @@ -import type { InternalMessage } from '../../types.js'; import type { ModelLimits } from '../overflow.js'; -import type { CompactionRuntimeContext, CompactionSettings, CompactionStrategy } from '../types.js'; +import type { + CompactionRuntimeContext, + CompactionSettings, + CompactionStrategy, + CompactionWindow, +} from '../types.js'; /** * No-op compaction strategy that doesn't perform any compaction. 
@@ -47,12 +51,9 @@ export class NoOpCompactionStrategy implements CompactionStrategy { } /** - * Does nothing - returns empty array (no summary needed) + * Does nothing - returns null (no compaction needed) */ - async compact( - _history: readonly InternalMessage[], - _context: CompactionRuntimeContext - ): Promise { - return []; + async compact(_window: CompactionWindow, _context: CompactionRuntimeContext): Promise { + return null; } } diff --git a/packages/core/src/context/compaction/strategies/reactive-overflow-compaction.ts b/packages/core/src/context/compaction/strategies/reactive-overflow-compaction.ts index 907d2a3a6..8f71b0123 100644 --- a/packages/core/src/context/compaction/strategies/reactive-overflow-compaction.ts +++ b/packages/core/src/context/compaction/strategies/reactive-overflow-compaction.ts @@ -3,7 +3,13 @@ import type { InternalMessage, ToolCall } from '../../types.js'; import { isAssistantMessage, isToolMessage } from '../../types.js'; import type { Logger } from '../../../logger/v2/types.js'; import { isOverflow, type ModelLimits } from '../overflow.js'; -import type { CompactionRuntimeContext, CompactionSettings, CompactionStrategy } from '../types.js'; +import type { + CompactionResult, + CompactionRuntimeContext, + CompactionSettings, + CompactionStrategy, + CompactionWindow, +} from '../types.js'; /** * Configuration options for ReactiveOverflowCompactionStrategy. @@ -78,18 +84,14 @@ Conversation to summarize: * Key behaviors: * - Triggers on overflow (after actual tokens exceed context limit) * - Uses LLM to generate intelligent summary of older messages - * - Returns summary message to ADD to history (not replace) - * - Read-time filtering via filterCompacted() excludes pre-summary messages + * - Returns a structured summary + working-history boundary + * - Core materializes that boundary into the session's continuation window * * This strategy is designed to work with TurnExecutor's main loop: * 1. 
After each step, check if overflow occurred - * 2. If yes, generate summary and ADD it to history - * 3. filterCompacted() in getFormattedMessages() excludes old messages + * 2. If yes, generate a replacement summary over the oldest working-memory prefix + * 3. Core materializes the preserved working-memory tail for the next continuation * 4. Continue with fresh context (summary + recent messages) - * - * NOTE: This does NOT replace history. The summary message is ADDED, - * and filterCompacted() handles excluding old messages at read-time. - * This preserves full history for audit/recovery purposes. */ export class ReactiveOverflowCompactionStrategy implements CompactionStrategy { readonly name = 'reactive-overflow'; @@ -130,136 +132,54 @@ export class ReactiveOverflowCompactionStrategy implements CompactionStrategy { } async compact( - history: readonly InternalMessage[], + window: CompactionWindow, context: CompactionRuntimeContext - ): Promise { + ): Promise { if (!this.settings.enabled) { - return []; + return null; } const { model, logger } = context; + const workingHistory = window.workingHistory; // Don't compact if history is too short - if (history.length <= 2) { + if (workingHistory.length <= 2) { logger.debug( 'ReactiveOverflowCompactionStrategy: History too short, skipping compaction' ); - return []; - } - - // Check if there's already a summary in history - // If so, we need to work with messages AFTER the summary only - // Use reverse search to find the MOST RECENT summary (important for re-compaction) - let existingSummaryIndex = -1; - for (let i = history.length - 1; i >= 0; i--) { - const msg = history[i]; - if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) { - existingSummaryIndex = i; - break; - } + return null; } - if (existingSummaryIndex !== -1) { - // There's already a summary - only consider messages AFTER it - const messagesAfterSummary = history.slice(existingSummaryIndex + 1); - - // If there are very few 
messages after the summary, skip compaction - // (nothing meaningful to re-summarize) - if (messagesAfterSummary.length <= 4) { - logger.debug( - `ReactiveOverflowCompactionStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction` - ); - return []; - } - - logger.info( - `ReactiveOverflowCompactionStrategy: Found existing summary at index ${existingSummaryIndex}, ` + - `working with ${messagesAfterSummary.length} messages after it` - ); - - // Re-run compaction on the subset after the summary - // This prevents cascading summaries of summaries - return this.compactSubset( - messagesAfterSummary, - history, - existingSummaryIndex, - model, - logger + // Split working history into messages to summarize and messages to keep + const { toSummarize, toKeep } = this.splitHistory(workingHistory); + if (window.latestSummary && toSummarize.length === 0) { + logger.debug( + 'ReactiveOverflowCompactionStrategy: No working-history prefix is eligible for re-compaction' ); + return null; } - // Split history into messages to summarize and messages to keep - const { toSummarize, toKeep } = this.splitHistory(history); - // If nothing to summarize, return empty (no summary needed) if (toSummarize.length === 0) { logger.debug('ReactiveOverflowCompactionStrategy: No messages to summarize'); - return []; + return null; } // Find the most recent user message to understand current task - const currentTaskMessage = this.findCurrentTaskMessage(history); - - logger.info( - `ReactiveOverflowCompactionStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}` - ); - - // Generate LLM summary of old messages with current task context - const summary = await this.generateSummary(toSummarize, currentTaskMessage, model, logger); - - // Create summary message (will be ADDED to history, not replace) - // originalMessageCount tells filterCompacted() how many messages were summarized - const summaryMessage: InternalMessage = { - role: 
'assistant', - content: [{ type: 'text', text: summary }], - timestamp: Date.now(), - metadata: { - isSummary: true, - summarizedAt: Date.now(), - originalMessageCount: toSummarize.length, - originalFirstTimestamp: toSummarize[0]?.timestamp, - originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp, - }, - }; - - // Return just the summary message - caller adds it to history - // filterCompacted() will handle excluding old messages at read-time - return [summaryMessage]; - } - - /** - * Handle re-compaction when there's already a summary in history. - * Only summarizes messages AFTER the existing summary, preventing - * cascading summaries of summaries. - */ - private async compactSubset( - messagesAfterSummary: readonly InternalMessage[], - fullHistory: readonly InternalMessage[], - existingSummaryIndex: number, - model: LanguageModel, - logger: Logger - ): Promise { - // Split the subset into messages to summarize and keep - const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary); - - if (toSummarize.length === 0) { - logger.debug('ReactiveOverflowCompactionStrategy: No messages to summarize in subset'); - return []; - } + const currentTaskMessage = this.findCurrentTaskMessage(window.activeHistory); - // Get current task from the full history - const currentTaskMessage = this.findCurrentTaskMessage(fullHistory); + const summaryInput = window.latestSummary + ? 
[window.latestSummary.message, ...toSummarize] + : [...toSummarize]; logger.info( - `ReactiveOverflowCompactionStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}` + `ReactiveOverflowCompactionStrategy: Summarizing ${summaryInput.length} message(s) of context, keeping ${toKeep.length} working message(s)` ); - // Generate summary - const summary = await this.generateSummary(toSummarize, currentTaskMessage, model, logger); - - // Create summary message - // originalMessageCount must be an ABSOLUTE index for filterCompacted() to work correctly. - const absoluteOriginalMessageCount = existingSummaryIndex + 1 + toSummarize.length; + // Generate LLM summary of old messages with current task context. + // When a prior summary exists, it becomes part of the new summary input so + // the resulting replacement summary fully supersedes it. + const summary = await this.generateSummary(summaryInput, currentTaskMessage, model, logger); const summaryMessage: InternalMessage = { role: 'assistant', @@ -268,14 +188,16 @@ export class ReactiveOverflowCompactionStrategy implements CompactionStrategy { metadata: { isSummary: true, summarizedAt: Date.now(), - originalMessageCount: absoluteOriginalMessageCount, - isRecompaction: true, - originalFirstTimestamp: toSummarize[0]?.timestamp, - originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp, + ...(window.latestSummary ? 
{ isRecompaction: true } : {}), + originalFirstTimestamp: summaryInput[0]?.timestamp, + originalLastTimestamp: summaryInput[summaryInput.length - 1]?.timestamp, }, }; - return [summaryMessage]; + return { + summaryMessages: [summaryMessage], + preserveFromWorkingIndex: toSummarize.length, + }; } /** diff --git a/packages/core/src/context/compaction/strategies/reactive-overflow.test.ts b/packages/core/src/context/compaction/strategies/reactive-overflow.test.ts index bd5ea15a4..d3b3c619d 100644 --- a/packages/core/src/context/compaction/strategies/reactive-overflow.test.ts +++ b/packages/core/src/context/compaction/strategies/reactive-overflow.test.ts @@ -4,9 +4,9 @@ import type { InternalMessage } from '../../types.js'; import type { LanguageModel } from 'ai'; import { createMockLogger } from '../../../logger/v2/test-utils.js'; import { filterCompacted } from '../../utils.js'; -import type { CompactionRuntimeContext } from '../types.js'; +import type { CompactionRuntimeContext, CompactionResult } from '../types.js'; +import { buildCompactionWindow } from '../window.js'; -// Mock the ai module vi.mock('ai', async (importOriginal) => { const actual = await importOriginal(); return { @@ -19,9 +19,6 @@ import { generateText } from 'ai'; const mockGenerateText = vi.mocked(generateText); -/** - * Helper to create a mock LanguageModel - */ function createMockModel(): LanguageModel { return { modelId: 'test-model', @@ -32,14 +29,12 @@ function createMockModel(): LanguageModel { } as unknown as LanguageModel; } -/** - * Helper to create test messages - */ function createUserMessage(text: string, timestamp?: number): InternalMessage { return { role: 'user', content: [{ type: 'text', text }], timestamp: timestamp ?? Date.now(), + id: `user-${text}-${timestamp ?? 0}`, }; } @@ -48,22 +43,23 @@ function createAssistantMessage(text: string, timestamp?: number): InternalMessa role: 'assistant', content: [{ type: 'text', text }], timestamp: timestamp ?? 
Date.now(), + id: `assistant-${text}-${timestamp ?? 0}`, }; } function createSummaryMessage( text: string, - originalMessageCount: number, + preservedMessageIds: string[], timestamp?: number ): InternalMessage { return { role: 'assistant', content: [{ type: 'text', text }], timestamp: timestamp ?? Date.now(), + id: `summary-${text}-${timestamp ?? 0}`, metadata: { isSummary: true, - summarizedAt: Date.now(), - originalMessageCount, + preservedMessageIds, }, }; } @@ -87,57 +83,54 @@ describe('ReactiveOverflowCompactionStrategy', () => { }; } - describe('compact() - short history guard', () => { - it('should return empty array when history has 2 or fewer messages', async () => { - const history: InternalMessage[] = [ - createUserMessage('Hello'), - createAssistantMessage('Hi there!'), - ]; - - const result = await strategy.compact(history, createContext()); - - expect(result).toEqual([]); - expect(mockGenerateText).not.toHaveBeenCalled(); - }); - - it('should return empty array for empty history', async () => { - const result = await strategy.compact([], createContext()); - - expect(result).toEqual([]); - expect(mockGenerateText).not.toHaveBeenCalled(); - }); - - it('should return empty array for single message', async () => { - const history: InternalMessage[] = [createUserMessage('Hello')]; - - const result = await strategy.compact(history, createContext()); - - expect(result).toEqual([]); - }); - }); + async function compactHistory( + history: readonly InternalMessage[] + ): Promise { + return await strategy.compact(buildCompactionWindow(history), createContext()); + } - describe('compact() - summary message metadata', () => { - it('should return summary with isSummary=true metadata', async () => { - mockGenerateText.mockResolvedValue({ - text: 'Test summary', - } as Awaited>); + function materializeSummary( + history: readonly InternalMessage[], + result: CompactionResult + ): InternalMessage { + const compactionWindow = buildCompactionWindow(history); + const 
summary = structuredClone(result.summaryMessages[0]!); + const preservedMessageIds = compactionWindow.workingHistory + .slice(result.preserveFromWorkingIndex) + .map((message) => { + if (!message.id) { + throw new Error('Expected preserved working message to have a stable id'); + } + return message.id; + }); + summary.metadata = { + ...(summary.metadata ?? {}), + isSummary: true, + preservedMessageIds, + }; + if (compactionWindow.latestSummary && summary.metadata?.isRecompaction !== true) { + summary.metadata.isRecompaction = true; + } + if (summary.metadata) { + delete summary.metadata.originalMessageCount; + } + return summary; + } + describe('compact()', () => { + it('returns null when the working history is too short', async () => { const history: InternalMessage[] = [ - createUserMessage('First question', 1000), - createAssistantMessage('First answer', 1001), - createUserMessage('Second question', 1002), - createAssistantMessage('Second answer', 1003), - createUserMessage('Third question', 1004), - createAssistantMessage('Third answer', 1005), + createUserMessage('Hello', 1000), + createAssistantMessage('Hi there', 1001), ]; - const result = await strategy.compact(history, createContext()); + const result = await compactHistory(history); - expect(result).toHaveLength(1); - expect(result[0]?.metadata?.isSummary).toBe(true); + expect(result).toBeNull(); + expect(mockGenerateText).not.toHaveBeenCalled(); }); - it('should set originalMessageCount to number of summarized messages', async () => { + it('returns a structured summary and working-history boundary', async () => { mockGenerateText.mockResolvedValue({ text: 'Test summary', } as Awaited>); @@ -151,413 +144,147 @@ describe('ReactiveOverflowCompactionStrategy', () => { createAssistantMessage('Recent answer 2', 1005), ]; - const result = await strategy.compact(history, createContext()); + const result = await compactHistory(history); - expect(result).toHaveLength(1); - 
expect(result[0]?.metadata?.originalMessageCount).toBe(2); + expect(result).not.toBeNull(); + expect(result?.summaryMessages).toHaveLength(1); + expect(result?.summaryMessages[0]?.metadata?.isSummary).toBe(true); + expect(result?.preserveFromWorkingIndex).toBe(2); }); - it('should include summarizedAt timestamp in metadata', async () => { - mockGenerateText.mockResolvedValue({ - text: 'Test summary', - } as Awaited>); - - const history: InternalMessage[] = [ - createUserMessage('Question 1', 1000), - createAssistantMessage('Answer 1', 1001), - createUserMessage('Question 2', 1002), - createAssistantMessage('Answer 2', 1003), - createUserMessage('Question 3', 1004), - createAssistantMessage('Answer 3', 1005), - ]; - - const beforeTime = Date.now(); - const result = await strategy.compact(history, createContext()); - const afterTime = Date.now(); - - expect(result[0]?.metadata?.summarizedAt).toBeGreaterThanOrEqual(beforeTime); - expect(result[0]?.metadata?.summarizedAt).toBeLessThanOrEqual(afterTime); - }); - - it('should include original timestamps in metadata', async () => { - mockGenerateText.mockResolvedValue({ - text: 'Test summary', - } as Awaited>); - - const history: InternalMessage[] = [ - createUserMessage('Old question', 1000), - createAssistantMessage('Old answer', 2000), - createUserMessage('Recent question 1', 3000), - createAssistantMessage('Recent answer 1', 4000), - createUserMessage('Recent question 2', 5000), - createAssistantMessage('Recent answer 2', 6000), - ]; - - const result = await strategy.compact(history, createContext()); - - expect(result[0]?.metadata?.originalFirstTimestamp).toBe(1000); - expect(result[0]?.metadata?.originalLastTimestamp).toBe(2000); - }); - }); - - describe('compact() - re-compaction with existing summary', () => { - it('should detect existing summary and only summarize messages after it', async () => { + it('recompacts against the logical working window instead of raw stored indexes', async () => { 
mockGenerateText.mockResolvedValue({ text: 'New summary', } as Awaited>); const history: InternalMessage[] = [ - createUserMessage('Very old question', 1000), - createAssistantMessage('Very old answer', 1001), - createSummaryMessage('Previous summary', 2, 1002), - createUserMessage('Question after summary 1', 2000), - createAssistantMessage('Answer after summary 1', 2001), - createUserMessage('Question after summary 2', 2002), - createAssistantMessage('Answer after summary 2', 2003), - createUserMessage('Question after summary 3', 2004), - createAssistantMessage('Answer after summary 3', 2005), + createUserMessage('Ancient question', 1000), + createAssistantMessage('Ancient answer', 1001), + createUserMessage('Preserved question', 1002), + createAssistantMessage('Preserved answer', 1003), + createSummaryMessage( + 'Previous summary', + ['user-Preserved question-1002', 'assistant-Preserved answer-1003'], + 1004 + ), + createUserMessage('Fresh question 1', 2000), + createAssistantMessage('Fresh answer 1', 2001), + createUserMessage('Fresh question 2', 2002), + createAssistantMessage('Fresh answer 2', 2003), + createUserMessage('Fresh question 3', 2004), + createAssistantMessage('Fresh answer 3', 2005), ]; - const result = await strategy.compact(history, createContext()); + const result = await compactHistory(history); - expect(result).toHaveLength(1); - expect(result[0]?.metadata?.isRecompaction).toBe(true); + expect(result).not.toBeNull(); + expect(result?.summaryMessages).toHaveLength(1); + expect(result?.summaryMessages[0]?.metadata?.isRecompaction).toBe(true); + expect(result?.preserveFromWorkingIndex).toBe(4); }); - it('should skip re-compaction if few messages after existing summary', async () => { + it('skips recompaction when no working-history prefix is eligible after the latest summary', async () => { const history: InternalMessage[] = [ - createUserMessage('Old question', 1000), - createAssistantMessage('Old answer', 1001), - createSummaryMessage('Existing 
summary', 2, 1002), - createUserMessage('New question', 2000), - createAssistantMessage('New answer', 2001), - createUserMessage('Another question', 2002), + createUserMessage('Ancient question', 1000), + createAssistantMessage('Ancient answer', 1001), + createSummaryMessage('Previous summary', [], 1002), + { + role: 'assistant', + content: [{ type: 'text', text: 'Thinking through the task' }], + timestamp: 2000, + id: 'assistant-thinking', + }, + { + role: 'tool', + content: [{ type: 'text', text: 'Tool output' }], + timestamp: 2001, + id: 'tool-output', + name: 'read_file', + toolCallId: 'call-1', + }, + createAssistantMessage('Still working on it', 2002), ]; - const result = await strategy.compact(history, createContext()); + const result = await compactHistory(history); - expect(result).toEqual([]); + expect(result).toBeNull(); expect(mockGenerateText).not.toHaveBeenCalled(); }); - it('should find most recent summary when multiple exist', async () => { + it('produces summaries that can supersede older summaries without pulling them back into filterCompacted()', async () => { mockGenerateText.mockResolvedValue({ - text: 'Newest summary', + text: 'Replacement summary', } as Awaited>); - const history: InternalMessage[] = [ - createUserMessage('Ancient question', 100), - createSummaryMessage('First summary', 1, 200), - createUserMessage('Old question', 300), - createAssistantMessage('Old answer', 301), - createSummaryMessage('Second summary', 2, 400), - createUserMessage('Q1', 500), - createAssistantMessage('A1', 501), - createUserMessage('Q2', 502), - createAssistantMessage('A2', 503), - createUserMessage('Q3', 504), - createAssistantMessage('A3', 505), - ]; - - const result = await strategy.compact(history, createContext()); - - expect(result).toHaveLength(1); - expect(result[0]?.metadata?.isRecompaction).toBe(true); - }); - - it('should set originalMessageCount as absolute index for filterCompacted compatibility', async () => { - 
mockGenerateText.mockResolvedValue({ - text: 'Re-compacted summary', - } as Awaited>); - - const history: InternalMessage[] = [ - createUserMessage('Very old question', 1000), - createAssistantMessage('Very old answer', 1001), - createSummaryMessage('Previous summary', 2, 1002), - createUserMessage('Q1', 2000), - createAssistantMessage('A1', 2001), - createUserMessage('Q2', 2002), - createAssistantMessage('A2', 2003), - createUserMessage('Q3', 2004), - createAssistantMessage('A3', 2005), - ]; - - const result = await strategy.compact(history, createContext()); - expect(result).toHaveLength(1); - - const newSummary = result[0]!; - expect(newSummary.metadata?.isRecompaction).toBe(true); - expect(newSummary.metadata?.originalMessageCount).toBe(5); - - const historyAfterCompaction = [...history, newSummary]; - const filtered = filterCompacted(historyAfterCompaction); - - expect(filtered).toHaveLength(5); - expect(filtered[0]?.metadata?.isRecompaction).toBe(true); - expect(filtered[1]?.role).toBe('user'); - expect(filtered[4]?.role).toBe('assistant'); - }); - - it('should ensure filterCompacted does not return old summary or pre-summary messages after re-compaction', async () => { - mockGenerateText.mockResolvedValue({ - text: 'New summary', - } as Awaited>); - - const history: InternalMessage[] = []; - for (let i = 0; i < 50; i++) { - history.push(createUserMessage(`Old Q${i}`, 1000 + i * 2)); - history.push(createAssistantMessage(`Old A${i}`, 1001 + i * 2)); - } - history.push(createSummaryMessage('Old summary', 90, 2000)); - for (let i = 0; i < 15; i++) { - history.push(createUserMessage(`New Q${i}`, 3000 + i * 2)); - history.push(createAssistantMessage(`New A${i}`, 3001 + i * 2)); - } - - expect(history).toHaveLength(131); - - const result = await strategy.compact(history, createContext()); - expect(result).toHaveLength(1); - - const newSummary = result[0]!; - expect(newSummary.metadata?.isRecompaction).toBe(true); - - const historyAfterCompaction = [...history, 
newSummary]; - const filtered = filterCompacted(historyAfterCompaction); - - const hasOldSummary = filtered.some( - (msg) => msg.metadata?.isSummary && !msg.metadata?.isRecompaction - ); - expect(hasOldSummary).toBe(false); - expect(filtered.length).toBeLessThan(20); - }); - - it('should handle three sequential compactions correctly', async () => { - mockGenerateText.mockResolvedValue({ - text: 'Summary content', - } as Awaited>); - - let history: InternalMessage[] = []; - - for (let i = 0; i < 10; i++) { - history.push(createUserMessage(`Q${i}`, 1000 + i * 2)); - history.push(createAssistantMessage(`A${i}`, 1001 + i * 2)); - } - expect(history).toHaveLength(20); - - const result1 = await strategy.compact(history, createContext()); - expect(result1).toHaveLength(1); - const summary1 = result1[0]!; - expect(summary1.metadata?.isRecompaction).toBeUndefined(); - - history.push(summary1); - expect(history).toHaveLength(21); - - let filtered = filterCompacted(history); - expect(filtered.length).toBeLessThan(15); - - for (let i = 10; i < 20; i++) { - history.push(createUserMessage(`Q${i}`, 2000 + i * 2)); - history.push(createAssistantMessage(`A${i}`, 2001 + i * 2)); - } - expect(history).toHaveLength(41); - - const result2 = await strategy.compact(history, createContext()); - expect(result2).toHaveLength(1); - const summary2 = result2[0]!; - expect(summary2.metadata?.isRecompaction).toBe(true); - - history.push(summary2); - expect(history).toHaveLength(42); - - filtered = filterCompacted(history); - expect(filtered[0]?.metadata?.isRecompaction).toBe(true); - const hasSummary1 = filtered.some( - (m) => m.metadata?.isSummary && !m.metadata?.isRecompaction + const oldSummary = createSummaryMessage( + 'Old summary', + ['user-Preserved question-1002', 'assistant-Preserved answer-1003'], + 1004 ); - expect(hasSummary1).toBe(false); - - for (let i = 20; i < 30; i++) { - history.push(createUserMessage(`Q${i}`, 3000 + i * 2)); - history.push(createAssistantMessage(`A${i}`, 3001 
+ i * 2)); - } - expect(history).toHaveLength(62); - - const result3 = await strategy.compact(history, createContext()); - expect(result3).toHaveLength(1); - const summary3 = result3[0]!; - expect(summary3.metadata?.isRecompaction).toBe(true); - - history.push(summary3); - expect(history).toHaveLength(63); - - filtered = filterCompacted(history); - - expect(filtered[0]?.metadata?.isRecompaction).toBe(true); - expect(filtered[0]).toBe(summary3); - - const oldSummaries = filtered.filter((m) => m.metadata?.isSummary && m !== summary3); - expect(oldSummaries).toHaveLength(0); - expect(filtered.length).toBeLessThan(20); - - for (const msg of filtered) { - if (msg === summary3) continue; - expect(msg.timestamp).toBeGreaterThanOrEqual(3000); - } - }); - - it('should work correctly with manual compaction followed by automatic compaction', async () => { - mockGenerateText.mockResolvedValue({ - text: 'Summary', - } as Awaited>); - - let history: InternalMessage[] = []; - for (let i = 0; i < 10; i++) { - history.push(createUserMessage(`Q${i}`, 1000 + i)); - history.push(createAssistantMessage(`A${i}`, 1000 + i)); - } - - const manualResult = await strategy.compact(history, createContext()); - expect(manualResult).toHaveLength(1); - history.push(manualResult[0]!); - - for (let i = 10; i < 20; i++) { - history.push(createUserMessage(`Q${i}`, 2000 + i)); - history.push(createAssistantMessage(`A${i}`, 2000 + i)); - } - - const autoResult = await strategy.compact(history, createContext()); - expect(autoResult).toHaveLength(1); - expect(autoResult[0]?.metadata?.isRecompaction).toBe(true); - history.push(autoResult[0]!); - - const filtered = filterCompacted(history); - expect(filtered[0]?.metadata?.isRecompaction).toBe(true); - - const summaryCount = filtered.filter((m) => m.metadata?.isSummary).length; - expect(summaryCount).toBe(1); - }); - }); - - describe('compact() - history splitting', () => { - it('should preserve last N turns based on options', async () => { - 
mockGenerateText.mockResolvedValue({ - text: 'Summary', - } as Awaited>); - - const customStrategy = new ReactiveOverflowCompactionStrategy({ - strategy: { preserveLastNTurns: 3 }, - }); - - const history: InternalMessage[] = [ - createUserMessage('Turn 1 Q', 1000), - createAssistantMessage('Turn 1 A', 1001), - createUserMessage('Turn 2 Q', 2000), - createAssistantMessage('Turn 2 A', 2001), - createUserMessage('Turn 3 Q', 3000), - createAssistantMessage('Turn 3 A', 3001), - createUserMessage('Turn 4 Q', 4000), - createAssistantMessage('Turn 4 A', 4001), - ]; - - const result = await customStrategy.compact(history, createContext()); - - expect(result).toHaveLength(1); - expect(result[0]?.metadata?.originalMessageCount).toBe(2); - }); - - it('should return empty when message count is at or below minKeep threshold', async () => { - const history: InternalMessage[] = [ - createUserMessage('Q1', 1000), - createAssistantMessage('A1', 1001), - createUserMessage('Q2', 2000), - ]; - - const result = await strategy.compact(history, createContext()); - - expect(result).toEqual([]); - expect(mockGenerateText).not.toHaveBeenCalled(); - }); - }); - - describe('compact() - LLM failure fallback', () => { - it('should create fallback summary when LLM call fails', async () => { - mockGenerateText.mockRejectedValue(new Error('LLM API error')); - const history: InternalMessage[] = [ - createUserMessage('Question 1', 1000), - createAssistantMessage('Answer 1', 1001), - createUserMessage('Question 2', 2000), - createAssistantMessage('Answer 2', 2001), - createUserMessage('Question 3', 3000), - createAssistantMessage('Answer 3', 3001), + createUserMessage('Ancient question', 1000), + createAssistantMessage('Ancient answer', 1001), + createUserMessage('Preserved question', 1002), + createAssistantMessage('Preserved answer', 1003), + oldSummary, + createUserMessage('Fresh question 1', 2000), + createAssistantMessage('Fresh answer 1', 2001), + createUserMessage('Fresh question 2', 2002), + 
createAssistantMessage('Fresh answer 2', 2003), + createUserMessage('Fresh question 3', 2004), + createAssistantMessage('Fresh answer 3', 2005), ]; - const result = await strategy.compact(history, createContext()); - - expect(result).toHaveLength(1); - expect(result[0]?.metadata?.isSummary).toBe(true); - const content = result[0]?.content; - expect(content).toBeDefined(); - expect(content![0]).toMatchObject({ - type: 'text', - text: expect.stringContaining(''), - }); - expect(content![0]).toMatchObject({ - type: 'text', - text: expect.stringContaining('Fallback'), - }); - }); - - it('should include current task in fallback summary', async () => { - mockGenerateText.mockRejectedValue(new Error('LLM API error')); - - const history: InternalMessage[] = [ - createUserMessage('Old question', 1000), - createAssistantMessage('Old answer', 1001), - createUserMessage('Recent question 1', 2000), - createAssistantMessage('Recent answer 1', 2001), - createUserMessage('My current task is to fix the bug', 3000), - createAssistantMessage('Working on it', 3001), - ]; + const result = await compactHistory(history); + if (!result) { + throw new Error('Expected compaction result'); + } - const result = await strategy.compact(history, createContext()); + const replacementSummary = materializeSummary(history, result); + const filtered = filterCompacted([...history, replacementSummary]); - expect(result).toHaveLength(1); - const content = result[0]!.content; - expect(content).not.toBeNull(); - const firstContent = content![0]; - const summaryText = firstContent?.type === 'text' ? 
firstContent.text : ''; - expect(summaryText).toContain(''); + expect(filtered[0]).toEqual(replacementSummary); + expect(filtered).not.toContain(oldSummary); + expect( + filtered.some( + (message) => message.metadata?.isSummary && message !== replacementSummary + ) + ).toBe(false); }); - }); - describe('compact() - summary content', () => { - it('should prefix summary with [Session Compaction Summary]', async () => { + it('passes the latest summary content into the next compaction prompt', async () => { mockGenerateText.mockResolvedValue({ - text: 'LLM generated content', + text: 'Replacement summary', } as Awaited>); const history: InternalMessage[] = [ - createUserMessage('Q1', 1000), - createAssistantMessage('A1', 1001), - createUserMessage('Q2', 2000), - createAssistantMessage('A2', 2001), - createUserMessage('Q3', 3000), - createAssistantMessage('A3', 3001), + createUserMessage('Ancient question', 1000), + createAssistantMessage('Ancient answer', 1001), + createUserMessage('Preserved question', 1002), + createAssistantMessage('Preserved answer', 1003), + createSummaryMessage( + '[Session Compaction Summary]\nPrevious summary', + ['user-Preserved question-1002', 'assistant-Preserved answer-1003'], + 1004 + ), + createUserMessage('Fresh question 1', 2000), + createAssistantMessage('Fresh answer 1', 2001), + createUserMessage('Fresh question 2', 2002), + createAssistantMessage('Fresh answer 2', 2003), + createUserMessage('Fresh question 3', 2004), + createAssistantMessage('Fresh answer 3', 2005), ]; - const result = await strategy.compact(history, createContext()); + await compactHistory(history); - expect(result).toHaveLength(1); - const content = result[0]!.content; - expect(content).not.toBeNull(); - const firstContent = content![0]; - const summaryText = firstContent?.type === 'text' ? 
firstContent.text : ''; - expect(summaryText).toMatch(/^\[Session Compaction Summary\]/); + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: expect.stringContaining('[Session Compaction Summary]'), + }) + ); }); - it('should pass conversation to LLM with proper formatting', async () => { + it('passes conversation content to the LLM with proper formatting', async () => { mockGenerateText.mockResolvedValue({ text: 'Summary', } as Awaited>); @@ -571,7 +298,7 @@ describe('ReactiveOverflowCompactionStrategy', () => { createAssistantMessage('New answer', 3001), ]; - await strategy.compact(history, createContext()); + await compactHistory(history); expect(mockGenerateText).toHaveBeenCalledWith( expect.objectContaining({ @@ -584,10 +311,8 @@ describe('ReactiveOverflowCompactionStrategy', () => { }) ); }); - }); - describe('compact() - tool message handling', () => { - it('should include tool call information in summary', async () => { + it('includes tool call information in the summary prompt', async () => { mockGenerateText.mockResolvedValue({ text: 'Summary with tools', } as Awaited>); @@ -598,11 +323,12 @@ describe('ReactiveOverflowCompactionStrategy', () => { role: 'assistant', content: [{ type: 'text', text: 'Let me read that file' }], timestamp: 1001, + id: 'assistant-tool-call', toolCalls: [ { id: 'call-1', type: 'function', - function: { name: 'read_file', arguments: '{"path": "/test.txt"}' }, + function: { name: 'read_file', arguments: '{"path":"/test.txt"}' }, }, ], }, @@ -610,6 +336,7 @@ describe('ReactiveOverflowCompactionStrategy', () => { role: 'tool', content: [{ type: 'text', text: 'File contents here' }], timestamp: 1002, + id: 'tool-result', name: 'read_file', toolCallId: 'call-1', }, @@ -619,7 +346,7 @@ describe('ReactiveOverflowCompactionStrategy', () => { createAssistantMessage('A3', 3001), ]; - await strategy.compact(history, createContext()); + await compactHistory(history); 
expect(mockGenerateText).toHaveBeenCalledWith( expect.objectContaining({ @@ -627,22 +354,45 @@ describe('ReactiveOverflowCompactionStrategy', () => { }) ); }); + + it('creates a fallback summary when the LLM call fails', async () => { + mockGenerateText.mockRejectedValue(new Error('LLM API error')); + + const history: InternalMessage[] = [ + createUserMessage('Question 1', 1000), + createAssistantMessage('Answer 1', 1001), + createUserMessage('Question 2', 2000), + createAssistantMessage('Answer 2', 2001), + createUserMessage('Question 3', 3000), + createAssistantMessage('Answer 3', 3001), + ]; + + const result = await compactHistory(history); + + expect(result).not.toBeNull(); + const summaryText = + result?.summaryMessages[0]?.content?.[0]?.type === 'text' + ? result.summaryMessages[0].content[0].text + : ''; + expect(summaryText).toContain(''); + expect(summaryText).toContain('Fallback'); + }); }); describe('getSettings()', () => { - it('should return compaction settings', () => { + it('returns compaction settings', () => { const settings = strategy.getSettings(); expect(settings.enabled).toBe(true); expect(settings.thresholdPercent).toBe(0.9); }); - it('should respect enabled option', () => { + it('respects enabled option', () => { const disabledStrategy = new ReactiveOverflowCompactionStrategy({ enabled: false }); const settings = disabledStrategy.getSettings(); expect(settings.enabled).toBe(false); }); - it('should respect maxContextTokens option', () => { + it('respects maxContextTokens option', () => { const limitedStrategy = new ReactiveOverflowCompactionStrategy({ maxContextTokens: 10000, }); @@ -652,52 +402,35 @@ describe('ReactiveOverflowCompactionStrategy', () => { }); describe('getModelLimits()', () => { - it('should return context window when no maxContextTokens set', () => { + it('returns context window when no maxContextTokens is set', () => { const limits = strategy.getModelLimits(100000); expect(limits.contextWindow).toBe(100000); }); - 
it('should cap context window when maxContextTokens is set', () => { + it('caps the context window when maxContextTokens is set', () => { const limitedStrategy = new ReactiveOverflowCompactionStrategy({ maxContextTokens: 50000, }); const limits = limitedStrategy.getModelLimits(100000); expect(limits.contextWindow).toBe(50000); }); - - it('should not cap when model window is smaller than maxContextTokens', () => { - const limitedStrategy = new ReactiveOverflowCompactionStrategy({ - maxContextTokens: 100000, - }); - const limits = limitedStrategy.getModelLimits(50000); - expect(limits.contextWindow).toBe(50000); - }); }); describe('shouldCompact()', () => { - it('should return false when disabled', () => { + it('returns false when disabled', () => { const disabledStrategy = new ReactiveOverflowCompactionStrategy({ enabled: false }); const limits = { contextWindow: 100000 }; expect(disabledStrategy.shouldCompact(90000, limits)).toBe(false); }); - it('should return false when under threshold', () => { + it('returns false when under threshold', () => { const limits = { contextWindow: 100000 }; expect(strategy.shouldCompact(80000, limits)).toBe(false); }); - it('should return true when over threshold', () => { + it('returns true when over threshold', () => { const limits = { contextWindow: 100000 }; expect(strategy.shouldCompact(95000, limits)).toBe(true); }); - - it('should respect custom thresholdPercent', () => { - const customStrategy = new ReactiveOverflowCompactionStrategy({ - thresholdPercent: 0.5, - }); - const limits = { contextWindow: 100000 }; - expect(customStrategy.shouldCompact(60000, limits)).toBe(true); - expect(customStrategy.shouldCompact(40000, limits)).toBe(false); - }); }); }); diff --git a/packages/core/src/context/compaction/types.ts b/packages/core/src/context/compaction/types.ts index 57e739c3d..6e910b747 100644 --- a/packages/core/src/context/compaction/types.ts +++ b/packages/core/src/context/compaction/types.ts @@ -24,6 +24,62 @@ export 
interface CompactionRuntimeContext { logger: Logger; } +export interface CompactionSummaryBoundary { + message: InternalMessage; + storedIndex: number; +} + +export interface CompactionWindow { + /** + * Full stored session transcript as persisted today. + * This may still contain older summary markers for audit/tracing purposes. + */ + storedHistory: readonly InternalMessage[]; + /** + * Logical history currently visible to the model. + * When a prior summary exists, this is `[latestSummary, ...workingHistory]`. + */ + activeHistory: readonly InternalMessage[]; + /** + * Working-memory messages carried forward from the latest summary boundary. + * These were preserved during the previous compaction run and are still + * visible in the current continuation window. + */ + preservedHistory: readonly InternalMessage[]; + /** + * Newly accumulated messages after the latest summary boundary. + * For an uncompacted session, this is the full stored history. + */ + freshHistory: readonly InternalMessage[]; + /** + * Chronological working-memory messages that remain unsummarized. + * This intentionally excludes the latest summary marker so strategies can + * compact the logical working set without reasoning about stored transcript + * offsets or prior summary placement. + */ + workingHistory: readonly InternalMessage[]; + /** + * Latest visible summary marker, if one exists. + * Strategies can use this as already-compacted context when producing a + * replacement summary for the next working-memory window. + */ + latestSummary?: CompactionSummaryBoundary; +} + +export interface CompactionResult { + /** + * Summary/carry-forward messages produced by the strategy. + * Session-level compaction currently requires exactly one summary message. + */ + summaryMessages: InternalMessage[]; + /** + * Index into `workingHistory` where preserved working-memory messages begin. 
+ * Messages before this index are compacted into `summaryMessages`; messages + * from this index onward stay in the continuation window unchanged. + */ + preserveFromWorkingIndex: number; +} + /** * Compaction strategy. * @@ -48,26 +104,18 @@ export type CompactionStrategy = { shouldCompact(inputTokens: number, modelLimits: ModelLimits): boolean; /** - * Compacts the provided message history. - * - * The returned summary messages MUST include specific metadata fields for - * `filterCompacted()` to correctly exclude pre-summary messages at read-time: - * - * Required metadata: - * - `isSummary: true` - Marks the message as a compaction summary - * - `originalMessageCount: number` - Count of messages that were summarized - * (used by filterCompacted to determine which messages to exclude) + * Compacts the provided working-memory window. * - * Optional metadata: - * - `isRecompaction: true` - Set when re-compacting after a previous summary - * - `isSessionSummary: true` - Alternative to isSummary for session-level summaries + * Strategies operate on an explicit logical window instead of inferring + * boundaries from raw stored transcript indexes. Core later materializes the + * returned boundary into whatever persistence/filtering metadata is needed. * - * @param history The current conversation history. + * @param window The current compaction window for this session. * @param context Per-session runtime context (model/logger/sessionId) - * @returns Summary messages to add to history. Empty array if nothing to compact. + * @returns Structured compaction result, or null when nothing should be compacted. 
*/ compact( - history: readonly InternalMessage[], + window: CompactionWindow, context: CompactionRuntimeContext - ): Promise; + ): Promise; }; diff --git a/packages/core/src/context/compaction/window.ts b/packages/core/src/context/compaction/window.ts new file mode 100644 index 000000000..66c32b0b5 --- /dev/null +++ b/packages/core/src/context/compaction/window.ts @@ -0,0 +1,89 @@ +import type { InternalMessage } from '../types.js'; +import type { CompactionSummaryBoundary, CompactionWindow } from './types.js'; + +function isCompactionSummary(message: InternalMessage | undefined): boolean { + return message?.metadata?.isSummary === true || message?.metadata?.isSessionSummary === true; +} + +function findLatestCompactionSummary( + history: readonly InternalMessage[] +): CompactionSummaryBoundary | undefined { + for (let i = history.length - 1; i >= 0; i--) { + const message = history[i]; + if (message && isCompactionSummary(message)) { + return { + message, + storedIndex: i, + }; + } + } + + return undefined; +} + +function resolveLegacyPreservedMessages( + history: readonly InternalMessage[], + summaryIndex: number, + summaryMessage: InternalMessage +): InternalMessage[] { + const rawCount = summaryMessage.metadata?.originalMessageCount; + const originalMessageCount = + typeof rawCount === 'number' && rawCount >= 0 && rawCount <= summaryIndex + ? rawCount + : summaryIndex; + + return history.slice(originalMessageCount, summaryIndex); +} + +function resolvePreservedMessages( + history: readonly InternalMessage[], + summary: CompactionSummaryBoundary +): InternalMessage[] { + const preservedMessageIds = summary.message.metadata?.preservedMessageIds; + if ( + Array.isArray(preservedMessageIds) && + preservedMessageIds.every((messageId) => typeof messageId === 'string') + ) { + const messagesBeforeSummary = history.slice(0, summary.storedIndex); + const messagesById = new Map( + messagesBeforeSummary.flatMap((message) => + message.id ? 
[[message.id, message] as const] : [] + ) + ); + + return preservedMessageIds.flatMap((messageId) => { + const message = messagesById.get(messageId); + return message ? [message] : []; + }); + } + + return resolveLegacyPreservedMessages(history, summary.storedIndex, summary.message); +} + +export function buildCompactionWindow(history: readonly InternalMessage[]): CompactionWindow { + const storedHistory = history.slice(); + const latestSummary = findLatestCompactionSummary(storedHistory); + + if (!latestSummary) { + return { + storedHistory, + activeHistory: storedHistory.slice(), + preservedHistory: [], + freshHistory: storedHistory.slice(), + workingHistory: storedHistory.slice(), + }; + } + + const preservedMessages = resolvePreservedMessages(storedHistory, latestSummary); + const messagesAfterSummary = storedHistory.slice(latestSummary.storedIndex + 1); + const workingHistory = [...preservedMessages, ...messagesAfterSummary]; + + return { + storedHistory, + activeHistory: [latestSummary.message, ...workingHistory], + preservedHistory: preservedMessages, + freshHistory: messagesAfterSummary, + workingHistory, + latestSummary, + }; +} diff --git a/packages/core/src/context/utils.test.ts b/packages/core/src/context/utils.test.ts index fb15f8f59..e24dca329 100644 --- a/packages/core/src/context/utils.test.ts +++ b/packages/core/src/context/utils.test.ts @@ -1571,6 +1571,36 @@ describe('filterCompacted', () => { expect(result[1]?.content).toBe('Recent message'); expect(result[2]?.content).toBe('Recent response'); }); + + it('should prefer preservedMessageIds when reconstructing a repeated compaction window', () => { + const messages = [ + { role: 'user', content: 'Very old', id: 'u1' }, + { role: 'assistant', content: 'Very old response', id: 'a1' }, + { role: 'user', content: 'Preserved question', id: 'u2' }, + { role: 'assistant', content: 'Preserved answer', id: 'a2' }, + { + role: 'assistant', + content: 'Old summary', + id: 's1', + metadata: { isSummary: true, 
preservedMessageIds: ['u2', 'a2'] }, + }, + { role: 'user', content: 'Fresh question', id: 'u3' }, + { role: 'assistant', content: 'Fresh answer', id: 'a3' }, + { + role: 'assistant', + content: 'Replacement summary', + id: 's2', + metadata: { isSummary: true, preservedMessageIds: ['u3', 'a3'] }, + }, + ] as unknown as InternalMessage[]; + + const result = filterCompacted(messages); + + expect(result).toHaveLength(3); + expect(result[0]?.content).toBe('Replacement summary'); + expect(result[1]?.content).toBe('Fresh question'); + expect(result[2]?.content).toBe('Fresh answer'); + }); }); describe('sanitizeToolResultToContentWithBlobs', () => { diff --git a/packages/core/src/context/utils.ts b/packages/core/src/context/utils.ts index c75da12f3..405f87817 100644 --- a/packages/core/src/context/utils.ts +++ b/packages/core/src/context/utils.ts @@ -14,6 +14,7 @@ import { validateModelFileSupport } from '../llm/registry/index.js'; import { LLMContext } from '../llm/types.js'; import { safeStringify } from '../utils/safe-stringify.js'; import { getFileMediaKind, getResourceKind } from './media-helpers.js'; +import { buildCompactionWindow } from './compaction/window.js'; // Tunable heuristics and shared constants const MIN_BASE64_HEURISTIC_LENGTH = 512; // Below this length, treat as regular text @@ -1960,50 +1961,7 @@ export function toTextForToolMessage(content: InternalMessage['content']): strin * @returns Filtered history starting from the most recent summary (or full history if no summary) */ export function filterCompacted(history: readonly InternalMessage[]): InternalMessage[] { - // Find the most recent summary message (search backwards for efficiency) - // Check for both old isSummary marker and new isSessionSummary marker - let summaryIndex = -1; - for (let i = history.length - 1; i >= 0; i--) { - const msg = history[i]; - if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) { - summaryIndex = i; - break; - } - } - - // If no summary 
found, return full history (slice returns mutable copy) - if (summaryIndex === -1) { - return history.slice(); - } - - // Get the summary message (we know it exists since we found the index) - const summaryMessage = history[summaryIndex]!; - - // Get the count of messages that were summarized (stored in metadata) - // The preserved messages are between the summarized portion and the summary - // Clamp to valid range: 0 <= originalMessageCount <= summaryIndex - // For legacy summaries without metadata, default to summaryIndex (no preserved messages) - const rawCount = summaryMessage.metadata?.originalMessageCount; - const originalMessageCount = - typeof rawCount === 'number' && rawCount >= 0 && rawCount <= summaryIndex - ? rawCount - : summaryIndex; - - // Layout after compaction: - // [summarized..., preserved..., summary, afterSummary...] - // ^-- indices 0 to (originalMessageCount-1) - // ^-- indices originalMessageCount to (summaryIndex-1) - // ^-- index summaryIndex - // ^-- indices (summaryIndex+1) onwards - - // Get preserved messages (messages between summarized portion and summary) - const preservedMessages = history.slice(originalMessageCount, summaryIndex); - - // Get any messages added after the summary (rare but possible) - const messagesAfterSummary = history.slice(summaryIndex + 1); - - // Return: summary + preserved + afterSummary - return [summaryMessage, ...preservedMessages, ...messagesAfterSummary]; + return buildCompactionWindow(history).activeHistory.slice(); } /** diff --git a/packages/core/src/events/index.ts b/packages/core/src/events/index.ts index 15f62d4b2..4a8b879f3 100644 --- a/packages/core/src/events/index.ts +++ b/packages/core/src/events/index.ts @@ -481,6 +481,9 @@ export interface AgentEventMap { compactedMessages: number; strategy: string; reason: 'overflow' | 'manual'; + compactionId?: string; + mode?: 'artifact-only' | 'continue-in-place' | 'continue-in-child'; + targetSessionId?: string; sessionId: string; }; @@ -749,6 +752,9 
@@ export interface SessionEventMap { compactedMessages: number; strategy: string; reason: 'overflow' | 'manual'; + compactionId?: string; + mode?: 'artifact-only' | 'continue-in-place' | 'continue-in-child'; + targetSessionId?: string; }; /** Old tool outputs were pruned (marked with compactedAt) to save tokens */ diff --git a/packages/core/src/llm/executor/turn-executor.integration.test.ts b/packages/core/src/llm/executor/turn-executor.integration.test.ts index bfc3fdc9d..c3c1eebfb 100644 --- a/packages/core/src/llm/executor/turn-executor.integration.test.ts +++ b/packages/core/src/llm/executor/turn-executor.integration.test.ts @@ -19,6 +19,9 @@ import type { LanguageModel, ModelMessage } from 'ai'; import type { LLMContext } from '../types.js'; import type { ValidatedLLMConfig } from '../schemas.js'; import type { Logger } from '../../logger/v2/types.js'; +import type { CompactionStrategy } from '../../context/compaction/types.js'; +import type { SessionCompactionRecord } from '../../session/compaction.js'; +import type { SessionCompactionPersistence } from '../../session/compaction-service.js'; // Only mock the AI SDK's streamText/generateText - everything else is real vi.mock('ai', async (importOriginal) => { @@ -803,6 +806,213 @@ describe('TurnExecutor Integration Tests', () => { }); }); + describe('Structured Overflow Compaction', () => { + it('routes overflow compaction through the shared session compaction pipeline', async () => { + const overflowCompactionStrategy: CompactionStrategy = { + name: 'test-overflow-compaction', + getSettings: () => ({ + enabled: true, + thresholdPercent: 0.9, + }), + getModelLimits: (contextWindow: number) => ({ + contextWindow, + }), + shouldCompact: (inputTokens: number) => inputTokens > 120, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 'text', text: 'Overflow compacted summary' }], + timestamp: Date.now(), + }, + ], + preserveFromWorkingIndex: 2, + }), + }; + + const 
savedCompactions: SessionCompactionRecord[] = []; + const sessionCompactionPersistence: SessionCompactionPersistence = { + createSeededChildSession: vi.fn(async () => ({ + id: 'unused-child-session', + })), + deleteSession: vi.fn(async () => undefined), + deleteSessionCompaction: vi.fn(async () => undefined), + saveSessionCompaction: vi.fn(async (compaction) => { + savedCompactions.push(compaction); + }), + }; + + const compactingEvents: Array<{ estimatedTokens: number }> = []; + const compactedEvents: Array<{ + compactionId?: string; + mode?: 'artifact-only' | 'continue-in-place' | 'continue-in-child'; + reason: 'overflow' | 'manual'; + strategy: string; + }> = []; + + sessionEventBus.on('context:compacting', (payload) => { + compactingEvents.push(payload); + }); + sessionEventBus.on('context:compacted', (payload) => { + compactedEvents.push(payload); + }); + + const longText = 'very long archived context '.repeat(40); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: longText }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: longText }], + }); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'Keep this recent request.' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'Keep this recent reply.' }], + }); + await contextManager.addUserMessage([ + { type: 'text', text: 'Respond after compacting.' 
}, + ]); + + const executorWithCompaction = new TurnExecutor( + createMockModel(), + toolManager, + contextManager, + sessionEventBus, + resourceManager, + sessionId, + { maxSteps: 10, maxOutputTokens: 4096, temperature: 0.7 }, + llmContext, + logger, + messageQueue, + { contextWindow: 200 }, + undefined, + overflowCompactionStrategy, + sessionCompactionPersistence + ); + + vi.mocked(streamText).mockImplementation( + () => + createMockStream({ + text: 'Compacted response', + finishReason: 'stop', + usage: { + inputTokens: 80, + outputTokens: 20, + totalTokens: 100, + }, + }) as unknown as ReturnType + ); + + await executorWithCompaction.execute({ mcpManager }, true); + + expect(sessionCompactionPersistence.saveSessionCompaction).toHaveBeenCalledTimes(1); + expect(savedCompactions).toHaveLength(1); + + const [savedCompaction] = savedCompactions; + expect(savedCompaction?.sourceSessionId).toBe(sessionId); + expect(savedCompaction?.trigger).toBe('overflow'); + expect(savedCompaction?.mode).toBe('continue-in-place'); + expect(savedCompaction?.targetSessionId).toBeUndefined(); + + expect(compactingEvents).toHaveLength(1); + expect(compactedEvents).toHaveLength(1); + expect(compactedEvents[0]).toMatchObject({ + compactionId: savedCompaction?.id, + mode: 'continue-in-place', + reason: 'overflow', + strategy: 'test-overflow-compaction', + }); + + const history = await contextManager.getHistory(); + expect(history.some((message) => message.metadata?.isSummary === true)).toBe(true); + }); + + it('does not mutate history when overflow compaction persistence fails', async () => { + const overflowCompactionStrategy: CompactionStrategy = { + name: 'test-overflow-compaction-save-failure', + getSettings: () => ({ + enabled: true, + thresholdPercent: 0.9, + }), + getModelLimits: (contextWindow: number) => ({ + contextWindow, + }), + shouldCompact: (inputTokens: number) => inputTokens > 120, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 
'text', text: 'Overflow compacted summary' }], + timestamp: Date.now(), + }, + ], + preserveFromWorkingIndex: 2, + }), + }; + + const sessionCompactionPersistence: SessionCompactionPersistence = { + createSeededChildSession: vi.fn(async () => ({ + id: 'unused-child-session', + })), + deleteSession: vi.fn(async () => undefined), + deleteSessionCompaction: vi.fn(async () => undefined), + saveSessionCompaction: vi.fn(async () => { + throw new Error('persist failed'); + }), + }; + + const longText = 'very long archived context '.repeat(40); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: longText }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: longText }], + }); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'Keep this recent request.' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'Keep this recent reply.' }], + }); + await contextManager.addUserMessage([ + { type: 'text', text: 'Respond after compacting.' 
}, + ]); + + const executorWithCompaction = new TurnExecutor( + createMockModel(), + toolManager, + contextManager, + sessionEventBus, + resourceManager, + sessionId, + { maxSteps: 10, maxOutputTokens: 4096, temperature: 0.7 }, + llmContext, + logger, + messageQueue, + { contextWindow: 200 }, + undefined, + overflowCompactionStrategy, + sessionCompactionPersistence + ); + + await expect(executorWithCompaction.execute({ mcpManager }, true)).rejects.toThrow( + 'persist failed' + ); + + const history = await contextManager.getHistory(); + expect(history.some((message) => message.metadata?.isSummary === true)).toBe(false); + }); + }); + describe('Context Token Tracking', () => { it('should store actual input tokens from LLM response in ContextManager', async () => { const expectedInputTokens = 1234; diff --git a/packages/core/src/llm/executor/turn-executor.ts b/packages/core/src/llm/executor/turn-executor.ts index dd3242aa3..6979012bc 100644 --- a/packages/core/src/llm/executor/turn-executor.ts +++ b/packages/core/src/llm/executor/turn-executor.ts @@ -41,6 +41,11 @@ import { toError } from '../../utils/error-conversion.js'; import type { CompactionStrategy } from '../../context/compaction/types.js'; import type { ModelLimits } from '../../context/compaction/overflow.js'; import { isCodexBaseURL } from '../providers/codex-base-url.js'; +import { + createSessionCompactionEventSink, + runSessionCompaction, + type SessionCompactionPersistence, +} from '../../session/compaction-service.js'; /** * Static cache for tool support validation. 
@@ -107,7 +112,8 @@ export class TurnExecutor { private messageQueue: MessageQueueService, private modelLimits?: ModelLimits, private externalSignal?: AbortSignal, - compactionStrategy: CompactionStrategy | null = null + compactionStrategy: CompactionStrategy | null = null, + private sessionCompactionPersistence?: SessionCompactionPersistence ) { this.logger = logger.createChild(DextoLogComponent.EXECUTOR); // Initial controller - will be replaced per-step in execute() @@ -1037,13 +1043,13 @@ export class TurnExecutor { } /** - * Compact context by generating a summary and adding it to the same session. + * Compact context through the shared session-compaction pipeline. * - * The summary message is added to the conversation history with `isSummary: true` metadata. - * When the context is loaded via getFormattedMessagesForLLM(), filterCompacted() will - * exclude all messages before the summary, effectively compacting the context. + * Overflow compaction still applies in place, but now uses the same artifact + * persistence, continuation calculation, and event payload shape as the public + * session compaction API. 
* - * @param originalTokens The estimated input token count that triggered overflow + * @param originalTokens The estimated or actual input token count that triggered overflow * @param contributorContext Context for system prompt contributors (needed for accurate token estimation) * @param tools Tool definitions (needed for accurate token estimation) * @returns true if compaction occurred, false if skipped @@ -1051,89 +1057,38 @@ export class TurnExecutor { private async compactContext( originalTokens: number, contributorContext: DynamicContributorContext, - tools: Record + tools: ToolSet ): Promise { if (!this.compactionStrategy) { return false; } + if (!this.sessionCompactionPersistence) { + this.logger.warn( + `Compaction skipped for session ${this.sessionId} - no session compaction persistence configured` + ); + return false; + } this.logger.info( `Context overflow detected (${originalTokens} tokens), checking if compression is possible` ); - const history = await this.contextManager.getHistory(); - const { filterCompacted } = await import('../../context/utils.js'); - const originalFiltered = filterCompacted(history); - const originalMessages = originalFiltered.length; - - // Pre-check if history is long enough for compaction (need at least 4 messages for meaningful summary) - if (history.length < 4) { - this.logger.debug('Compaction skipped: history too short to summarize'); - return false; - } - - // Emit event BEFORE the LLM summarization call so UI shows indicator during compaction - this.eventBus.emit('context:compacting', { - estimatedTokens: originalTokens, - }); - - // Generate summary message(s) - this makes an LLM call - const summaryMessages = await this.compactionStrategy.compact(history, { + const compaction = await runSessionCompaction({ sessionId: this.sessionId, - model: this.model, + mode: 'continue-in-place', + trigger: 'overflow', + languageModel: this.model, logger: this.logger, - }); - - if (summaryMessages.length === 0) { - // Compaction 
returned empty - nothing to summarize (e.g., already compacted) - // Still emit context:compacted to clear the UI's compacting state - this.logger.debug( - 'Compaction skipped: strategy returned no summary (likely already compacted or nothing to summarize)' - ); - this.eventBus.emit('context:compacted', { - originalTokens, - compactedTokens: originalTokens, // No change - originalMessages, - compactedMessages: originalMessages, // No change - strategy: this.compactionStrategy.name, - reason: 'overflow', - }); - return false; - } - - // Add summary to history - filterCompacted() will exclude pre-summary messages at read-time - for (const summary of summaryMessages) { - await this.contextManager.addMessage(summary); - } - - // Reset actual token tracking since context has fundamentally changed - // The formula (lastInput + lastOutput + newEstimate) is no longer valid after compaction - this.contextManager.resetActualTokenTracking(); - - // Get accurate token estimate after compaction using the same method as /context command - // This ensures consistency between what we report and what /context shows - const afterEstimate = await this.contextManager.getContextTokenEstimate( + contextManager: this.contextManager, + compactionStrategy: this.compactionStrategy, contributorContext, - tools - ); - const compactedTokens = afterEstimate.estimated; - const compactedMessages = afterEstimate.stats.filteredMessageCount; - - this.eventBus.emit('context:compacted', { - originalTokens, - compactedTokens, - originalMessages, - compactedMessages, - strategy: this.compactionStrategy.name, - reason: 'overflow', + tools, + persistence: this.sessionCompactionPersistence, + eventSink: createSessionCompactionEventSink(this.eventBus), + originalTokensOverride: originalTokens, }); - this.logger.info( - `Compaction complete: ${originalTokens} → ~${compactedTokens} tokens ` + - `(${originalMessages} → ${compactedMessages} messages after filtering)` - ); - - return true; + return compaction 
!== null; } /** diff --git a/packages/core/src/llm/services/factory.ts b/packages/core/src/llm/services/factory.ts index 0ffdfc135..0d5ac292e 100644 --- a/packages/core/src/llm/services/factory.ts +++ b/packages/core/src/llm/services/factory.ts @@ -19,6 +19,7 @@ import { createLocalLanguageModel } from '../providers/local/ai-sdk-adapter.js'; import type { ConversationHistoryProvider } from '../../session/history/types.js'; import type { SystemPromptManager } from '../../systemPrompt/manager.js'; import type { Logger } from '../../logger/v2/types.js'; +import type { SessionCompactionPersistence } from '../../session/compaction-service.js'; import { requiresApiKey } from '../registry/index.js'; import { getPrimaryApiKeyEnvVar, resolveApiKeyForProvider } from '../../utils/api-key-resolver.js'; import { @@ -321,6 +322,7 @@ export function createVercelModel( * @param resourceManager Resource manager for blob storage and resource access * @param logger Logger instance for dependency injection * @param compactionStrategy Optional compaction strategy for context management + * @param sessionCompactionPersistence Persistence adapter for structured session compaction * @param compactionConfig Optional compaction configuration for thresholds * @returns VercelLLMService instance */ @@ -333,7 +335,8 @@ export function createLLMService( sessionId: string, resourceManager: import('../../resources/index.js').ResourceManager, logger: Logger, - compactionStrategy?: import('../../context/compaction/types.js').CompactionStrategy | null + compactionStrategy?: import('../../context/compaction/types.js').CompactionStrategy | null, + sessionCompactionPersistence?: SessionCompactionPersistence ): VercelLLMService { const model = createVercelModel(config, { sessionId, @@ -356,6 +359,7 @@ export function createLLMService( sessionId, resourceManager, logger, - compactionStrategy + compactionStrategy, + sessionCompactionPersistence ); } diff --git a/packages/core/src/llm/services/vercel.ts 
b/packages/core/src/llm/services/vercel.ts index 12308655e..bb12ae667 100644 --- a/packages/core/src/llm/services/vercel.ts +++ b/packages/core/src/llm/services/vercel.ts @@ -21,6 +21,7 @@ import type { ResourceManager } from '../../resources/index.js'; import { DextoRuntimeError } from '../../errors/DextoRuntimeError.js'; import { LLMErrorCode } from '../error-codes.js'; import type { ContentInput } from '../../agent/types.js'; +import type { SessionCompactionPersistence } from '../../session/compaction-service.js'; /** * Vercel AI SDK implementation of LLMService @@ -54,6 +55,7 @@ export class VercelLLMService { | import('../../context/compaction/types.js').CompactionStrategy | null; private modelLimits?: ModelLimits; + private sessionCompactionPersistence: SessionCompactionPersistence | undefined; /** * Helper to extract model ID from LanguageModel union type (string | LanguageModelV2) @@ -72,7 +74,8 @@ export class VercelLLMService { sessionId: string, resourceManager: ResourceManager, logger: Logger, - compactionStrategy?: import('../../context/compaction/types.js').CompactionStrategy | null + compactionStrategy?: import('../../context/compaction/types.js').CompactionStrategy | null, + sessionCompactionPersistence?: SessionCompactionPersistence ) { this.logger = logger.createChild(DextoLogComponent.LLM); this.model = model; @@ -82,6 +85,7 @@ export class VercelLLMService { this.sessionId = sessionId; this.resourceManager = resourceManager; this.compactionStrategy = compactionStrategy ?? 
null; + this.sessionCompactionPersistence = sessionCompactionPersistence; // Create session-level message queue for mid-task user messages this.messageQueue = new MessageQueueService(this.sessionEventBus, this.logger); @@ -145,7 +149,8 @@ export class VercelLLMService { this.messageQueue, this.modelLimits, externalSignal, - this.compactionStrategy + this.compactionStrategy, + this.sessionCompactionPersistence ); } diff --git a/packages/core/src/session/chat-session.test.ts b/packages/core/src/session/chat-session.test.ts index 22f2dde1f..3c36046ed 100644 --- a/packages/core/src/session/chat-session.test.ts +++ b/packages/core/src/session/chat-session.test.ts @@ -283,7 +283,8 @@ describe('ChatSession', () => { sessionId, mockServices.resourceManager, mockLogger, - null // compaction strategy + null, // compaction strategy + mockServices.sessionManager ); }); @@ -309,7 +310,8 @@ describe('ChatSession', () => { sessionId, mockServices.resourceManager, mockLogger, - null // compaction strategy + null, // compaction strategy + mockServices.sessionManager ); }); @@ -426,7 +428,8 @@ describe('ChatSession', () => { sessionId, mockServices.resourceManager, // ResourceManager parameter mockLogger, // Logger parameter - null // compaction strategy + null, // compaction strategy + mockServices.sessionManager ); // Verify session-specific history provider creation diff --git a/packages/core/src/session/chat-session.ts b/packages/core/src/session/chat-session.ts index f42247f48..89087e05c 100644 --- a/packages/core/src/session/chat-session.ts +++ b/packages/core/src/session/chat-session.ts @@ -289,7 +289,8 @@ export class ChatSession { this.id, this.services.resourceManager, // Pass ResourceManager for blob storage this.logger, // Pass logger for dependency injection - compactionStrategy // Pass compaction strategy + compactionStrategy, // Pass compaction strategy + this.services.sessionManager ); this.logger.debug(`ChatSession ${this.id}: Services initialized with storage`); @@ 
-676,7 +677,8 @@ export class ChatSession { this.id, this.services.resourceManager, this.logger, - compactionStrategy // Pass compaction strategy + compactionStrategy, // Pass compaction strategy + this.services.sessionManager ); // Replace the LLM service diff --git a/packages/core/src/session/compaction-service.ts b/packages/core/src/session/compaction-service.ts new file mode 100644 index 000000000..bd4b91932 --- /dev/null +++ b/packages/core/src/session/compaction-service.ts @@ -0,0 +1,510 @@ +import { randomUUID } from 'crypto'; +import type { LanguageModel } from 'ai'; +import { estimateContextTokens } from '../context/utils.js'; +import type { ContentPart, InternalMessage } from '../context/types.js'; +import type { Logger } from '../logger/v2/types.js'; +import type { AgentEventBus, SessionEventBus } from '../events/index.js'; +import type { DynamicContributorContext } from '../systemPrompt/types.js'; +import type { ToolSet } from '../tools/types.js'; +import type { CompactionStrategy } from '../context/compaction/types.js'; +import { buildCompactionWindow } from '../context/compaction/window.js'; +import type { + SessionCompactionMode, + SessionCompactionRecord, + SessionCompactionTrigger, +} from './compaction.js'; +import { SessionCompactionError } from './errors.js'; + +interface SessionCompactionContextManager { + getHistory(): Promise; + addMessage(message: InternalMessage): Promise; + resetActualTokenTracking(): void; + getSystemPrompt(contributorContext: DynamicContributorContext): Promise; + getContextTokenEstimate( + contributorContext: DynamicContributorContext, + tools: ToolSet + ): Promise<{ + estimated: number; + stats: { + filteredMessageCount: number; + }; + }>; +} + +export interface SessionCompactionPersistence { + createSeededChildSession( + parentSessionId: string, + options: { + initialMessages: readonly InternalMessage[]; + title?: string; + } + ): Promise<{ + id: string; + }>; + deleteSession(sessionId: string): Promise; + 
deleteSessionCompaction(compactionId: string): Promise; + saveSessionCompaction(compaction: SessionCompactionRecord): Promise; +} + +export interface SessionCompactionEventSink { + emitCompacting(payload: { estimatedTokens: number }): void; + emitCompacted(payload: { + originalTokens: number; + compactedTokens: number; + originalMessages: number; + compactedMessages: number; + strategy: string; + reason: 'overflow' | 'manual'; + compactionId?: string; + mode?: SessionCompactionMode; + targetSessionId?: string; + }): void; +} + +export interface RunSessionCompactionInput { + sessionId: string; + mode: SessionCompactionMode; + trigger: SessionCompactionTrigger; + childTitle?: string; + languageModel: LanguageModel; + logger: Logger; + contextManager: SessionCompactionContextManager; + compactionStrategy: CompactionStrategy; + contributorContext: DynamicContributorContext; + tools: ToolSet; + persistence: SessionCompactionPersistence; + eventSink: SessionCompactionEventSink; + originalTokensOverride?: number; +} + +export function createAgentSessionCompactionEventSink( + eventBus: AgentEventBus, + sessionId: string +): SessionCompactionEventSink { + return { + emitCompacting: (payload) => { + eventBus.emit('context:compacting', { + ...payload, + sessionId, + }); + }, + emitCompacted: (payload) => { + eventBus.emit('context:compacted', { + ...payload, + sessionId, + }); + }, + }; +} + +export function createSessionCompactionEventSink( + eventBus: SessionEventBus +): SessionCompactionEventSink { + return { + emitCompacting: (payload) => { + eventBus.emit('context:compacting', payload); + }, + emitCompacted: (payload) => { + eventBus.emit('context:compacted', payload); + }, + }; +} + +export async function runSessionCompaction( + input: RunSessionCompactionInput +): Promise { + const history = await input.contextManager.getHistory(); + const compactionWindow = buildCompactionWindow(history); + + const beforeEstimate = await input.contextManager.getContextTokenEstimate( + 
input.contributorContext, + input.tools + ); + const originalTokens = input.originalTokensOverride ?? beforeEstimate.estimated; + const originalMessages = beforeEstimate.stats.filteredMessageCount; + + input.eventSink.emitCompacting({ + estimatedTokens: originalTokens, + }); + + const compactionResult = await input.compactionStrategy.compact(compactionWindow, { + sessionId: input.sessionId, + model: input.languageModel, + logger: input.logger, + }); + + if (!compactionResult || compactionResult.summaryMessages.length === 0) { + input.logger.debug( + `Compaction skipped for session ${input.sessionId} - nothing to compact` + ); + input.eventSink.emitCompacted({ + originalTokens, + compactedTokens: originalTokens, + originalMessages, + compactedMessages: originalMessages, + strategy: input.compactionStrategy.name, + reason: toCompactionReason(input.trigger), + }); + return null; + } + + const preserveFromWorkingIndex = resolveWorkingBoundary( + compactionResult, + compactionWindow.workingHistory.length, + input.compactionStrategy.name + ); + const preservedSourceMessages = compactionWindow.workingHistory.slice(preserveFromWorkingIndex); + const preservedWorkingMessages = preservedSourceMessages.map(normalizeCompactionMessage); + const preservedMessageIds = resolvePreservedMessageIds( + input.mode === 'continue-in-place' ? 
preservedSourceMessages : preservedWorkingMessages, + input.compactionStrategy.name + ); + const summaryMessages = compactionResult.summaryMessages.map((summaryMessage) => + normalizeSummaryMessage( + summaryMessage, + preservedMessageIds, + !!compactionWindow.latestSummary + ) + ); + const continuationMessages = [ + ...summaryMessages.map(cloneCompactionMessage), + ...preservedWorkingMessages.map(cloneCompactionMessage), + ]; + + const systemPrompt = await input.contextManager.getSystemPrompt(input.contributorContext); + const compactedTokens = estimateContextTokens( + systemPrompt, + continuationMessages, + input.tools + ).total; + const compactedMessages = continuationMessages.length; + + const baseCompaction = { + id: randomUUID(), + sourceSessionId: input.sessionId, + createdAt: Date.now(), + strategy: input.compactionStrategy.name, + mode: input.mode, + trigger: input.trigger, + originalTokens, + compactedTokens, + originalMessages, + compactedMessages, + summaryMessages, + continuationMessages, + } satisfies Omit; + + let compaction: SessionCompactionRecord; + if (input.mode === 'continue-in-child') { + const childSession = await input.persistence.createSeededChildSession(input.sessionId, { + initialMessages: continuationMessages, + ...(input.childTitle !== undefined && { title: input.childTitle }), + }); + compaction = { + ...baseCompaction, + targetSessionId: childSession.id, + }; + try { + await input.persistence.saveSessionCompaction(compaction); + } catch (error) { + try { + await input.persistence.deleteSession(childSession.id); + input.logger.warn( + `Rolled back child session ${childSession.id} after compaction persistence failure` + ); + } catch (rollbackError) { + input.logger.error( + `Failed to roll back child session ${childSession.id} after compaction persistence failure: ${ + rollbackError instanceof Error + ? 
rollbackError.message + : String(rollbackError) + }` + ); + } + throw error; + } + } else { + compaction = baseCompaction; + await input.persistence.saveSessionCompaction(compaction); + } + + if (input.mode === 'continue-in-place') { + try { + for (const summary of summaryMessages) { + await input.contextManager.addMessage(cloneCompactionMessage(summary)); + } + } catch (error) { + try { + await input.persistence.deleteSessionCompaction(compaction.id); + input.logger.warn( + `Rolled back compaction artifact ${compaction.id} after in-place compaction apply failure` + ); + } catch (rollbackError) { + input.logger.error( + `Failed to roll back compaction artifact ${compaction.id} after in-place compaction apply failure: ${ + rollbackError instanceof Error + ? rollbackError.message + : String(rollbackError) + }` + ); + } + throw error; + } + + // The formula (lastInput + lastOutput + newEstimate) is no longer valid after compaction. + input.contextManager.resetActualTokenTracking(); + } + + input.eventSink.emitCompacted({ + originalTokens, + compactedTokens, + originalMessages, + compactedMessages, + strategy: input.compactionStrategy.name, + reason: toCompactionReason(input.trigger), + compactionId: compaction.id, + mode: input.mode, + ...(compaction.targetSessionId !== undefined && { + targetSessionId: compaction.targetSessionId, + }), + }); + + input.logger.info( + `Compaction complete for session ${input.sessionId}: ` + + `${originalMessages} messages → ${compactedMessages} messages (~${compactedTokens} tokens) [mode=${input.mode}]` + ); + + return compaction; +} + +function normalizeCompactionMessage(message: InternalMessage): InternalMessage { + const normalized = cloneCompactionMessage(message); + if (!normalized.id) { + normalized.id = randomUUID(); + } + if (!normalized.timestamp) { + normalized.timestamp = Date.now(); + } + return normalized; +} + +function normalizeSummaryMessage( + message: InternalMessage, + preservedMessageIds: readonly string[], + 
isRecompaction: boolean +): InternalMessage { + const normalized = normalizeCompactionMessage(message); + normalized.metadata = { + ...(normalized.metadata ?? {}), + isSummary: normalized.metadata?.isSummary ?? true, + preservedMessageIds: [...preservedMessageIds], + ...(isRecompaction && normalized.metadata?.isRecompaction !== true + ? { isRecompaction: true } + : {}), + }; + if (normalized.metadata) { + delete normalized.metadata.originalMessageCount; + } + + return normalized; +} + +function cloneCompactionMessage(message: InternalMessage): InternalMessage { + const base = { + role: message.role, + ...(message.id !== undefined && { id: message.id }), + ...(message.timestamp !== undefined && { timestamp: message.timestamp }), + ...(message.metadata !== undefined && { metadata: structuredClone(message.metadata) }), + }; + + switch (message.role) { + case 'system': + return { + ...base, + role: 'system', + content: message.content.map(cloneContentPart), + }; + case 'user': + return { + ...base, + role: 'user', + content: message.content.map(cloneContentPart), + }; + case 'assistant': + return { + ...base, + role: 'assistant', + content: message.content?.map(cloneContentPart) ?? 
null, + ...(message.reasoning !== undefined && { reasoning: message.reasoning }), + ...(message.reasoningMetadata !== undefined && { + reasoningMetadata: structuredClone(message.reasoningMetadata), + }), + ...(message.tokenUsage !== undefined && { + tokenUsage: structuredClone(message.tokenUsage), + }), + ...(message.estimatedCost !== undefined && { + estimatedCost: message.estimatedCost, + }), + ...(message.pricingStatus !== undefined && { + pricingStatus: message.pricingStatus, + }), + ...(message.usageScopeId !== undefined && { + usageScopeId: message.usageScopeId, + }), + ...(message.model !== undefined && { model: message.model }), + ...(message.provider !== undefined && { provider: message.provider }), + ...(message.toolCalls !== undefined && { + toolCalls: message.toolCalls.map((toolCall) => ({ + id: toolCall.id, + type: toolCall.type, + function: { + name: toolCall.function.name, + arguments: toolCall.function.arguments, + }, + ...(toolCall.providerOptions !== undefined && { + providerOptions: structuredClone(toolCall.providerOptions), + }), + })), + }), + }; + case 'tool': + return { + ...base, + role: 'tool', + content: message.content.map(cloneContentPart), + toolCallId: message.toolCallId, + name: message.name, + ...(message.presentationSnapshot !== undefined && { + presentationSnapshot: structuredClone(message.presentationSnapshot), + }), + ...(message.success !== undefined && { success: message.success }), + ...(message.requireApproval !== undefined && { + requireApproval: message.requireApproval, + }), + ...(message.approvalStatus !== undefined && { + approvalStatus: message.approvalStatus, + }), + ...(message.compactedAt !== undefined && { + compactedAt: message.compactedAt, + }), + ...(message.displayData !== undefined && { + displayData: structuredClone(message.displayData), + }), + }; + } +} + +function cloneContentPart(messagePart: ContentPart): ContentPart { + switch (messagePart.type) { + case 'text': + return { + type: 'text', + text: 
messagePart.text, + }; + case 'image': + return { + type: 'image', + image: cloneBinaryPayload(messagePart.image), + ...(messagePart.mimeType !== undefined && { + mimeType: messagePart.mimeType, + }), + }; + case 'file': + return { + type: 'file', + data: cloneBinaryPayload(messagePart.data), + mimeType: messagePart.mimeType, + ...(messagePart.filename !== undefined && { + filename: messagePart.filename, + }), + }; + case 'ui-resource': + return { + type: 'ui-resource', + uri: messagePart.uri, + mimeType: messagePart.mimeType, + ...(messagePart.content !== undefined && { + content: messagePart.content, + }), + ...(messagePart.blob !== undefined && { + blob: messagePart.blob, + }), + ...(messagePart.metadata !== undefined && { + metadata: structuredClone(messagePart.metadata), + }), + }; + } +} + +function cloneBinaryPayload( + value: string | Uint8Array | Buffer | ArrayBuffer | URL +): string | Uint8Array | Buffer | ArrayBuffer { + if (typeof value === 'string') { + return value; + } + + if (value instanceof URL) { + return value.toString(); + } + + if (Buffer.isBuffer(value)) { + return Buffer.from(value); + } + + if (value instanceof Uint8Array) { + return new Uint8Array(value); + } + + return value.slice(0); +} + +function resolveWorkingBoundary( + result: { + summaryMessages: readonly InternalMessage[]; + preserveFromWorkingIndex: number; + }, + workingHistoryLength: number, + strategyName: string +): number { + if (result.summaryMessages.length !== 1) { + throw SessionCompactionError.invalidSummaryCount( + strategyName, + result.summaryMessages.length + ); + } + + const preserveFromWorkingIndex = result.preserveFromWorkingIndex; + if ( + typeof preserveFromWorkingIndex !== 'number' || + !Number.isInteger(preserveFromWorkingIndex) || + preserveFromWorkingIndex < 0 || + preserveFromWorkingIndex > workingHistoryLength + ) { + throw SessionCompactionError.invalidPreserveFromWorkingIndex( + strategyName, + preserveFromWorkingIndex, + workingHistoryLength + ); + } + 
+ return preserveFromWorkingIndex; +} + +function resolvePreservedMessageIds( + preservedMessages: readonly InternalMessage[], + strategyName: string +): string[] { + const preservedMessageIds: string[] = []; + for (const message of preservedMessages) { + if (!message.id) { + throw SessionCompactionError.preservedMessageMissingId(strategyName); + } + preservedMessageIds.push(message.id); + } + + return preservedMessageIds; +} + +function toCompactionReason(trigger: SessionCompactionTrigger): 'overflow' | 'manual' { + return trigger === 'overflow' ? 'overflow' : 'manual'; +} diff --git a/packages/core/src/session/compaction.ts b/packages/core/src/session/compaction.ts new file mode 100644 index 000000000..326f8b368 --- /dev/null +++ b/packages/core/src/session/compaction.ts @@ -0,0 +1,36 @@ +import type { InternalMessage } from '../context/types.js'; + +export const SESSION_COMPACTION_MODES = [ + 'artifact-only', + 'continue-in-place', + 'continue-in-child', +] as const; + +export type SessionCompactionMode = (typeof SESSION_COMPACTION_MODES)[number]; + +export const SESSION_COMPACTION_TRIGGERS = ['manual', 'api', 'scheduled', 'overflow'] as const; + +export type SessionCompactionTrigger = (typeof SESSION_COMPACTION_TRIGGERS)[number]; + +export interface SessionCompactionInput { + sessionId: string; + mode?: SessionCompactionMode; + trigger?: SessionCompactionTrigger; + childTitle?: string; +} + +export interface SessionCompactionRecord { + id: string; + sourceSessionId: string; + targetSessionId?: string; + createdAt: number; + strategy: string; + mode: SessionCompactionMode; + trigger: SessionCompactionTrigger; + originalTokens: number; + compactedTokens: number; + originalMessages: number; + compactedMessages: number; + summaryMessages: InternalMessage[]; + continuationMessages: InternalMessage[]; +} diff --git a/packages/core/src/session/error-codes.ts b/packages/core/src/session/error-codes.ts index b0b48e458..06337e8ee 100644 --- 
a/packages/core/src/session/error-codes.ts +++ b/packages/core/src/session/error-codes.ts @@ -13,4 +13,7 @@ export enum SessionErrorCode { // Session operations SESSION_RESET_FAILED = 'session_reset_failed', + + // Session compaction + SESSION_COMPACTION_INVALID_OUTPUT = 'session_compaction_invalid_output', } diff --git a/packages/core/src/session/errors.ts b/packages/core/src/session/errors.ts index db67d50f6..937f81b5f 100644 --- a/packages/core/src/session/errors.ts +++ b/packages/core/src/session/errors.ts @@ -73,3 +73,51 @@ export class SessionError { ); } } + +/** + * Session compaction error factory for invalid strategy output and lifecycle invariants. + */ +export class SessionCompactionError { + static invalidSummaryCount(strategy: string, summaryMessageCount: number) { + return new DextoRuntimeError( + SessionErrorCode.SESSION_COMPACTION_INVALID_OUTPUT, + ErrorScope.SESSION, + ErrorType.SYSTEM, + `Compaction strategy '${strategy}' must return exactly one summary message for session-level compaction`, + { + strategy, + summaryMessageCount, + } + ); + } + + static invalidPreserveFromWorkingIndex( + strategy: string, + preserveFromWorkingIndex: unknown, + workingHistoryLength: number + ) { + return new DextoRuntimeError( + SessionErrorCode.SESSION_COMPACTION_INVALID_OUTPUT, + ErrorScope.SESSION, + ErrorType.SYSTEM, + `Compaction strategy '${strategy}' must provide a valid preserveFromWorkingIndex within the current working history bounds`, + { + strategy, + preserveFromWorkingIndex, + workingHistoryLength, + } + ); + } + + static preservedMessageMissingId(strategy: string) { + return new DextoRuntimeError( + SessionErrorCode.SESSION_COMPACTION_INVALID_OUTPUT, + ErrorScope.SESSION, + ErrorType.SYSTEM, + `Compaction strategy '${strategy}' produced continuation messages without stable IDs, so the preserved working-memory boundary could not be materialized`, + { + strategy, + } + ); + } +} diff --git a/packages/core/src/session/index.ts 
b/packages/core/src/session/index.ts index 3efd7297f..44a880539 100644 --- a/packages/core/src/session/index.ts +++ b/packages/core/src/session/index.ts @@ -1,8 +1,16 @@ export { ChatSession } from './chat-session.js'; export { SessionManager } from './session-manager.js'; export type { SessionMetadata, SessionLoggerFactory } from './session-manager.js'; +export { + SESSION_COMPACTION_MODES, + SESSION_COMPACTION_TRIGGERS, + type SessionCompactionInput, + type SessionCompactionMode, + type SessionCompactionRecord, + type SessionCompactionTrigger, +} from './compaction.js'; export { SessionErrorCode } from './error-codes.js'; -export { SessionError } from './errors.js'; +export { SessionError, SessionCompactionError } from './errors.js'; export { MessageQueueService } from './message-queue.js'; export type { UserMessageInput } from './message-queue.js'; export type { QueuedMessage, CoalescedMessage } from './types.js'; diff --git a/packages/core/src/session/session-manager.test.ts b/packages/core/src/session/session-manager.test.ts index 96edacbaf..a358d90d3 100644 --- a/packages/core/src/session/session-manager.test.ts +++ b/packages/core/src/session/session-manager.test.ts @@ -724,6 +724,75 @@ describe('SessionManager', () => { expect(mockStorageManager.database.delete).toHaveBeenCalledWith('session:non-existent'); }); + test('should delete compaction artifacts that reference the deleted session', async () => { + const sessionId = 'test-session'; + + await sessionManager.createSession(sessionId); + + mockStorageManager.database.list.mockResolvedValueOnce([ + 'session-compaction:source-hit', + 'session-compaction:target-hit', + 'session-compaction:miss', + ]); + mockStorageManager.database.get + .mockResolvedValueOnce({ + id: 'source-hit', + sourceSessionId: sessionId, + createdAt: Date.now(), + strategy: 'test', + mode: 'continue-in-place', + trigger: 'manual', + originalTokens: 10, + compactedTokens: 5, + originalMessages: 4, + compactedMessages: 2, + 
summaryMessages: [], + continuationMessages: [], + }) + .mockResolvedValueOnce({ + id: 'target-hit', + sourceSessionId: 'other-session', + targetSessionId: sessionId, + createdAt: Date.now(), + strategy: 'test', + mode: 'continue-in-child', + trigger: 'manual', + originalTokens: 10, + compactedTokens: 5, + originalMessages: 4, + compactedMessages: 2, + summaryMessages: [], + continuationMessages: [], + }) + .mockResolvedValueOnce({ + id: 'miss', + sourceSessionId: 'other-session', + createdAt: Date.now(), + strategy: 'test', + mode: 'artifact-only', + trigger: 'manual', + originalTokens: 10, + compactedTokens: 5, + originalMessages: 4, + compactedMessages: 2, + summaryMessages: [], + continuationMessages: [], + }); + + await sessionManager.deleteSession(sessionId); + + expect(mockStorageManager.database.list).toHaveBeenCalledWith('session-compaction:'); + expect(mockStorageManager.database.delete).toHaveBeenCalledWith( + 'session-compaction:source-hit' + ); + expect(mockStorageManager.database.delete).toHaveBeenCalledWith( + 'session-compaction:target-hit' + ); + expect(mockStorageManager.database.delete).not.toHaveBeenCalledWith( + 'session-compaction:miss' + ); + }); + test('should cleanup all sessions during shutdown', async () => { // Create multiple sessions const sessions = [ diff --git a/packages/core/src/session/session-manager.ts b/packages/core/src/session/session-manager.ts index 6dd6b9f04..dfe32fbd5 100644 --- a/packages/core/src/session/session-manager.ts +++ b/packages/core/src/session/session-manager.ts @@ -12,6 +12,8 @@ import type { HookManager } from '../hooks/manager.js'; import { SessionError } from './errors.js'; import type { TokenUsage } from '../llm/types.js'; import type { CompactionStrategy } from '../context/compaction/types.js'; +import type { InternalMessage } from '../context/types.js'; +import type { SessionCompactionRecord } from './compaction.js'; export type SessionLoggerFactory = (options: { baseLogger: Logger; agentId: string; @@ 
-90,6 +92,13 @@ export interface SessionData { llmOverride?: PersistedLLMConfig; } +interface CreateDerivedSessionOptions { + parentSessionId: string; + title: string; + messageCount: number; + populateMessages: (childMessagesKey: string) => Promise; +} + /** * Manages multiple chat sessions within a Dexto agent. * @@ -122,6 +131,7 @@ export class SessionManager { private static readonly FORK_ID_GENERATION_MAX_ATTEMPTS = 5; private static readonly FORK_TITLE_PREFIX = 'Fork: '; private static readonly FORK_PARENT_ID_PREVIEW_LENGTH = 8; + private static readonly SESSION_COMPACTION_KEY_PREFIX = 'session-compaction:'; private readonly sessionLoggerFactory: SessionLoggerFactory; @@ -276,35 +286,158 @@ export class SessionManager { public async forkSession(parentSessionId: string): Promise { await this.ensureInitialized(); - const database = this.services.storageManager.getDatabase(); - const cache = this.services.storageManager.getCache(); - const parentSessionKey = `session:${parentSessionId}`; + const parentSessionData = await this.getRequiredSessionData(parentSessionId); const parentMessagesKey = `messages:${parentSessionId}`; + const childSession = await this.createDerivedSession({ + parentSessionId, + title: this.buildDerivedTitle(parentSessionData, parentSessionId, { + prefix: SessionManager.FORK_TITLE_PREFIX, + }), + messageCount: parentSessionData.messageCount, + populateMessages: async (childMessagesKey) => { + await this.copySessionHistory(parentMessagesKey, childMessagesKey); + }, + }); + + this.logger.info(`Forked session '${parentSessionId}' into child '${childSession.id}'`); + return childSession; + } + + /** + * Create a child session seeded with continuation messages from the parent. + * This is used by structured session compaction to start a fresh child + * session from compacted working memory rather than a full raw history clone. 
+ */ + public async createSeededChildSession( + parentSessionId: string, + options: { + initialMessages: readonly InternalMessage[]; + title?: string; + } + ): Promise { + await this.ensureInitialized(); + + const parentSessionData = await this.getRequiredSessionData(parentSessionId); + const defaultTitle = this.resolveParentTitle(parentSessionData, parentSessionId); + const childTitle = options.title?.trim() || defaultTitle; + const normalizedMessages = options.initialMessages.map((message) => + structuredClone(message) + ); + + const childSession = await this.createDerivedSession({ + parentSessionId, + title: childTitle, + messageCount: normalizedMessages.length, + populateMessages: async (childMessagesKey) => { + await this.appendMessages(childMessagesKey, normalizedMessages); + }, + }); + + this.logger.info( + `Created seeded child session '${childSession.id}' from parent '${parentSessionId}'` + ); + return childSession; + } + + public async saveSessionCompaction(compaction: SessionCompactionRecord): Promise { + await this.ensureInitialized(); + const key = `${SessionManager.SESSION_COMPACTION_KEY_PREFIX}${compaction.id}`; + await this.services.storageManager.getDatabase().set(key, compaction); + } + + public async deleteSessionCompaction(compactionId: string): Promise { + await this.ensureInitialized(); + const key = `${SessionManager.SESSION_COMPACTION_KEY_PREFIX}${compactionId}`; + await this.services.storageManager.getDatabase().delete(key); + } + + public async getSessionCompaction( + compactionId: string + ): Promise { + await this.ensureInitialized(); + const key = `${SessionManager.SESSION_COMPACTION_KEY_PREFIX}${compactionId}`; + return await this.services.storageManager.getDatabase().get(key); + } + + private async deleteCompactionsForSession(sessionId: string): Promise { + const database = this.services.storageManager.getDatabase(); + const compactionKeys = await database.list(SessionManager.SESSION_COMPACTION_KEY_PREFIX); + let deletedCount = 0; + + 
for (const key of compactionKeys) { + const compaction = await database.get(key); + if (!compaction) { + continue; + } - const parentSessionData = await database.get(parentSessionKey); + if ( + compaction.sourceSessionId !== sessionId && + compaction.targetSessionId !== sessionId + ) { + continue; + } + + await database.delete(key); + deletedCount += 1; + } + + return deletedCount; + } + + private resolveParentTitle(parentSessionData: SessionData, parentSessionId: string): string { + const rawParentTitle = parentSessionData.metadata?.title; + const parentTitle = typeof rawParentTitle === 'string' ? rawParentTitle.trim() : ''; + return parentTitle.length > 0 + ? parentTitle + : parentSessionId.slice(0, SessionManager.FORK_PARENT_ID_PREVIEW_LENGTH); + } + + private buildDerivedTitle( + parentSessionData: SessionData, + parentSessionId: string, + options: { + prefix?: string; + } = {} + ): string { + const prefix = options.prefix ?? ''; + const baseTitle = this.resolveParentTitle(parentSessionData, parentSessionId); + + if (!prefix) { + return baseTitle; + } + + return baseTitle.startsWith(prefix) ? 
baseTitle : `${prefix}${baseTitle}`.trim(); + } + + private async getRequiredSessionData(parentSessionId: string): Promise { + const parentSessionData = await this.services.storageManager + .getDatabase() + .get(`session:${parentSessionId}`); if (!parentSessionData) { throw SessionError.notFound(parentSessionId); } + return parentSessionData; + } - const activeSessionKeys = await database.list('session:'); - if (activeSessionKeys.length >= this.maxSessions) { - throw SessionError.maxSessionsExceeded(activeSessionKeys.length, this.maxSessions); - } + private async createDerivedSession(options: CreateDerivedSessionOptions): Promise { + const parentSessionData = await this.getRequiredSessionData(options.parentSessionId); + await this.ensureSessionCapacity(); - const childSessionId = await this.generateForkSessionId(); + const database = this.services.storageManager.getDatabase(); + const cache = this.services.storageManager.getCache(); + const childSessionId = await this.generateChildSessionId(); const childSessionKey = `session:${childSessionId}`; const childMessagesKey = `messages:${childSessionId}`; const now = Date.now(); - const childTitle = this.buildForkTitle(parentSessionData, parentSessionId); const childSessionData: SessionData = { id: childSessionId, createdAt: now, lastActivity: now, - messageCount: parentSessionData.messageCount, - parentSessionId, + messageCount: options.messageCount, + parentSessionId: options.parentSessionId, metadata: { - title: childTitle, + title: options.title, }, ...(parentSessionData.workspaceId !== undefined && { workspaceId: parentSessionData.workspaceId, @@ -316,13 +449,10 @@ export class SessionManager { try { await database.set(childSessionKey, childSessionData); - await this.copySessionHistory(parentMessagesKey, childMessagesKey); + await options.populateMessages(childMessagesKey); - const childSession = await this.createSession(childSessionId); - this.logger.info(`Forked session '${parentSessionId}' into child 
'${childSessionId}'`); - return childSession; + return await this.createSession(childSessionId); } catch (error) { - // Best-effort rollback for partially created fork state. await Promise.allSettled([ database.delete(childSessionKey), database.delete(childMessagesKey), @@ -343,22 +473,14 @@ export class SessionManager { } } - private buildForkTitle(parentSessionData: SessionData, parentSessionId: string): string { - const rawParentTitle = parentSessionData.metadata?.title; - const parentTitle = typeof rawParentTitle === 'string' ? rawParentTitle.trim() : ''; - const prefix = SessionManager.FORK_TITLE_PREFIX; - - const baseTitle = - parentTitle.length > 0 - ? parentTitle.startsWith(prefix) - ? parentTitle.slice(prefix.length).trim() || parentTitle - : parentTitle - : parentSessionId.slice(0, SessionManager.FORK_PARENT_ID_PREVIEW_LENGTH); - - return `${prefix}${baseTitle}`; + private async ensureSessionCapacity(): Promise { + const activeSessionKeys = await this.services.storageManager.getDatabase().list('session:'); + if (activeSessionKeys.length >= this.maxSessions) { + throw SessionError.maxSessionsExceeded(activeSessionKeys.length, this.maxSessions); + } } - private async generateForkSessionId(): Promise { + private async generateChildSessionId(): Promise { const database = this.services.storageManager.getDatabase(); for (let attempt = 0; attempt < SessionManager.FORK_ID_GENERATION_MAX_ATTEMPTS; attempt++) { @@ -409,6 +531,17 @@ export class SessionManager { } } + private async appendMessages( + childMessagesKey: string, + messages: readonly InternalMessage[] + ): Promise { + const database = this.services.storageManager.getDatabase(); + + for (const message of messages) { + await database.append(childMessagesKey, structuredClone(message)); + } + } + /** * Internal method that handles the actual session creation logic. * This method implements atomic session creation to prevent race conditions. 
@@ -657,8 +790,14 @@ export class SessionManager { const messagesKey = `messages:${sessionId}`; await this.services.storageManager.getDatabase().delete(messagesKey); + const deletedCompactionCount = await this.deleteCompactionsForSession(sessionId); - this.logger.debug(`Deleted session and conversation history: ${sessionId}`); + this.logger.debug( + `Deleted session and conversation history: ${sessionId}` + + (deletedCompactionCount > 0 + ? ` (removed ${deletedCompactionCount} compaction artifact${deletedCompactionCount === 1 ? '' : 's'})` + : '') + ); } /** diff --git a/packages/server/src/hono/__tests__/api.integration.test.ts b/packages/server/src/hono/__tests__/api.integration.test.ts index 0695d2f4f..290277b25 100644 --- a/packages/server/src/hono/__tests__/api.integration.test.ts +++ b/packages/server/src/hono/__tests__/api.integration.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest'; import { TextDecoder } from 'node:util'; import type { StreamingEvent } from '@dexto/core'; +import type { CompactionStrategy } from '@dexto/core'; import { createTestAgent, startTestServer, @@ -10,11 +11,35 @@ import { validators, } from './test-fixtures.js'; +const testCompactionStrategy: CompactionStrategy = { + name: 'test-api-compaction', + getSettings: () => ({ + enabled: true, + thresholdPercent: 0.9, + }), + getModelLimits: (modelContextWindow: number) => ({ + contextWindow: modelContextWindow, + }), + shouldCompact: () => false, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [{ type: 'text', text: 'Compacted summary' }], + timestamp: Date.now(), + }, + ], + preserveFromWorkingIndex: 2, + }), +}; + describe('Hono API Integration Tests', () => { let testServer: TestServer | undefined; beforeAll(async () => { - const agent = await createTestAgent(); + const agent = await createTestAgent(undefined, { + compaction: testCompactionStrategy, + }); testServer = await startTestServer(agent); }, 
30000); // 30 second timeout for server startup @@ -567,6 +592,356 @@ describe('Hono API Integration Tests', () => { expect(res.status).toBe(404); }); + it('POST /api/sessions/:id/compact can seed a continuation child session', async () => { + if (!testServer) throw new Error('Test server not initialized'); + const sessionId = 'test-session-compact-child'; + + await httpRequest(testServer.baseUrl, 'POST', '/api/sessions', { + sessionId, + }); + + const session = await testServer.agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'old request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'old response' }], + }); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'keep request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'keep response' }], + }); + + const compactRes = await httpRequest( + testServer.baseUrl, + 'POST', + `/api/sessions/${sessionId}/compact`, + { + mode: 'continue-in-child', + } + ); + + expect(compactRes.status).toBe(200); + const compaction = ( + compactRes.body as { + compaction: { + id: string; + mode: string; + sourceSessionId: string; + targetSessionId: string | null; + summaryMessages: Array<{ + metadata?: { + isSummary?: boolean; + preservedMessageIds?: string[]; + }; + }>; + continuationMessages: unknown[]; + } | null; + } + ).compaction; + expect(compaction).not.toBeNull(); + expect(compaction?.mode).toBe('continue-in-child'); + expect(compaction?.sourceSessionId).toBe(sessionId); + expect(compaction?.targetSessionId).toBeTruthy(); + expect(compaction?.summaryMessages[0]?.metadata?.isSummary).toBe(true); + expect(compaction?.summaryMessages[0]?.metadata?.preservedMessageIds).toHaveLength(2); + 
expect(compaction?.continuationMessages.length).toBeGreaterThan(0); + + const getCompactionRes = await httpRequest( + testServer.baseUrl, + 'GET', + `/api/sessions/compactions/${compaction!.id}` + ); + expect(getCompactionRes.status).toBe(200); + expect( + ( + getCompactionRes.body as { + compaction: { id: string; targetSessionId: string | null }; + } + ).compaction.id + ).toBe(compaction?.id); + + const childSessionId = compaction?.targetSessionId; + if (!childSessionId) { + throw new Error('Expected continuation child session ID'); + } + + const childDetailsRes = await httpRequest( + testServer.baseUrl, + 'GET', + `/api/sessions/${childSessionId}` + ); + expect(childDetailsRes.status).toBe(200); + expect( + ( + childDetailsRes.body as { + session: { parentSessionId: string | null }; + } + ).session.parentSessionId + ).toBe(sessionId); + }); + + it('POST /api/sessions/:id/compact rejects invalid request bodies without compacting the session', async () => { + if (!testServer) throw new Error('Test server not initialized'); + const sessionId = 'test-session-compact-invalid-body'; + await httpRequest(testServer.baseUrl, 'POST', '/api/sessions', { + sessionId, + }); + + const session = await testServer.agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'old request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'old response' }], + }); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'keep request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'keep response' }], + }); + + const compactRes = await httpRequest( + testServer.baseUrl, + 'POST', + `/api/sessions/${sessionId}/compact`, + { + mode: 'bogus', + } + ); + + 
expect(compactRes.status).toBeGreaterThanOrEqual(400); + + const history = await testServer.agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(history.some((message) => message.metadata?.isSummary === true)).toBe(false); + }); + + it('POST /api/sessions/:id/compact rejects childTitle outside continue-in-child mode', async () => { + if (!testServer) throw new Error('Test server not initialized'); + const sessionId = 'test-session-compact-invalid-child-title'; + await httpRequest(testServer.baseUrl, 'POST', '/api/sessions', { + sessionId, + }); + + const session = await testServer.agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'old request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'old response' }], + }); + await contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'keep request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'keep response' }], + }); + + const compactRes = await httpRequest( + testServer.baseUrl, + 'POST', + `/api/sessions/${sessionId}/compact`, + { + childTitle: 'Should be rejected', + } + ); + + expect(compactRes.status).toBe(400); + + const history = await testServer.agent.getSessionHistory(sessionId); + expect(history).toHaveLength(4); + expect(history.some((message) => message.metadata?.isSummary === true)).toBe(false); + }); + + it('POST /api/sessions/:id/compact rejects malformed JSON bodies', async () => { + if (!testServer) throw new Error('Test server not initialized'); + const sessionId = 'test-session-compact-malformed-json'; + await httpRequest(testServer.baseUrl, 'POST', '/api/sessions', { + sessionId, + }); + + const response = await fetch( + 
`${testServer.baseUrl}/api/sessions/${sessionId}/compact`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: '{"mode":"continue-in-child"', + } + ); + + expect(response.status).toBe(400); + }); + + it('GET /api/sessions/compactions/:id returns 404 when artifact is missing', async () => { + if (!testServer) throw new Error('Test server not initialized'); + const res = await httpRequest( + testServer.baseUrl, + 'GET', + '/api/sessions/compactions/missing-compaction' + ); + expect(res.status).toBe(404); + expect(res.body).toMatchObject({ + code: 'compaction_not_found', + message: 'Compaction artifact not found', + scope: 'agent', + type: 'not_found', + endpoint: '/api/sessions/compactions/missing-compaction', + method: 'GET', + }); + }); + + it('POST /api/sessions/:id/compact serializes multimodal artifacts to JSON-safe strings', async () => { + const multimodalCompactionStrategy: CompactionStrategy = { + name: 'test-api-multimodal-compaction', + getSettings: () => ({ + enabled: true, + thresholdPercent: 0.9, + }), + getModelLimits: (modelContextWindow: number) => ({ + contextWindow: modelContextWindow, + }), + shouldCompact: () => false, + compact: async () => ({ + summaryMessages: [ + { + role: 'assistant', + content: [ + { + type: 'image', + image: Buffer.from('summary-image-bytes'), + mimeType: 'image/png', + }, + ], + }, + ], + preserveFromWorkingIndex: 2, + }), + }; + + const multimodalAgent = await createTestAgent(undefined, { + compaction: multimodalCompactionStrategy, + }); + const multimodalServer = await startTestServer(multimodalAgent); + + try { + const sessionId = 'test-session-compact-serialized'; + await httpRequest(multimodalServer.baseUrl, 'POST', '/api/sessions', { + sessionId, + }); + + const session = await multimodalServer.agent.getSession(sessionId); + if (!session) { + throw new Error(`Expected session '${sessionId}' to exist`); + } + + const contextManager = session.getContextManager(); + await 
contextManager.addMessage({ + role: 'user', + content: [{ type: 'text', text: 'old request' }], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'old response' }], + }); + await contextManager.addMessage({ + role: 'user', + content: [ + { + type: 'file', + data: new URL('https://example.com/file.pdf'), + mimeType: 'application/pdf', + filename: 'file.pdf', + }, + ], + }); + await contextManager.addMessage({ + role: 'assistant', + content: [{ type: 'text', text: 'keep response' }], + }); + + const compactRes = await httpRequest( + multimodalServer.baseUrl, + 'POST', + `/api/sessions/${sessionId}/compact`, + { + mode: 'artifact-only', + } + ); + + expect(compactRes.status).toBe(200); + + const compaction = ( + compactRes.body as { + compaction: { + summaryMessages: Array<{ + content: Array<{ + type: string; + image?: string; + mimeType?: string; + }>; + }>; + continuationMessages: Array<{ + content: Array<{ + type: string; + data?: string; + mimeType?: string; + filename?: string; + }>; + }>; + } | null; + } + ).compaction; + + expect(compaction).not.toBeNull(); + expect(compaction?.summaryMessages[0]?.content[0]).toMatchObject({ + type: 'image', + image: Buffer.from('summary-image-bytes').toString('base64'), + mimeType: 'image/png', + }); + expect(compaction?.continuationMessages[1]?.content[0]).toMatchObject({ + type: 'file', + data: 'https://example.com/file.pdf', + mimeType: 'application/pdf', + filename: 'file.pdf', + }); + } finally { + await multimodalServer.cleanup(); + } + }); + it('DELETE /api/sessions/:id deletes session', async () => { if (!testServer) throw new Error('Test server not initialized'); // Create session first diff --git a/packages/server/src/hono/__tests__/test-fixtures.ts b/packages/server/src/hono/__tests__/test-fixtures.ts index b4d4813c8..79365220c 100644 --- a/packages/server/src/hono/__tests__/test-fixtures.ts +++ b/packages/server/src/hono/__tests__/test-fixtures.ts @@ -6,7 +6,7 @@ 
import { } from '@dexto/agent-config'; import imageLocal from '@dexto/image-local'; import { DextoAgent, createAgentCard } from '@dexto/core'; -import type { AgentCard } from '@dexto/core'; +import type { AgentCard, DextoAgentOptions } from '@dexto/core'; import { randomUUID } from 'node:crypto'; import { promises as fs } from 'node:fs'; import type { Server as HttpServer } from 'node:http'; @@ -60,16 +60,23 @@ export function createTestAgentConfig(): AgentConfig { * Creates a real DextoAgent instance with in-memory storage * No mocks - uses real implementations */ -export async function createTestAgent(config?: AgentConfig): Promise { +export async function createTestAgent( + config?: AgentConfig, + options?: { + compaction?: DextoAgentOptions['compaction']; + } +): Promise { const agentConfig = config ?? createTestAgentConfig(); const validatedConfig = AgentConfigSchema.parse(agentConfig); const services = await resolveServicesFromConfig(validatedConfig, imageLocal); - const agent = new DextoAgent( - toDextoAgentOptions({ - config: validatedConfig, - services, - }) - ); + const agentOptions = toDextoAgentOptions({ + config: validatedConfig, + services, + }); + const agent = new DextoAgent({ + ...agentOptions, + ...(options?.compaction !== undefined ? { compaction: options.compaction } : {}), + }); await agent.start(); return agent; } diff --git a/packages/server/src/hono/middleware/error.ts b/packages/server/src/hono/middleware/error.ts index 1bf8fcfa3..58cae3fe9 100644 --- a/packages/server/src/hono/middleware/error.ts +++ b/packages/server/src/hono/middleware/error.ts @@ -84,12 +84,13 @@ export function handleHonoError(ctx: any, err: unknown) { ); } - // Some hono specific handlers (e.g., ctx.req.json()) may throw SyntaxError for invalid/empty JSON - if (err instanceof SyntaxError) { + // Some Hono handlers surface malformed JSON as SyntaxError, while the validator + // middleware currently throws a plain Error with this exact message. 
+ if (err instanceof SyntaxError || isMalformedJsonRequestError(err)) { return ctx.json( { code: 'invalid_json', - message: err.message || 'Invalid JSON body', + message: err instanceof Error ? err.message : 'Invalid JSON body', scope: 'agent', type: 'user', severity: 'error', @@ -127,3 +128,7 @@ export function handleHonoError(ctx: any, err: unknown) { 500 ); } + +function isMalformedJsonRequestError(err: unknown): err is Error { + return err instanceof Error && err.message === 'Malformed JSON in request body'; +} diff --git a/packages/server/src/hono/routes/sessions.ts b/packages/server/src/hono/routes/sessions.ts index abb87aa81..36de177eb 100644 --- a/packages/server/src/hono/routes/sessions.ts +++ b/packages/server/src/hono/routes/sessions.ts @@ -1,13 +1,24 @@ import { OpenAPIHono, createRoute, z } from '@hono/zod-openapi'; import { + DextoRuntimeError, + ErrorType, getConfiguredUsageScopeId, + type ContentPart as CoreContentPart, + type InternalMessage as CoreInternalMessage, + type SessionCompactionRecord as CoreSessionCompactionRecord, type SessionMetadata as CoreSessionMetadata, + type ToolCall as CoreToolCall, } from '@dexto/core'; import { + type ContentPart as ApiContentPart, SessionMetadataSchema, + SessionCompactionModeSchema, + SessionCompactionSchema, InternalMessageSchema, + type InternalMessage as ApiInternalMessage, ScopedUsageSummarySchema, StandardErrorEnvelopeSchema, + type ToolCall as ApiToolCall, UsageSummarySchema, } from '../schemas/responses.js'; import type { GetAgentFn } from '../index.js'; @@ -18,6 +29,28 @@ const CreateSessionSchema = z }) .describe('Request body for creating a new session'); +const CompactSessionSchema = z + .object({ + mode: SessionCompactionModeSchema.optional().describe( + 'Whether to persist the artifact only, update the current session in place, or seed a new child session' + ), + childTitle: z + .string() + .optional() + .describe('Optional title for a continuation child session'), + }) + .strict() + 
.superRefine((value, ctx) => { + if (value.childTitle !== undefined && value.mode !== 'continue-in-child') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['childTitle'], + message: '`childTitle` is only supported when mode is "continue-in-child"', + }); + } + }) + .describe('Request body for compacting a session'); + function mapSessionMetadata( sessionId: string, metadata: CoreSessionMetadata | undefined, @@ -47,6 +80,113 @@ function mapSessionMetadata( }; } +function mapSessionCompaction( + compaction: CoreSessionCompactionRecord +): z.output { + return { + ...compaction, + targetSessionId: compaction.targetSessionId ?? null, + summaryMessages: compaction.summaryMessages.map(serializeInternalMessage), + continuationMessages: compaction.continuationMessages.map(serializeInternalMessage), + }; +} + +function serializeBinaryPayload(value: string | Uint8Array | Buffer | ArrayBuffer | URL): string { + if (typeof value === 'string') { + return value; + } + + if (value instanceof URL) { + return value.toString(); + } + + if (Buffer.isBuffer(value)) { + return value.toString('base64'); + } + + if (value instanceof Uint8Array) { + return Buffer.from(value).toString('base64'); + } + + return Buffer.from(new Uint8Array(value)).toString('base64'); +} + +function serializeContentPart(part: CoreContentPart): ApiContentPart { + switch (part.type) { + case 'text': + return { + type: 'text', + text: part.text, + }; + case 'image': + return { + type: 'image', + image: serializeBinaryPayload(part.image), + ...(part.mimeType !== undefined && { mimeType: part.mimeType }), + }; + case 'file': + return { + type: 'file', + data: serializeBinaryPayload(part.data), + mimeType: part.mimeType, + ...(part.filename !== undefined && { filename: part.filename }), + }; + case 'ui-resource': + return { + type: 'ui-resource', + uri: part.uri, + mimeType: part.mimeType, + ...(part.content !== undefined && { content: part.content }), + ...(part.blob !== undefined && { blob: part.blob }), 
+ ...(part.metadata !== undefined && { metadata: part.metadata }), + }; + } +} + +function serializeToolCall(toolCall: CoreToolCall): ApiToolCall { + return { + id: toolCall.id, + type: toolCall.type, + function: { + name: toolCall.function.name, + arguments: toolCall.function.arguments, + }, + }; +} + +function serializeInternalMessage(message: CoreInternalMessage): ApiInternalMessage { + return { + role: message.role, + content: Array.isArray(message.content) + ? message.content.map(serializeContentPart) + : message.content, + ...(message.id !== undefined && { id: message.id }), + ...(message.timestamp !== undefined && { timestamp: message.timestamp }), + ...('reasoning' in message && + message.reasoning !== undefined && { reasoning: message.reasoning }), + ...('tokenUsage' in message && + message.tokenUsage !== undefined && { tokenUsage: message.tokenUsage }), + ...('estimatedCost' in message && + message.estimatedCost !== undefined && { estimatedCost: message.estimatedCost }), + ...('pricingStatus' in message && + message.pricingStatus !== undefined && { pricingStatus: message.pricingStatus }), + ...('usageScopeId' in message && + message.usageScopeId !== undefined && { usageScopeId: message.usageScopeId }), + ...('model' in message && message.model !== undefined && { model: message.model }), + ...('provider' in message && + message.provider !== undefined && { provider: message.provider }), + ...('toolCalls' in message && + message.toolCalls !== undefined && { + toolCalls: message.toolCalls.map(serializeToolCall), + }), + ...('toolCallId' in message && + message.toolCallId !== undefined && { toolCallId: message.toolCallId }), + ...('name' in message && message.name !== undefined && { name: message.name }), + ...('success' in message && message.success !== undefined && { success: message.success }), + ...(message.metadata !== undefined && { metadata: message.metadata }), + }; +} + export function createSessionsRouter(getAgent: GetAgentFn) { const app = new 
OpenAPIHono(); @@ -176,6 +316,94 @@ export function createSessionsRouter(getAgent: GetAgentFn) { }, }); + const compactRoute = createRoute({ + method: 'post', + path: '/sessions/{sessionId}/compact', + summary: 'Compact Session', + description: + 'Generates a persisted compaction artifact for a session and can optionally apply it in place or seed a new child session.', + tags: ['sessions'], + request: { + params: z.object({ + sessionId: z.string().describe('Source session identifier'), + }), + body: { + content: { + 'application/json': { + schema: CompactSessionSchema, + }, + }, + required: false, + }, + }, + responses: { + 200: { + description: + 'Compaction result. Returns null when the session was too short or nothing needed compaction.', + content: { + 'application/json': { + schema: z + .object({ + compaction: SessionCompactionSchema.nullable(), + }) + .strict(), + }, + }, + }, + 400: { + description: 'Invalid compaction request', + content: { + 'application/json': { + schema: StandardErrorEnvelopeSchema, + }, + }, + }, + 404: { + description: 'Source session not found', + content: { + 'application/json': { + schema: StandardErrorEnvelopeSchema, + }, + }, + }, + }, + }); + + const getCompactionRoute = createRoute({ + method: 'get', + path: '/sessions/compactions/{compactionId}', + summary: 'Get Session Compaction', + description: 'Retrieves a previously persisted session compaction artifact.', + tags: ['sessions'], + request: { + params: z.object({ + compactionId: z.string().describe('Compaction artifact identifier'), + }), + }, + responses: { + 200: { + description: 'Persisted session compaction artifact', + content: { + 'application/json': { + schema: z + .object({ + compaction: SessionCompactionSchema, + }) + .strict(), + }, + }, + }, + 404: { + description: 'Compaction artifact not found', + content: { + 'application/json': { + schema: StandardErrorEnvelopeSchema, + }, + }, + }, + }, + }); + const historyRoute = createRoute({ method: 'get', path: 
'/sessions/{sessionId}/history', @@ -487,6 +715,47 @@ export function createSessionsRouter(getAgent: GetAgentFn) { 201 ); }) + .openapi(compactRoute, async (ctx) => { + const agent = await getAgent(ctx); + const { sessionId } = ctx.req.valid('param'); + const body: z.input | undefined = + ctx.req.raw.body === null ? undefined : ctx.req.valid('json'); + + const compaction = await agent.compactSession({ + sessionId, + trigger: 'api', + ...(body?.mode !== undefined && { mode: body.mode }), + ...(body?.childTitle !== undefined && { childTitle: body.childTitle }), + }); + + return ctx.json( + { + compaction: compaction ? mapSessionCompaction(compaction) : null, + }, + 200 + ); + }) + .openapi(getCompactionRoute, async (ctx) => { + const agent = await getAgent(ctx); + const { compactionId } = ctx.req.valid('param'); + const compaction = await agent.getSessionCompaction(compactionId); + if (!compaction) { + throw new DextoRuntimeError( + 'compaction_not_found', + 'agent', + ErrorType.NOT_FOUND, + 'Compaction artifact not found', + { compactionId } + ); + } + + return ctx.json( + { + compaction: mapSessionCompaction(compaction), + }, + 200 + ); + }) .openapi(getRoute, async (ctx) => { const agent = await getAgent(ctx); const { sessionId } = ctx.req.param(); @@ -506,11 +775,8 @@ export function createSessionsRouter(getAgent: GetAgentFn) { agent.getSessionHistory(sessionId), agent.isSessionBusy(sessionId), ]); - // TODO: Improve type alignment between core and server schemas. - // Core's InternalMessage has union types (string | Uint8Array | Buffer | URL) - // for binary data, but JSON responses are always base64 strings. 
return ctx.json({ - history: history as z.output[], + history: history.map(serializeInternalMessage), isBusy, }); }) diff --git a/packages/server/src/hono/schemas/responses.ts b/packages/server/src/hono/schemas/responses.ts index 63975471b..6d4d31e71 100644 --- a/packages/server/src/hono/schemas/responses.ts +++ b/packages/server/src/hono/schemas/responses.ts @@ -32,6 +32,8 @@ import { LLM_PRICING_STATUSES, LLMConfigBaseSchema as CoreLLMConfigBaseSchema, LLM_PROVIDERS, + SESSION_COMPACTION_MODES, + SESSION_COMPACTION_TRIGGERS, } from '@dexto/core'; // TODO: Implement shared error response schemas for OpenAPI documentation. @@ -169,6 +171,38 @@ export const TokenUsageSchema = z .strict() .describe('Token usage accounting'); +export const InternalMessageMetadataSchema = z + .object({ + isSummary: z + .boolean() + .optional() + .describe('Whether this message marks a compaction summary boundary'), + isSessionSummary: z + .boolean() + .optional() + .describe('Whether this message marks a session-level summary boundary'), + isRecompaction: z + .boolean() + .optional() + .describe('Whether this summary was produced from already-compacted history'), + originalMessageCount: z + .number() + .int() + .nonnegative() + .optional() + .describe('How many original messages were summarized by this boundary'), + preservedMessageIds: z + .array(z.string()) + .optional() + .describe( + 'Stable message IDs preserved in the continuation window when this summary supersedes older working memory' + ), + }) + .catchall(z.unknown()) + .describe( + 'Optional message metadata. Known compaction fields are documented explicitly, and additional metadata keys may also be present.' 
+ ); + export const PricingStatusSchema = z .enum(LLM_PRICING_STATUSES) .describe('Whether pricing was resolved for this response'); @@ -209,6 +243,7 @@ export const InternalMessageSchema = z .boolean() .optional() .describe('Whether tool execution succeeded (present for role=tool messages)'), + metadata: InternalMessageMetadataSchema.optional().describe('Optional message metadata'), }) .strict() .describe('Internal message representation'); @@ -393,6 +428,59 @@ export type ModelStatistics = z.output; export type SessionUsageTracking = z.output; export type SessionMetadata = z.output; +export const SessionCompactionModeSchema = z + .enum(SESSION_COMPACTION_MODES) + .describe('How the compaction artifact should be applied'); + +export const SessionCompactionTriggerSchema = z + .enum(SESSION_COMPACTION_TRIGGERS) + .describe('Why the compaction was triggered'); + +export const SessionCompactionSchema = z + .object({ + id: z.string().describe('Unique compaction artifact identifier'), + sourceSessionId: z.string().describe('Source session identifier'), + targetSessionId: z + .string() + .optional() + .nullable() + .describe('Target child session when continuation was applied into a new session'), + createdAt: z.number().int().positive().describe('Creation timestamp (Unix ms)'), + strategy: z.string().describe('Compaction strategy name used to produce the artifact'), + mode: SessionCompactionModeSchema, + trigger: SessionCompactionTriggerSchema, + originalTokens: z + .number() + .int() + .nonnegative() + .describe('Estimated tokens before compaction'), + compactedTokens: z + .number() + .int() + .nonnegative() + .describe('Estimated tokens after compaction'), + originalMessages: z + .number() + .int() + .nonnegative() + .describe('Prepared message count before compaction'), + compactedMessages: z + .number() + .int() + .nonnegative() + .describe('Prepared message count after compaction'), + summaryMessages: z + .array(InternalMessageSchema) + .describe('Generated summary 
messages returned by the compaction strategy'), + continuationMessages: z + .array(InternalMessageSchema) + .describe('Messages that can seed a continued session after compaction'), + }) + .strict() + .describe('Persisted session compaction artifact'); + +export type SessionCompaction = z.output; + // --- Workspace Schemas --- export const WorkspaceSchema = z