From 5e105bf4b7ca546321ba107a5d3143d4fcf543cf Mon Sep 17 00:00:00 2001 From: Alessio Rocchi Date: Wed, 28 Jan 2026 22:55:36 +0100 Subject: [PATCH 1/3] test: improve coverage to 85%+ for issue #6 - Add comprehensive tests for SlackIntegration (37 tests) - Add CircuitBreaker and retry utility tests - Add Semaphore and AgentPool tests - Add review-loops route handler tests - Improve health monitoring tests - Improve request parsing tests - Improve auth route tests Closes #6 Co-Authored-By: Claude Opus 4.5 --- tests/unit/integrations/slack.test.ts | 469 ++++++++++++ tests/unit/monitoring/health.test.ts | 183 +++++ tests/unit/retry.test.ts | 391 ++++++++++ tests/unit/semaphore.test.ts | 473 ++++++++++++ tests/unit/web/request.test.ts | 111 +++ tests/unit/web/routes/auth.test.ts | 228 ++++++ tests/unit/web/routes/review-loops.test.ts | 790 +++++++++++++++++++++ 7 files changed, 2645 insertions(+) create mode 100644 tests/unit/integrations/slack.test.ts create mode 100644 tests/unit/retry.test.ts create mode 100644 tests/unit/semaphore.test.ts create mode 100644 tests/unit/web/routes/review-loops.test.ts diff --git a/tests/unit/integrations/slack.test.ts b/tests/unit/integrations/slack.test.ts new file mode 100644 index 0000000..5ac0d83 --- /dev/null +++ b/tests/unit/integrations/slack.test.ts @@ -0,0 +1,469 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { + SlackIntegration, + getSlackIntegration, + resetSlackIntegration, + type SlackConfig, + type SlackMessage, +} from '../../../src/integrations/slack.js'; + +// Mock the logger +vi.mock('../../../src/utils/logger.js', () => ({ + logger: { + child: () => ({ + debug: vi.fn(), + error: vi.fn(), + }), + }, +})); + +// Mock global fetch +const mockFetch = vi.fn(); +global.fetch = mockFetch; + +describe('SlackIntegration', () => { + let enabledConfig: SlackConfig; + let disabledConfig: SlackConfig; + + beforeEach(() => { + enabledConfig = { + enabled: true, + webhookUrl: 
'https://hooks.slack.com/services/test', + channel: '#general', + username: 'TestBot', + iconEmoji: ':test:', + }; + + disabledConfig = { + enabled: false, + webhookUrl: 'https://hooks.slack.com/services/test', + }; + + mockFetch.mockReset(); + resetSlackIntegration(); + }); + + afterEach(() => { + vi.clearAllMocks(); + resetSlackIntegration(); + }); + + describe('constructor & isEnabled', () => { + it('should create instance with config', () => { + const slack = new SlackIntegration(enabledConfig); + expect(slack).toBeInstanceOf(SlackIntegration); + }); + + it('should return true when enabled and webhookUrl present', () => { + const slack = new SlackIntegration(enabledConfig); + expect(slack.isEnabled()).toBe(true); + }); + + it('should return false when disabled', () => { + const slack = new SlackIntegration(disabledConfig); + expect(slack.isEnabled()).toBe(false); + }); + + it('should return false when no webhookUrl', () => { + const configNoUrl: SlackConfig = { + enabled: true, + webhookUrl: undefined, + }; + const slack = new SlackIntegration(configNoUrl); + expect(slack.isEnabled()).toBe(false); + }); + }); + + describe('sendMessage', () => { + it('should return false when disabled (no HTTP call)', async () => { + const slack = new SlackIntegration(disabledConfig); + const result = await slack.sendMessage({ text: 'test' }); + + expect(result).toBe(false); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it('should send correct payload structure to fetch', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + const message: SlackMessage = { + text: 'Test message', + blocks: [{ type: 'section', text: { type: 'mrkdwn', text: 'Hello' } }], + }; + + await slack.sendMessage(message); + + expect(mockFetch).toHaveBeenCalledWith( + 'https://hooks.slack.com/services/test', + expect.objectContaining({ + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }) + ); + + const calledBody 
= JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.text).toBe('Test message'); + expect(calledBody.channel).toBe('#general'); + expect(calledBody.username).toBe('TestBot'); + expect(calledBody.icon_emoji).toBe(':test:'); + }); + + it('should return true on successful response', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + const result = await slack.sendMessage({ text: 'test' }); + + expect(result).toBe(true); + }); + + it('should return false on non-ok response', async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 400, + text: () => Promise.resolve('Bad request'), + }); + + const slack = new SlackIntegration(enabledConfig); + const result = await slack.sendMessage({ text: 'test' }); + + expect(result).toBe(false); + }); + + it('should handle network/fetch errors gracefully', async () => { + mockFetch.mockRejectedValueOnce(new Error('Network error')); + + const slack = new SlackIntegration(enabledConfig); + const result = await slack.sendMessage({ text: 'test' }); + + expect(result).toBe(false); + }); + + it('should use default username when not provided', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const configNoUsername: SlackConfig = { + enabled: true, + webhookUrl: 'https://hooks.slack.com/services/test', + }; + const slack = new SlackIntegration(configNoUsername); + await slack.sendMessage({ text: 'test' }); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.username).toBe('AgentStack Bot'); + }); + + it('should use default emoji when not provided', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const configNoEmoji: SlackConfig = { + enabled: true, + webhookUrl: 'https://hooks.slack.com/services/test', + }; + const slack = new SlackIntegration(configNoEmoji); + await slack.sendMessage({ text: 'test' }); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + 
expect(calledBody.icon_emoji).toBe(':robot_face:'); + }); + + it('should override channel when provided in message', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendMessage({ text: 'test', channel: '#alerts' }); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.channel).toBe('#alerts'); + }); + }); + + describe('sendAgentSpawned', () => { + it('should format block with truncated ID', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendAgentSpawned('coder', 'abcdefgh-1234-5678-9012'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Agent Spawned'); + expect(calledBody.blocks[0].text.text).toContain('`coder`'); + expect(calledBody.blocks[0].text.text).toContain('`abcdefgh`'); + expect(calledBody.blocks[0].text.text).not.toContain('1234-5678'); + }); + }); + + describe('sendAgentStopped', () => { + it('should format block correctly', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendAgentStopped('abcdefgh-1234-5678-9012'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Agent Stopped'); + expect(calledBody.blocks[0].text.text).toContain('`abcdefgh`'); + }); + }); + + describe('sendAgentError', () => { + it('should include error message', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendAgentError('abcdefgh-1234', 'Something went wrong'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Agent Error'); + expect(calledBody.blocks[0].text.text).toContain('Something went wrong'); + }); 
+ }); + + describe('sendWorkflowStarted', () => { + it('should format name and ID', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendWorkflowStarted('wf-abcdefgh-1234', 'Build Pipeline'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Workflow Started'); + expect(calledBody.blocks[0].text.text).toContain('*Build Pipeline*'); + expect(calledBody.blocks[0].text.text).toContain('`wf-abcde`'); + }); + }); + + describe('sendWorkflowCompleted', () => { + it('should format with duration', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendWorkflowCompleted('wf-abcdefgh-1234', 'Build Pipeline', 65000); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Workflow Completed'); + expect(calledBody.blocks[0].text.text).toContain('*Build Pipeline*'); + expect(calledBody.blocks[0].text.text).toContain('Duration: 65s'); + }); + + it('should format without duration', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendWorkflowCompleted('wf-abcdefgh-1234', 'Build Pipeline'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Workflow Completed'); + expect(calledBody.blocks[0].text.text).not.toContain('Duration:'); + }); + }); + + describe('sendWorkflowError', () => { + it('should include error', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendWorkflowError('wf-abcdefgh-1234', 'Build Pipeline', 'Build failed'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Workflow 
Error'); + expect(calledBody.blocks[0].text.text).toContain('Build failed'); + }); + }); + + describe('sendReviewLoopStarted', () => { + it('should include all agent IDs', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendReviewLoopStarted('loop-abcd1234', 'coder-efgh5678', 'adv-ijkl9012'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Review Loop Started'); + expect(calledBody.blocks[0].text.text).toContain('`loop-abc`'); + expect(calledBody.blocks[0].text.text).toContain('`coder-ef`'); + expect(calledBody.blocks[0].text.text).toContain('`adv-ijkl`'); + }); + }); + + describe('sendReviewLoopApproved', () => { + it('should include iteration number', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendReviewLoopApproved('loop-abcd1234', 3); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Code Approved'); + expect(calledBody.blocks[0].text.text).toContain('Iteration: 3'); + }); + }); + + describe('sendReviewLoopCompleted', () => { + it('should show approved status when approved', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendReviewLoopCompleted('loop-abcd1234', 5, true); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Review Loop Completed'); + expect(calledBody.blocks[0].text.text).toContain('Iterations: 5'); + expect(calledBody.blocks[0].text.text).toContain(':white_check_mark: Approved'); + }); + + it('should show rejected status when not approved', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await 
slack.sendReviewLoopCompleted('loop-abcd1234', 5, false); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain(':x: Not Approved'); + }); + }); + + describe('sendTaskCompleted', () => { + it('should include agent type and task ID', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendTaskCompleted('task-abcd1234', 'coder'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Task Completed'); + expect(calledBody.blocks[0].text.text).toContain('`coder`'); + expect(calledBody.blocks[0].text.text).toContain('`task-abc`'); + }); + }); + + describe('sendTaskFailed', () => { + it('should include error', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendTaskFailed('task-abcd1234', 'coder', 'Task timeout'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Task Failed'); + expect(calledBody.blocks[0].text.text).toContain('Task timeout'); + }); + }); + + describe('sendCustomMessage', () => { + it('should use correct emoji for info level', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendCustomMessage('Title', 'Message', 'info'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain(':information_source:'); + }); + + it('should use correct emoji for success level', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendCustomMessage('Title', 'Message', 'success'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + 
expect(calledBody.blocks[0].text.text).toContain(':white_check_mark:'); + }); + + it('should use correct emoji for warning level', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendCustomMessage('Title', 'Message', 'warning'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain(':warning:'); + }); + + it('should use correct emoji for error level', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendCustomMessage('Title', 'Message', 'error'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain(':x:'); + }); + + it('should default to info level', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendCustomMessage('Title', 'Message'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain(':information_source:'); + }); + }); + + describe('sendResourceWarning', () => { + it('should format resource warning correctly', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendResourceWarning('agent-abcd1234', 'coder', 'memory', 800, 1000); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Resource Warning'); + expect(calledBody.blocks[0].text.text).toContain('`coder`'); + expect(calledBody.blocks[0].text.text).toContain('`memory`'); + expect(calledBody.blocks[0].text.text).toContain('80%'); + }); + }); + + describe('sendResourceIntervention', () => { + it('should format intervention correctly', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new 
SlackIntegration(enabledConfig); + await slack.sendResourceIntervention('agent-abcd1234', 'coder', 'memory', 'Exceeded limit'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Agent Paused'); + expect(calledBody.blocks[0].text.text).toContain('Exceeded limit'); + }); + }); + + describe('sendResourceTermination', () => { + it('should format termination correctly', async () => { + mockFetch.mockResolvedValueOnce({ ok: true }); + + const slack = new SlackIntegration(enabledConfig); + await slack.sendResourceTermination('agent-abcd1234', 'coder', 'Memory exhausted'); + + const calledBody = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(calledBody.blocks[0].text.text).toContain('Agent Terminated'); + expect(calledBody.blocks[0].text.text).toContain('Memory exhausted'); + }); + }); + + describe('singleton functions', () => { + it('should create instance with config', () => { + const slack = getSlackIntegration(enabledConfig); + expect(slack).toBeInstanceOf(SlackIntegration); + }); + + it('should return same instance on subsequent calls', () => { + const slack1 = getSlackIntegration(enabledConfig); + const slack2 = getSlackIntegration(); + expect(slack1).toBe(slack2); + }); + + it('should throw without config/instance', () => { + expect(() => getSlackIntegration()).toThrow('Slack integration not initialized'); + }); + + it('should reset instance', () => { + getSlackIntegration(enabledConfig); + resetSlackIntegration(); + expect(() => getSlackIntegration()).toThrow('Slack integration not initialized'); + }); + }); +}); diff --git a/tests/unit/monitoring/health.test.ts b/tests/unit/monitoring/health.test.ts index 18930f9..3d6ad6a 100644 --- a/tests/unit/monitoring/health.test.ts +++ b/tests/unit/monitoring/health.test.ts @@ -502,5 +502,188 @@ describe('HealthMonitor', () => { expect(result.checks.providers.status).toBe('pass'); 
expect(result.checks.providers.details?.defaultProvider).toBe('openai'); }); + + it('should handle unknown default provider', async () => { + const configUnknown = { + ...mockConfig, + providers: { + default: 'bedrock', + bedrock: { apiKey: 'test-key' }, + }, + }; + + const monitor = new HealthMonitor(configUnknown); + const result = await monitor.performHealthCheck(); + + // Unknown providers should pass as long as they have some configuration + expect(result.checks.providers.status).toBe('pass'); + expect(result.checks.providers.details?.defaultProvider).toBe('bedrock'); + }); + + it('should handle provider with empty apiKey string', async () => { + const configEmptyKey = { + ...mockConfig, + providers: { + default: 'anthropic', + anthropic: { apiKey: '' }, + }, + }; + + const monitor = new HealthMonitor(configEmptyKey); + const result = await monitor.performHealthCheck(); + + expect(result.checks.providers.status).toBe('warn'); + expect(result.checks.providers.message).toContain('Default provider (Anthropic) not configured'); + }); + }); + + describe('checkMemory edge cases', () => { + it('should verify percentage rounding', async () => { + const originalMemoryUsage = process.memoryUsage; + process.memoryUsage = vi.fn().mockReturnValue({ + heapUsed: 333 * 1024 * 1024, + heapTotal: 1000 * 1024 * 1024, + external: 0, + rss: 500 * 1024 * 1024, + arrayBuffers: 0, + }); + + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // 33.3% should be rounded to 33 + expect(result.checks.memory.details?.heapUsedPercent).toBe(33); + + process.memoryUsage = originalMemoryUsage; + }); + + it('should handle exact 75% boundary (warn threshold)', async () => { + const originalMemoryUsage = process.memoryUsage; + process.memoryUsage = vi.fn().mockReturnValue({ + heapUsed: 750 * 1024 * 1024, + heapTotal: 1000 * 1024 * 1024, + external: 0, + rss: 1000 * 1024 * 1024, + arrayBuffers: 0, + }); + + const monitor = new 
HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // Exactly 75% should NOT trigger warning (> 75 is the condition) + expect(result.checks.memory.status).toBe('pass'); + + process.memoryUsage = originalMemoryUsage; + }); + + it('should handle just above 75% boundary', async () => { + const originalMemoryUsage = process.memoryUsage; + process.memoryUsage = vi.fn().mockReturnValue({ + heapUsed: 760 * 1024 * 1024, + heapTotal: 1000 * 1024 * 1024, + external: 0, + rss: 1000 * 1024 * 1024, + arrayBuffers: 0, + }); + + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + expect(result.checks.memory.status).toBe('warn'); + + process.memoryUsage = originalMemoryUsage; + }); + + it('should handle exact 90% boundary (fail threshold)', async () => { + const originalMemoryUsage = process.memoryUsage; + process.memoryUsage = vi.fn().mockReturnValue({ + heapUsed: 900 * 1024 * 1024, + heapTotal: 1000 * 1024 * 1024, + external: 0, + rss: 1000 * 1024 * 1024, + arrayBuffers: 0, + }); + + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // Exactly 90% should NOT trigger fail (> 90 is the condition) + expect(result.checks.memory.status).toBe('warn'); + + process.memoryUsage = originalMemoryUsage; + }); + }); + + describe('checkSystem edge cases', () => { + it('should include platform in details when status is pass', async () => { + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // Platform is only included when status is 'pass' (normal resources) + // When status is 'warn' (high CPU), only cpuPercent and uptime are included + if (result.checks.system.status === 'pass') { + expect(result.checks.system.details?.platform).toBe(process.platform); + } else { + expect(result.checks.system.details?.cpuPercent).toBeDefined(); + } + }); + + it('should include nodeVersion in details when status is 
pass', async () => { + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // nodeVersion is only included when status is 'pass' (normal resources) + if (result.checks.system.status === 'pass') { + expect(result.checks.system.details?.nodeVersion).toBe(process.version); + } else { + expect(result.checks.system.details?.uptime).toBeDefined(); + } + }); + }); + + describe('performHealthCheck integration', () => { + it('should verify timestamp format is ISO 8601', async () => { + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + // ISO 8601 format validation + const isoDateRegex = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/; + expect(result.timestamp).toMatch(isoDateRegex); + }); + + it('should verify uptime is calculated correctly', async () => { + const monitor = new HealthMonitor(mockConfig); + + // Wait a known amount of time + await new Promise((resolve) => setTimeout(resolve, 50)); + + const result = await monitor.performHealthCheck(); + + // Uptime should be at least 0.05 seconds + expect(result.uptime).toBeGreaterThanOrEqual(0.05); + }); + + it('should include all required check categories', async () => { + const monitor = new HealthMonitor(mockConfig); + const result = await monitor.performHealthCheck(); + + expect(result.checks).toHaveProperty('database'); + expect(result.checks).toHaveProperty('memory'); + expect(result.checks).toHaveProperty('providers'); + expect(result.checks).toHaveProperty('disk'); + expect(result.checks).toHaveProperty('system'); + }); + + it('should return version from config', async () => { + const configWithVersion = { + ...mockConfig, + version: '2.5.0', + }; + + const monitor = new HealthMonitor(configWithVersion); + const result = await monitor.performHealthCheck(); + + expect(result.version).toBe('2.5.0'); + }); }); }); diff --git a/tests/unit/retry.test.ts b/tests/unit/retry.test.ts new file mode 100644 index 
0000000..e728609 --- /dev/null +++ b/tests/unit/retry.test.ts @@ -0,0 +1,391 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { + CircuitBreaker, + retryWithBackoff, + retryWithCircuitBreaker, +} from '../../src/utils/retry.js'; + +// Mock the logger +vi.mock('../../src/utils/logger.js', () => ({ + logger: { + child: () => ({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), + }, +})); + +describe('CircuitBreaker', () => { + describe('constructor', () => { + it('should create with default options', () => { + const breaker = new CircuitBreaker('test'); + expect(breaker.getState()).toBe('CLOSED'); + }); + + it('should create with custom options', () => { + const breaker = new CircuitBreaker('test', { + failureThreshold: 3, + successThreshold: 1, + timeout: 30000, + }); + expect(breaker.getState()).toBe('CLOSED'); + }); + }); + + describe('getState', () => { + it('should return initial state as CLOSED', () => { + const breaker = new CircuitBreaker('test'); + expect(breaker.getState()).toBe('CLOSED'); + }); + }); + + describe('isOpen', () => { + it('should return false when CLOSED', () => { + const breaker = new CircuitBreaker('test'); + expect(breaker.isOpen()).toBe(false); + }); + + it('should return true when OPEN and timeout not elapsed', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 1, timeout: 60000 }); + + // Trigger failure to open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + expect(breaker.getState()).toBe('OPEN'); + expect(breaker.isOpen()).toBe(true); + }); + + it('should transition to HALF_OPEN when timeout elapsed', async () => { + const breaker = new CircuitBreaker('test', { + failureThreshold: 1, + timeout: 10, // Very short timeout + }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('OPEN'); + + // Wait for timeout + await 
new Promise(resolve => setTimeout(resolve, 20)); + + // isOpen should transition to HALF_OPEN + expect(breaker.isOpen()).toBe(false); + expect(breaker.getState()).toBe('HALF_OPEN'); + }); + }); + + describe('execute', () => { + it('should execute function when CLOSED', async () => { + const breaker = new CircuitBreaker('test'); + const result = await breaker.execute(() => Promise.resolve('success')); + expect(result).toBe('success'); + }); + + it('should throw when circuit is OPEN', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 1, timeout: 60000 }); + + // Open the circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + await expect( + breaker.execute(() => Promise.resolve('success')) + ).rejects.toThrow('Circuit breaker is OPEN for test'); + }); + + it('should propagate errors from function', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 5 }); + + await expect( + breaker.execute(() => Promise.reject(new Error('custom error'))) + ).rejects.toThrow('custom error'); + }); + + it('should track failures and open circuit at threshold', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 3, timeout: 60000 }); + + // First 2 failures should keep circuit closed + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('CLOSED'); + + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('CLOSED'); + + // Third failure should open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('OPEN'); + }); + + it('should reset failure count on success', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 3, timeout: 60000 }); + + // 2 failures + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + await 
breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + // Success resets count + await breaker.execute(() => Promise.resolve('success')); + + // 2 more failures should not open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('CLOSED'); + }); + }); + + describe('HALF_OPEN state', () => { + it('should transition to CLOSED after success threshold', async () => { + const breaker = new CircuitBreaker('test', { + failureThreshold: 1, + successThreshold: 2, + timeout: 10, + }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('OPEN'); + + // Wait for timeout + await new Promise(resolve => setTimeout(resolve, 20)); + breaker.isOpen(); // Triggers transition to HALF_OPEN + expect(breaker.getState()).toBe('HALF_OPEN'); + + // First success + await breaker.execute(() => Promise.resolve('success')); + expect(breaker.getState()).toBe('HALF_OPEN'); + + // Second success should close circuit + await breaker.execute(() => Promise.resolve('success')); + expect(breaker.getState()).toBe('CLOSED'); + }); + + it('should transition to OPEN on failure in HALF_OPEN', async () => { + const breaker = new CircuitBreaker('test', { + failureThreshold: 1, + timeout: 10, + }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + // Wait for timeout + await new Promise(resolve => setTimeout(resolve, 20)); + breaker.isOpen(); + expect(breaker.getState()).toBe('HALF_OPEN'); + + // Failure in HALF_OPEN should reopen + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('OPEN'); + }); + }); + + describe('reset', () => { + it('should reset circuit to CLOSED', async () => { + const breaker = new CircuitBreaker('test', { 
failureThreshold: 1, timeout: 60000 }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + expect(breaker.getState()).toBe('OPEN'); + + // Reset + breaker.reset(); + expect(breaker.getState()).toBe('CLOSED'); + }); + + it('should allow execution after reset', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 1, timeout: 60000 }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + // Reset and execute + breaker.reset(); + const result = await breaker.execute(() => Promise.resolve('success')); + expect(result).toBe('success'); + }); + }); +}); + +describe('retryWithBackoff', () => { + it('should return result on success', async () => { + const fn = vi.fn().mockResolvedValue('success'); + const result = await retryWithBackoff(fn); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should retry on retryable error', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('500 Internal Server Error')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { initialDelayMs: 10 }); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should not retry on non-retryable error', async () => { + const fn = vi.fn().mockRejectedValue(new Error('Invalid input')); + + await expect( + retryWithBackoff(fn, { maxAttempts: 3 }) + ).rejects.toThrow('Invalid input'); + expect(fn).toHaveBeenCalledTimes(1); + }); + + it('should retry with custom retryable errors', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('CUSTOM_ERROR')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { + initialDelayMs: 10, + retryableErrors: ['CUSTOM_ERROR'], + }); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry with regex pattern', async () => { + const fn = 
vi.fn() + .mockRejectedValueOnce(new Error('Error code: E12345')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { + initialDelayMs: 10, + retryableErrors: [/E\d+/], + }); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should throw after max attempts', async () => { + const fn = vi.fn().mockRejectedValue(new Error('rate_limit')); + + await expect( + retryWithBackoff(fn, { + maxAttempts: 2, + initialDelayMs: 10, + maxDelayMs: 50, + }) + ).rejects.toThrow('rate_limit'); + + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should apply exponential backoff', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('timeout')) + .mockRejectedValueOnce(new Error('timeout')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { + initialDelayMs: 10, + backoffMultiplier: 2, + maxAttempts: 3, + }); + + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(3); + }); + + it('should respect maxDelayMs', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('timeout')) + .mockRejectedValueOnce(new Error('timeout')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { + initialDelayMs: 50, + maxDelayMs: 60, + backoffMultiplier: 10, + maxAttempts: 3, + }); + + expect(result).toBe('success'); + }); + + it('should handle non-Error thrown values', async () => { + const fn = vi.fn().mockRejectedValue('string error'); + + await expect(retryWithBackoff(fn)).rejects.toThrow('string error'); + }); + + it('should use default options', async () => { + const fn = vi.fn().mockResolvedValue('success'); + const result = await retryWithBackoff(fn); + expect(result).toBe('success'); + }); + + it('should retry on ECONNRESET error', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('ECONNRESET')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { initialDelayMs: 
10 }); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry on 429 error', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('HTTP 429 Too Many Requests')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { initialDelayMs: 10 }); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); + + it('should retry on 503 error', async () => { + const fn = vi.fn() + .mockRejectedValueOnce(new Error('503 Service Unavailable')) + .mockResolvedValueOnce('success'); + + const result = await retryWithBackoff(fn, { initialDelayMs: 10 }); + expect(result).toBe('success'); + expect(fn).toHaveBeenCalledTimes(2); + }); +}); + +describe('retryWithCircuitBreaker', () => { + it('should combine circuit breaker with retry', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 5 }); + const fn = vi.fn().mockResolvedValue('success'); + + const result = await retryWithCircuitBreaker(fn, breaker); + expect(result).toBe('success'); + }); + + it('should fail fast when circuit is open', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 1, timeout: 60000 }); + + // Open circuit + await breaker.execute(() => Promise.reject(new Error('fail'))).catch(() => {}); + + const fn = vi.fn().mockResolvedValue('success'); + + await expect( + retryWithCircuitBreaker(fn, breaker) + ).rejects.toThrow('Circuit breaker is OPEN'); + + expect(fn).not.toHaveBeenCalled(); + }); + + it('should use custom retry options', async () => { + const breaker = new CircuitBreaker('test', { failureThreshold: 10 }); + const fn = vi.fn() + .mockRejectedValueOnce(new Error('timeout')) + .mockResolvedValueOnce('success'); + + const result = await retryWithCircuitBreaker(fn, breaker, { + maxAttempts: 2, + initialDelayMs: 10, + }); + + expect(result).toBe('success'); + }); +}); diff --git a/tests/unit/semaphore.test.ts b/tests/unit/semaphore.test.ts new file 
mode 100644 index 0000000..3c3613f --- /dev/null +++ b/tests/unit/semaphore.test.ts @@ -0,0 +1,473 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { Semaphore, AgentPool } from '../../src/utils/semaphore.js'; + +// Mock the logger +vi.mock('../../src/utils/logger.js', () => ({ + logger: { + child: () => ({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), + }, +})); + +describe('Semaphore', () => { + describe('constructor', () => { + it('should create semaphore with max permits', () => { + const sem = new Semaphore('test', 3); + const state = sem.getState(); + expect(state.available).toBe(3); + expect(state.maxPermits).toBe(3); + expect(state.queued).toBe(0); + }); + }); + + describe('acquire', () => { + it('should acquire permit when available', async () => { + const sem = new Semaphore('test', 2); + await sem.acquire(); + expect(sem.getState().available).toBe(1); + }); + + it('should acquire multiple permits', async () => { + const sem = new Semaphore('test', 3); + await sem.acquire(); + await sem.acquire(); + await sem.acquire(); + expect(sem.getState().available).toBe(0); + }); + + it('should wait when no permits available', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + let acquired = false; + const acquirePromise = sem.acquire().then(() => { + acquired = true; + }); + + // Should be waiting + expect(acquired).toBe(false); + expect(sem.getState().queued).toBe(1); + + // Release permit + sem.release(); + + await acquirePromise; + expect(acquired).toBe(true); + }); + + it('should queue multiple waiters', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + const results: number[] = []; + + const promise1 = sem.acquire().then(() => results.push(1)); + const promise2 = sem.acquire().then(() => results.push(2)); + const promise3 = sem.acquire().then(() => results.push(3)); + + expect(sem.getState().queued).toBe(3); + + // Release permits one 
by one + sem.release(); + await promise1; + expect(results).toEqual([1]); + + sem.release(); + await promise2; + expect(results).toEqual([1, 2]); + + sem.release(); + await promise3; + expect(results).toEqual([1, 2, 3]); + }); + }); + + describe('tryAcquire', () => { + it('should return true and acquire when available', () => { + const sem = new Semaphore('test', 2); + expect(sem.tryAcquire()).toBe(true); + expect(sem.getState().available).toBe(1); + }); + + it('should return false when no permits available', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + expect(sem.tryAcquire()).toBe(false); + expect(sem.getState().available).toBe(0); + }); + + it('should not wait when no permits available', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + const start = Date.now(); + const result = sem.tryAcquire(); + const duration = Date.now() - start; + + expect(result).toBe(false); + expect(duration).toBeLessThan(50); // Should return immediately + }); + }); + + describe('release', () => { + it('should increase available permits', async () => { + const sem = new Semaphore('test', 2); + await sem.acquire(); + expect(sem.getState().available).toBe(1); + + sem.release(); + expect(sem.getState().available).toBe(2); + }); + + it('should give permit to waiting task', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + let acquired = false; + const acquirePromise = sem.acquire().then(() => { + acquired = true; + }); + + // Release to waiting task + sem.release(); + await acquirePromise; + + expect(acquired).toBe(true); + // Available should still be 0 because permit went to waiter + expect(sem.getState().available).toBe(0); + }); + + it('should return permit to pool when no waiters', async () => { + const sem = new Semaphore('test', 2); + await sem.acquire(); + await sem.acquire(); + + expect(sem.getState().available).toBe(0); + + sem.release(); + expect(sem.getState().available).toBe(1); + + 
sem.release(); + expect(sem.getState().available).toBe(2); + }); + }); + + describe('execute', () => { + it('should acquire permit, run function, and release', async () => { + const sem = new Semaphore('test', 1); + + const result = await sem.execute(async () => { + expect(sem.getState().available).toBe(0); + return 'result'; + }); + + expect(result).toBe('result'); + expect(sem.getState().available).toBe(1); + }); + + it('should release permit even on error', async () => { + const sem = new Semaphore('test', 1); + + await expect( + sem.execute(async () => { + throw new Error('test error'); + }) + ).rejects.toThrow('test error'); + + expect(sem.getState().available).toBe(1); + }); + + it('should serialize concurrent operations', async () => { + const sem = new Semaphore('test', 1); + const order: number[] = []; + + const task1 = sem.execute(async () => { + await new Promise((r) => setTimeout(r, 50)); + order.push(1); + return 1; + }); + + const task2 = sem.execute(async () => { + order.push(2); + return 2; + }); + + await Promise.all([task1, task2]); + + // Task 1 should complete before task 2 starts + expect(order).toEqual([1, 2]); + }); + + it('should allow concurrent operations up to permit limit', async () => { + const sem = new Semaphore('test', 2); + let concurrent = 0; + let maxConcurrent = 0; + + const task = async () => { + concurrent++; + maxConcurrent = Math.max(maxConcurrent, concurrent); + await new Promise((r) => setTimeout(r, 20)); + concurrent--; + }; + + await Promise.all([ + sem.execute(task), + sem.execute(task), + sem.execute(task), + sem.execute(task), + ]); + + expect(maxConcurrent).toBe(2); + }); + }); + + describe('getState', () => { + it('should return current state', async () => { + const sem = new Semaphore('test', 3); + + expect(sem.getState()).toEqual({ + available: 3, + maxPermits: 3, + queued: 0, + }); + + await sem.acquire(); + await sem.acquire(); + + expect(sem.getState()).toEqual({ + available: 1, + maxPermits: 3, + queued: 0, + 
}); + }); + + it('should show queued count', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + // Queue some waiters + sem.acquire(); + sem.acquire(); + + expect(sem.getState().queued).toBe(2); + }); + }); + + describe('reset', () => { + it('should reset to initial state', async () => { + const sem = new Semaphore('test', 3); + await sem.acquire(); + await sem.acquire(); + + sem.reset(); + + expect(sem.getState()).toEqual({ + available: 3, + maxPermits: 3, + queued: 0, + }); + }); + + it('should resolve waiting tasks', async () => { + const sem = new Semaphore('test', 1); + await sem.acquire(); + + const results: string[] = []; + + const promise1 = sem.acquire().then(() => results.push('acquired1')); + const promise2 = sem.acquire().then(() => results.push('acquired2')); + + expect(sem.getState().queued).toBe(2); + + sem.reset(); + + // Waiters should be resolved + await Promise.all([promise1, promise2]); + expect(results).toHaveLength(2); + expect(sem.getState().queued).toBe(0); + }); + }); +}); + +describe('AgentPool', () => { + describe('constructor', () => { + it('should create pool with default size', () => { + const pool = new AgentPool(); + expect(pool.getStats()).toEqual({}); + }); + + it('should create pool with custom size', () => { + const pool = new AgentPool(5); + expect(pool.getStats()).toEqual({}); + }); + }); + + describe('acquire', () => { + it('should return null when pool is empty', () => { + const pool = new AgentPool(); + expect(pool.acquire('coder')).toBeNull(); + }); + + it('should return agent from pool', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + + const agentId = pool.acquire('coder'); + expect(agentId).toBe('agent-1'); + }); + + it('should mark acquired agent as in use', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.acquire('coder'); + + // Second acquire should return null + expect(pool.acquire('coder')).toBeNull(); + }); + + it('should 
return different agents of same type', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-2'); + + const first = pool.acquire('coder'); + const second = pool.acquire('coder'); + + expect([first, second].sort()).toEqual(['agent-1', 'agent-2']); + }); + + it('should only return agents of requested type', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('tester', 'agent-2'); + + expect(pool.acquire('reviewer')).toBeNull(); + expect(pool.acquire('coder')).toBe('agent-1'); + }); + }); + + describe('release', () => { + it('should add agent to pool', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + + const stats = pool.getStats(); + expect(stats.coder.total).toBe(1); + }); + + it('should mark agent as available after release', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.acquire('coder'); + pool.release('coder', 'agent-1'); + + // Should be able to acquire again + expect(pool.acquire('coder')).toBe('agent-1'); + }); + + it('should not duplicate agents in pool', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-1'); + + const stats = pool.getStats(); + expect(stats.coder.total).toBe(1); + }); + + it('should respect max pool size', () => { + const pool = new AgentPool(2); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-2'); + pool.release('coder', 'agent-3'); // Should not be added + + const stats = pool.getStats(); + expect(stats.coder.total).toBe(2); + }); + }); + + describe('remove', () => { + it('should remove agent from pool', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-2'); + + pool.remove('coder', 'agent-1'); + + const stats = pool.getStats(); + expect(stats.coder.total).toBe(1); + }); + + it('should remove agent from in-use set', () 
=> { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.acquire('coder'); + + pool.remove('coder', 'agent-1'); + + // Releasing a new agent should work + pool.release('coder', 'agent-2'); + expect(pool.acquire('coder')).toBe('agent-2'); + }); + + it('should handle removing non-existent agent', () => { + const pool = new AgentPool(); + // Should not throw + pool.remove('coder', 'nonexistent'); + }); + }); + + describe('getStats', () => { + it('should return empty stats for empty pool', () => { + const pool = new AgentPool(); + expect(pool.getStats()).toEqual({}); + }); + + it('should return stats per agent type', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-2'); + pool.release('tester', 'agent-3'); + + const stats = pool.getStats(); + expect(stats.coder).toEqual({ total: 2, inUse: 0, available: 2 }); + expect(stats.tester).toEqual({ total: 1, inUse: 0, available: 1 }); + }); + + it('should track in-use agents', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('coder', 'agent-2'); + pool.acquire('coder'); + + const stats = pool.getStats(); + expect(stats.coder).toEqual({ total: 2, inUse: 1, available: 1 }); + }); + }); + + describe('clear', () => { + it('should clear all agents', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.release('tester', 'agent-2'); + pool.acquire('coder'); + + pool.clear(); + + expect(pool.getStats()).toEqual({}); + }); + + it('should allow new agents after clear', () => { + const pool = new AgentPool(); + pool.release('coder', 'agent-1'); + pool.clear(); + pool.release('coder', 'agent-2'); + + expect(pool.acquire('coder')).toBe('agent-2'); + }); + }); +}); diff --git a/tests/unit/web/request.test.ts b/tests/unit/web/request.test.ts index e69662e..213e49c 100644 --- a/tests/unit/web/request.test.ts +++ b/tests/unit/web/request.test.ts @@ -110,6 +110,59 @@ describe('Request Utils', 
() => { }); }); + describe('parseRequestBody - additional edge cases', () => { + function createMockRequestWithBuffer(chunks: Buffer[]): IncomingMessage { + const stream = new Readable({ + read() { + chunks.forEach(chunk => this.push(chunk)); + this.push(null); + } + }); + return stream as unknown as IncomingMessage; + } + + it('should handle Buffer chunks correctly', async () => { + const chunks = [ + Buffer.from('{"na'), + Buffer.from('me":"'), + Buffer.from('test"}'), + ]; + const req = createMockRequestWithBuffer(chunks); + const result = await parseRequestBody(req); + expect(result).toEqual({ name: 'test' }); + }); + + it('should handle very large payloads (multiple chunks)', async () => { + // Create a large payload split into multiple chunks + const items = Array.from({ length: 500 }, (_, i) => `"item${i}":"value${i}"`); + const fullJson = `{${items.join(',')}}`; + + // Split into multiple chunks + const chunkSize = 1000; + const chunks: Buffer[] = []; + for (let i = 0; i < fullJson.length; i += chunkSize) { + chunks.push(Buffer.from(fullJson.slice(i, i + chunkSize))); + } + + const req = createMockRequestWithBuffer(chunks); + const result = await parseRequestBody(req); + expect(Object.keys(result)).toHaveLength(500); + expect(result.item0).toBe('value0'); + expect(result.item499).toBe('value499'); + }); + + it('should handle whitespace-only body', async () => { + const stream = new Readable({ + read() { + this.push(' \n\t '); + this.push(null); + } + }); + const req = stream as unknown as IncomingMessage; + await expect(parseRequestBody(req)).rejects.toThrow('Invalid JSON body'); + }); + }); + describe('parseQueryString', () => { it('should parse simple query string', () => { const result = parseQueryString('http://example.com?name=test&age=25'); @@ -211,5 +264,63 @@ describe('Request Utils', () => { const result = parseQueryString('/api/users?active=true'); expect(result).toEqual({ active: 'true' }); }); + + it('should handle multiple question marks in 
URL', () => { + // parseQueryString takes url.split('?')[1] as the query string. split('?') splits on + // EVERY '?', so 'http://example.com?question=what?&answer=yes' yields + // ['http://example.com', 'question=what', '&answer=yes'], and index [1] is just + // 'question=what' — everything from the second '?' onward is silently dropped. + // Then 'question=what'.split('&') = ['question=what'] and split('=') = ['question', 'what']. + const result = parseQueryString('http://example.com?question=what?&answer=yes'); + // The second ? 
causes split('?')[1] to be 'question=what' only + // The '&answer=yes' gets lost because split('?') splits into 3 parts + expect(result).toEqual({ question: 'what' }); + }); + + it('should handle plus signs as encoded spaces', () => { + const result = parseQueryString('http://example.com?name=John+Doe&city=New+York'); + // Plus signs are kept as-is by default URL parsing (not automatically converted to spaces) + expect(result).toEqual({ name: 'John+Doe', city: 'New+York' }); + }); + + it('should handle invalid URI encoding gracefully', () => { + // %ZZ is not valid URI encoding - decodeURIComponent should throw + expect(() => parseQueryString('http://example.com?invalid=%ZZ')).toThrow(); + }); + + it('should handle very long query strings', () => { + const params = Array.from({ length: 100 }, (_, i) => `key${i}=value${i}`).join('&'); + const result = parseQueryString(`http://example.com?${params}`); + expect(Object.keys(result)).toHaveLength(100); + expect(result.key0).toBe('value0'); + expect(result.key99).toBe('value99'); + }); + + it('should handle nested bracket notation', () => { + const result = parseQueryString('http://example.com?user[name]=John&user[age]=30'); + expect(result['user[name]']).toBe('John'); + expect(result['user[age]']).toBe('30'); + }); + + it('should handle values with encoded equals signs', () => { + const result = parseQueryString('http://example.com?math=1%2B1%3D2'); + expect(result.math).toBe('1+1=2'); + }); }); }); diff --git a/tests/unit/web/routes/auth.test.ts b/tests/unit/web/routes/auth.test.ts index 6235597..192971d 100644 --- a/tests/unit/web/routes/auth.test.ts +++ b/tests/unit/web/routes/auth.test.ts @@ -221,6 +221,77 @@ describe('Auth Routes', () => { const response = JSON.parse(resBody); expect(response.error).toBe('Registration failed'); }); + + it('should handle missing email field', async () => { + vi.mocked(authService.register).mockRejectedValue(new Error('Email is required')); + + mockReq = createMockRequest({ + 
username: 'testuser', + password: 'password123', + }); + + await routes.register(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Email is required'); + }); + + it('should handle missing username field', async () => { + vi.mocked(authService.register).mockRejectedValue(new Error('Username is required')); + + mockReq = createMockRequest({ + email: 'test@example.com', + password: 'password123', + }); + + await routes.register(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Username is required'); + }); + + it('should handle missing password field', async () => { + vi.mocked(authService.register).mockRejectedValue(new Error('Password is required')); + + mockReq = createMockRequest({ + email: 'test@example.com', + username: 'testuser', + }); + + await routes.register(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Password is required'); + }); + + it('should handle empty request body', async () => { + vi.mocked(authService.register).mockRejectedValue(new Error('All fields are required')); + + mockReq = createMockRequest({}); + + await routes.register(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + }); + + it('should handle invalid email format', async () => { + vi.mocked(authService.register).mockRejectedValue(new Error('Invalid email format')); + + mockReq = createMockRequest({ + email: 'not-an-email', + username: 'testuser', + password: 'password123', + }); + + await routes.register(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Invalid email format'); + }); }); describe('login', () => 
{ @@ -342,6 +413,52 @@ describe('Auth Routes', () => { const response = JSON.parse(resBody); expect(response.error).toBe('Invalid refresh token'); }); + + it('should handle expired refresh token', async () => { + vi.mocked(authService.refreshAccessToken).mockRejectedValue( + new Error('Refresh token expired') + ); + + mockReq = createMockRequest({ + refreshToken: 'expired-token', + }); + + await routes.refresh(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(401); + const response = JSON.parse(resBody); + expect(response.error).toBe('Refresh token expired'); + }); + + it('should handle malformed refresh token', async () => { + vi.mocked(authService.refreshAccessToken).mockRejectedValue( + new Error('Malformed token') + ); + + mockReq = createMockRequest({ + refreshToken: 'malformed.token.here', + }); + + await routes.refresh(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(401); + const response = JSON.parse(resBody); + expect(response.error).toBe('Malformed token'); + }); + + it('should handle unknown refresh errors', async () => { + vi.mocked(authService.refreshAccessToken).mockRejectedValue('Unknown error'); + + mockReq = createMockRequest({ + refreshToken: 'some-token', + }); + + await routes.refresh(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(401); + const response = JSON.parse(resBody); + expect(response.error).toBe('Token refresh failed'); + }); }); describe('logout', () => { @@ -460,6 +577,40 @@ describe('Auth Routes', () => { const response = JSON.parse(resBody); expect(response.error).toBe('Invalid old password'); }); + + it('should handle same old/new password error', async () => { + vi.mocked(authService.changePassword).mockRejectedValue( + new Error('New password must be different from old password') + ); + + mockReq = createMockRequest({ + oldPassword: 'samepassword', + newPassword: 'samepassword', + }); + mockReq.headers = { authorization: 
'Bearer valid-token' }; + + await routes.changePassword(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('New password must be different from old password'); + }); + + it('should handle unknown password change errors', async () => { + vi.mocked(authService.changePassword).mockRejectedValue('Unknown error'); + + mockReq = createMockRequest({ + oldPassword: 'old123', + newPassword: 'new123', + }); + mockReq.headers = { authorization: 'Bearer valid-token' }; + + await routes.changePassword(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Password change failed'); + }); }); describe('listUsers', () => { @@ -582,6 +733,48 @@ describe('Auth Routes', () => { const response = JSON.parse(resBody); expect(response.error).toBe('User not found'); }); + + it('should handle invalid role value', async () => { + vi.mocked(authService.updateUserRole).mockImplementation(() => { + throw new Error('Invalid role'); + }); + + mockReq = createMockRequest({ + role: 'invalid-role', + }); + mockReq.headers = { authorization: 'Bearer admin-token' }; + + await routes.updateUserRole( + mockReq as IncomingMessage, + mockRes as ServerResponse, + 'user-456' + ); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Invalid role'); + }); + + it('should handle unknown role update errors', async () => { + vi.mocked(authService.updateUserRole).mockImplementation(() => { + throw 'Unknown error'; + }); + + mockReq = createMockRequest({ + role: 'admin', + }); + mockReq.headers = { authorization: 'Bearer admin-token' }; + + await routes.updateUserRole( + mockReq as IncomingMessage, + mockRes as ServerResponse, + 'user-456' + ); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('Role 
update failed'); + }); }); describe('deleteUser', () => { @@ -637,5 +830,40 @@ describe('Auth Routes', () => { const response = JSON.parse(resBody); expect(response.error).toBe('User not found'); }); + + it('should handle unknown deletion errors', async () => { + vi.mocked(authService.deleteUser).mockImplementation(() => { + throw 'Unknown error'; + }); + + mockReq = {} as any; + mockReq.headers = { authorization: 'Bearer admin-token' }; + + await routes.deleteUser( + mockReq as IncomingMessage, + mockRes as ServerResponse, + 'user-456' + ); + + expect(resStatus).toBe(400); + const response = JSON.parse(resBody); + expect(response.error).toBe('User deletion failed'); + }); + }); + + describe('logout - additional cases', () => { + it('should handle unknown logout errors', async () => { + vi.mocked(authService.logout).mockRejectedValue('Unknown error'); + + mockReq = createMockRequest({ + refreshToken: 'refresh-token', + }); + + await routes.logout(mockReq as IncomingMessage, mockRes as ServerResponse); + + expect(resStatus).toBe(500); + const response = JSON.parse(resBody); + expect(response.error).toBe('Logout failed'); + }); }); }); diff --git a/tests/unit/web/routes/review-loops.test.ts b/tests/unit/web/routes/review-loops.test.ts new file mode 100644 index 0000000..4623fca --- /dev/null +++ b/tests/unit/web/routes/review-loops.test.ts @@ -0,0 +1,790 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { registerReviewLoopRoutes } from '../../../../src/web/routes/review-loops.js'; +import type { Router } from '../../../../src/web/router.js'; +import type { AgentStackConfig } from '../../../../src/types.js'; +import type { ServerResponse } from 'node:http'; +import type { ReviewLoopState } from '../../../../src/types.js'; + +// Mock dependencies +vi.mock('../../../../src/coordination/review-loop.js', () => ({ + createReviewLoop: vi.fn(), + getReviewLoop: vi.fn(), + listReviewLoops: vi.fn(), + abortReviewLoop: vi.fn(), +})); + 
+vi.mock('../../../../src/web/websocket/event-bridge.js', () => ({ + agentEvents: { + emit: vi.fn(), + }, +})); + +import { + createReviewLoop, + getReviewLoop, + listReviewLoops, + abortReviewLoop, +} from '../../../../src/coordination/review-loop.js'; +import { agentEvents } from '../../../../src/web/websocket/event-bridge.js'; + +describe('Review Loop Routes', () => { + let mockRouter: Router; + let mockConfig: AgentStackConfig; + let routeHandlers: Record; + let mockRes: Partial; + let resStatus: number; + let resBody: string; + + beforeEach(() => { + routeHandlers = {}; + + mockRouter = { + get: vi.fn((path: string, handler: Function) => { + routeHandlers[`GET ${path}`] = handler; + }), + post: vi.fn((path: string, handler: Function) => { + routeHandlers[`POST ${path}`] = handler; + }), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as unknown as Router; + + mockConfig = { + version: '1.0.0', + memory: { + path: ':memory:', + defaultNamespace: 'test', + vectorSearch: { enabled: false }, + }, + providers: { llm: {}, embeddings: {} }, + agents: {}, + github: { enabled: false }, + plugins: { enabled: false, directory: 'plugins' }, + mcp: { enabled: false, servers: {} }, + hooks: { session: {}, task: {}, workflow: {} }, + }; + + resStatus = 0; + resBody = ''; + + mockRes = { + writeHead: vi.fn((status: number) => { + resStatus = status; + return mockRes as ServerResponse; + }), + end: vi.fn((body?: string) => { + if (body) resBody = body; + return mockRes as ServerResponse; + }), + }; + + registerReviewLoopRoutes(mockRouter, mockConfig); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + describe('GET /api/v1/review-loops', () => { + it('should return empty array when no loops', () => { + vi.mocked(listReviewLoops).mockReturnValue([]); + + const handler = routeHandlers['GET /api/v1/review-loops']; + handler({}, mockRes, { path: [], query: {}, body: undefined }); + + expect(resStatus).toBe(200); + const response = JSON.parse(resBody); + // 
sendJson wraps in { success, data, timestamp } + expect(response.data).toEqual([]); + }); + + it('should return formatted loop list with all fields', () => { + const startedAt = new Date('2024-01-01T10:00:00Z'); + const completedAt = new Date('2024-01-01T10:30:00Z'); + + vi.mocked(listReviewLoops).mockReturnValue([ + { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + sessionId: 'session-001', + iteration: 3, + maxIterations: 5, + status: 'in_progress' as const, + finalVerdict: undefined, + startedAt, + completedAt: undefined, + reviews: [{ reviewId: 'r1' }, { reviewId: 'r2' }], + codeInput: 'code', + currentCode: 'code', + } as unknown as ReviewLoopState, + { + id: 'loop-456', + coderId: 'coder-789', + adversarialId: 'adv-012', + sessionId: 'session-002', + iteration: 5, + maxIterations: 5, + status: 'completed' as const, + finalVerdict: 'approved', + startedAt, + completedAt, + reviews: [{ reviewId: 'r3' }], + codeInput: 'code2', + currentCode: 'code2', + } as unknown as ReviewLoopState, + ]); + + const handler = routeHandlers['GET /api/v1/review-loops']; + handler({}, mockRes, { path: [], query: {}, body: undefined }); + + expect(resStatus).toBe(200); + const response = JSON.parse(resBody); + expect(response.data).toHaveLength(2); + expect(response.data[0].id).toBe('loop-123'); + expect(response.data[0].coderId).toBe('coder-456'); + expect(response.data[0].reviewCount).toBe(2); + expect(response.data[1].completedAt).toBe('2024-01-01T10:30:00.000Z'); + }); + + it('should format dates as ISO strings', () => { + const startedAt = new Date('2024-06-15T14:30:00Z'); + + vi.mocked(listReviewLoops).mockReturnValue([ + { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + sessionId: 'session-001', + iteration: 1, + maxIterations: 3, + status: 'in_progress' as const, + startedAt, + reviews: [], + codeInput: 'code', + currentCode: 'code', + } as unknown as ReviewLoopState, + ]); + + const handler = routeHandlers['GET 
/api/v1/review-loops']; + handler({}, mockRes, { path: [], query: {}, body: undefined }); + + const response = JSON.parse(resBody); + expect(response.data[0].startedAt).toBe('2024-06-15T14:30:00.000Z'); + }); + + it('should include review count', () => { + vi.mocked(listReviewLoops).mockReturnValue([ + { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + sessionId: 'session-001', + iteration: 1, + maxIterations: 3, + status: 'in_progress' as const, + startedAt: new Date(), + reviews: [{}, {}, {}], + codeInput: 'code', + currentCode: 'code', + } as unknown as ReviewLoopState, + ]); + + const handler = routeHandlers['GET /api/v1/review-loops']; + handler({}, mockRes, { path: [], query: {}, body: undefined }); + + const response = JSON.parse(resBody); + expect(response.data[0].reviewCount).toBe(3); + }); + }); + + describe('POST /api/v1/review-loops', () => { + it('should create loop with codeInput', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(null); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'function test() {}' }, + }); + + expect(createReviewLoop).toHaveBeenCalledWith( + 'function test() {}', + mockConfig, + { maxIterations: undefined, sessionId: undefined } + ); + expect(resStatus).toBe(202); + }); + + it('should throw badRequest when codeInput missing', async () => { + const handler = routeHandlers['POST /api/v1/review-loops']; + + await expect( + handler({}, mockRes, { path: [], query: {}, body: {} }) + ).rejects.toThrow('Code input is required'); + }); + + it('should throw badRequest when body is undefined', async () => { + const handler = routeHandlers['POST /api/v1/review-loops']; + + 
await expect( + handler({}, mockRes, { path: [], query: {}, body: undefined }) + ).rejects.toThrow('Code input is required'); + }); + + it('should pass optional maxIterations and sessionId', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 10, + startedAt: new Date(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(null); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { + codeInput: 'function test() {}', + maxIterations: 10, + sessionId: 'my-session', + }, + }); + + expect(createReviewLoop).toHaveBeenCalledWith( + 'function test() {}', + mockConfig, + { maxIterations: 10, sessionId: 'my-session' } + ); + }); + + it('should return 202 with loop details', async () => { + const startedAt = new Date('2024-01-01T10:00:00Z'); + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt, + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(null); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + expect(resStatus).toBe(202); + const response = JSON.parse(resBody); + // sendJson wraps in { success, data, timestamp } + expect(response.data.id).toBe('loop-123'); + expect(response.data.coderId).toBe('coder-456'); + expect(response.data.startedAt).toBe('2024-01-01T10:00:00.000Z'); + }); + + it('should handle createReviewLoop failure', async () => { + vi.mocked(createReviewLoop).mockRejectedValue(new Error('Creation failed')); + + const handler = routeHandlers['POST /api/v1/review-loops']; + + await expect( + handler({}, mockRes, { path: [], query: 
{}, body: { codeInput: 'code' } }) + ).rejects.toThrow('Failed to create review loop'); + }); + + it('should attach event handlers when coordinator exists', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const mockCoordinator = { + on: vi.fn(), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:start', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:iteration', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:review', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:fix', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:approved', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:complete', expect.any(Function)); + expect(mockCoordinator.on).toHaveBeenCalledWith('loop:error', expect.any(Function)); + }); + }); + + describe('GET /api/v1/review-loops/:id', () => { + it('should return full loop details', () => { + const startedAt = new Date('2024-01-01T10:00:00Z'); + const reviewTimestamp = new Date('2024-01-01T10:15:00Z'); + + const mockState = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + sessionId: 'session-001', + iteration: 2, + maxIterations: 5, + status: 'in_progress', + codeInput: 'original code', + currentCode: 'updated code', + reviews: [ + { + reviewId: 'review-1', + verdict: 'needs_changes', + issues: [{ type: 'error', message: 'Fix this' }], + summary: 'Review summary', + timestamp: reviewTimestamp, + }, + ], + 
finalVerdict: undefined, + startedAt, + completedAt: undefined, + }; + + const mockCoordinator = { + getState: vi.fn().mockReturnValue(mockState), + }; + + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['GET /api/v1/review-loops/:id']; + handler({}, mockRes, { path: ['loop-123'], query: {}, body: undefined }); + + expect(resStatus).toBe(200); + const response = JSON.parse(resBody); + // sendJson wraps in { success, data, timestamp } + expect(response.data.id).toBe('loop-123'); + expect(response.data.codeInput).toBe('original code'); + expect(response.data.currentCode).toBe('updated code'); + expect(response.data.reviews).toHaveLength(1); + expect(response.data.reviews[0].timestamp).toBe('2024-01-01T10:15:00.000Z'); + }); + + it('should throw badRequest when id missing', () => { + const handler = routeHandlers['GET /api/v1/review-loops/:id']; + + expect(() => { + handler({}, mockRes, { path: [], query: {}, body: undefined }); + }).toThrow('Review loop ID is required'); + }); + + it('should throw notFound for nonexistent loop', () => { + vi.mocked(getReviewLoop).mockReturnValue(null); + + const handler = routeHandlers['GET /api/v1/review-loops/:id']; + + expect(() => { + handler({}, mockRes, { path: ['nonexistent'], query: {}, body: undefined }); + }).toThrow(); + }); + + it('should format reviews array correctly', () => { + const mockState = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + sessionId: 'session-001', + iteration: 3, + maxIterations: 5, + status: 'completed', + codeInput: 'code', + currentCode: 'code', + reviews: [ + { + reviewId: 'r1', + verdict: 'needs_changes', + issues: [{ type: 'warning', message: 'msg1' }], + summary: 'summary1', + timestamp: new Date('2024-01-01T10:00:00Z'), + }, + { + reviewId: 'r2', + verdict: 'approved', + issues: [], + summary: 'summary2', + timestamp: new Date('2024-01-01T10:30:00Z'), + }, + ], + startedAt: new Date(), + }; + + const 
mockCoordinator = { + getState: vi.fn().mockReturnValue(mockState), + }; + + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['GET /api/v1/review-loops/:id']; + handler({}, mockRes, { path: ['loop-123'], query: {}, body: undefined }); + + const response = JSON.parse(resBody); + // sendJson wraps in { success, data, timestamp } + expect(response.data.reviews).toHaveLength(2); + expect(response.data.reviews[0].reviewId).toBe('r1'); + expect(response.data.reviews[1].verdict).toBe('approved'); + }); + }); + + describe('POST /api/v1/review-loops/:id/abort', () => { + it('should abort existing loop successfully', () => { + vi.mocked(abortReviewLoop).mockReturnValue(true); + + const handler = routeHandlers['POST /api/v1/review-loops/:id/abort']; + handler({}, mockRes, { path: ['loop-123'], query: {}, body: undefined }); + + expect(abortReviewLoop).toHaveBeenCalledWith('loop-123'); + expect(resStatus).toBe(200); + const response = JSON.parse(resBody); + // sendJson wraps in { success, data, timestamp } + expect(response.data.loopId).toBe('loop-123'); + expect(response.data.status).toBe('aborted'); + }); + + it('should throw badRequest when id missing', () => { + const handler = routeHandlers['POST /api/v1/review-loops/:id/abort']; + + expect(() => { + handler({}, mockRes, { path: [], query: {}, body: undefined }); + }).toThrow('Review loop ID is required'); + }); + + it('should throw notFound for nonexistent loop', () => { + vi.mocked(abortReviewLoop).mockReturnValue(false); + + const handler = routeHandlers['POST /api/v1/review-loops/:id/abort']; + + expect(() => { + handler({}, mockRes, { path: ['nonexistent'], query: {}, body: undefined }); + }).toThrow(); + }); + + it('should emit review-loop:aborted event', () => { + vi.mocked(abortReviewLoop).mockReturnValue(true); + + const handler = routeHandlers['POST /api/v1/review-loops/:id/abort']; + handler({}, mockRes, { path: ['loop-123'], query: {}, body: undefined }); + + 
expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:aborted', { + loopId: 'loop-123', + }); + }); + }); + + describe('event wiring', () => { + it('should emit review-loop:start event when loop:start fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + // Simulate the loop:start event + const mockState = { id: 'loop-123', status: 'in_progress' }; + eventHandlers['loop:start'](mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:start', { + loopId: 'loop-123', + state: mockState, + }); + }); + + it('should emit review-loop:iteration event when loop:iteration fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + const mockState = { id: 'loop-123', status: 'in_progress' }; + 
eventHandlers['loop:iteration'](2, mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:iteration', { + loopId: 'loop-123', + iteration: 2, + state: mockState, + }); + }); + + it('should emit review-loop:error event when loop:error fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + const mockState = { id: 'loop-123', status: 'error' }; + const mockError = new Error('Something went wrong'); + eventHandlers['loop:error'](mockError, mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:error', { + loopId: 'loop-123', + error: 'Something went wrong', + state: mockState, + }); + }); + + it('should emit review-loop:complete event when loop:complete fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' 
}, + }); + + const mockState = { id: 'loop-123', status: 'completed' }; + eventHandlers['loop:complete'](mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:complete', { + loopId: 'loop-123', + state: mockState, + }); + }); + + it('should emit review-loop:fix event when loop:fix fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + const mockState = { id: 'loop-123', status: 'in_progress' }; + const mockIssues = [{ type: 'error', message: 'Fix this bug' }]; + eventHandlers['loop:fix'](2, mockIssues, mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:fix', { + loopId: 'loop-123', + iteration: 2, + issues: mockIssues, + state: mockState, + }); + }); + + it('should emit review-loop:approved event when loop:approved fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await 
handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + const mockState = { id: 'loop-123', status: 'approved' }; + eventHandlers['loop:approved'](mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:approved', { + loopId: 'loop-123', + state: mockState, + }); + }); + + it('should emit review-loop:review event when loop:review fires', async () => { + const mockLoop = { + id: 'loop-123', + coderId: 'coder-456', + adversarialId: 'adv-789', + status: 'in_progress', + iteration: 1, + maxIterations: 3, + startedAt: new Date(), + }; + + const eventHandlers: Record = {}; + const mockCoordinator = { + on: vi.fn((event: string, handler: Function) => { + eventHandlers[event] = handler; + }), + getState: vi.fn(), + }; + + vi.mocked(createReviewLoop).mockResolvedValue(mockLoop as any); + vi.mocked(getReviewLoop).mockReturnValue(mockCoordinator as any); + + const handler = routeHandlers['POST /api/v1/review-loops']; + await handler({}, mockRes, { + path: [], + query: {}, + body: { codeInput: 'code' }, + }); + + const mockState = { id: 'loop-123', status: 'in_progress' }; + const mockResult = { verdict: 'needs_changes', issues: [] }; + eventHandlers['loop:review'](mockResult, mockState); + + expect(agentEvents.emit).toHaveBeenCalledWith('review-loop:review', { + loopId: 'loop-123', + result: mockResult, + state: mockState, + }); + }); + }); +}); From 35902fe854062c59586548808603e70c0c5686a5 Mon Sep 17 00:00:00 2001 From: Alessio Rocchi Date: Thu, 29 Jan 2026 01:29:45 +0100 Subject: [PATCH 2/3] feat: add consensus checkpoints for high-stakes tasks (#5) Add a consensus checkpoint system that requires validation before high/medium risk tasks can spawn subtasks, preventing self-reinforcing agent cycles. 
Features: - ConsensusService with configurable risk levels and reviewer strategies - Database schema for checkpoints and audit events - REST API endpoints for checkpoint management - MCP tools for consensus operations - WebSocket events for real-time notifications - Integration with task_create to block high-risk subtasks Configuration options: - enabled: toggle consensus system - requireForRiskLevels: ['high', 'medium'] by default - reviewerStrategy: 'adversarial', 'different-model', or 'human' - timeout: checkpoint expiration (default 5 minutes) - maxDepth: maximum task depth before requiring consensus Co-Authored-By: Claude Opus 4.5 --- src/mcp/server.ts | 6 +- src/mcp/tools/task-tools.ts | 351 ++++++++++- src/memory/index.ts | 14 +- src/memory/sqlite-store.ts | 360 ++++++++++- src/tasks/consensus-service.ts | 514 +++++++++++++++ src/types.ts | 53 ++ src/utils/config.ts | 10 + src/web/routes/consensus.ts | 288 +++++++++ src/web/routes/index.ts | 1 + src/web/server.ts | 3 + src/web/websocket/event-bridge.ts | 33 +- tests/unit/consensus-service.test.ts | 797 ++++++++++++++++++++++++ tests/unit/web/consensus-routes.test.ts | 408 ++++++++++++ 13 files changed, 2826 insertions(+), 12 deletions(-) create mode 100644 src/tasks/consensus-service.ts create mode 100644 src/web/routes/consensus.ts create mode 100644 tests/unit/consensus-service.test.ts create mode 100644 tests/unit/web/consensus-routes.test.ts diff --git a/src/mcp/server.ts b/src/mcp/server.ts index c0ac7f7..c98d6f1 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -21,6 +21,7 @@ import { createGitHubTools, } from './tools/index.js'; import { DriftDetectionService } from '../tasks/drift-detection-service.js'; +import { ConsensusService } from '../tasks/consensus-service.js'; const log = logger.child('mcp'); @@ -37,11 +38,13 @@ export class MCPServer { private memory: MemoryManager; private config: AgentStackConfig; private driftService: DriftDetectionService; + private consensusService: 
ConsensusService; constructor(config: AgentStackConfig) { this.config = config; this.memory = new MemoryManager(config); this.driftService = new DriftDetectionService(this.memory.getStore(), config); + this.consensusService = new ConsensusService(this.memory.getStore(), config); this.server = new Server( { @@ -65,7 +68,7 @@ export class MCPServer { createAgentTools(this.config), createIdentityTools(this.config), createMemoryTools(this.memory), - createTaskTools(this.memory, this.driftService), + createTaskTools(this.memory, this.driftService, this.consensusService), createSessionTools(this.memory), createSystemTools(this.memory, this.config), createGitHubTools(this.config), @@ -150,6 +153,7 @@ export class MCPServer { async stop(): Promise { await this.server.close(); + this.consensusService.destroy(); this.memory.close(); log.info('MCP server stopped'); } diff --git a/src/mcp/tools/task-tools.ts b/src/mcp/tools/task-tools.ts index d9fd83e..b065cdf 100644 --- a/src/mcp/tools/task-tools.ts +++ b/src/mcp/tools/task-tools.ts @@ -3,8 +3,11 @@ */ import { z } from 'zod'; +import { randomUUID } from 'node:crypto'; import type { MemoryManager } from '../../memory/index.js'; import type { DriftDetectionService } from '../../tasks/drift-detection-service.js'; +import type { ConsensusService } from '../../tasks/consensus-service.js'; +import type { TaskRiskLevel, ProposedSubtask } from '../../types.js'; // Input schemas const CreateInputSchema = z.object({ @@ -12,6 +15,7 @@ const CreateInputSchema = z.object({ input: z.string().optional().describe('Task input/description'), sessionId: z.string().uuid().optional().describe('Session to associate with'), parentTaskId: z.string().uuid().optional().describe('Parent task ID for drift detection'), + riskLevel: z.enum(['low', 'medium', 'high']).optional().describe('Risk level for consensus checking'), }); const AssignInputSchema = z.object({ @@ -45,11 +49,15 @@ const GetRelationshipsInputSchema = z.object({ direction: 
z.enum(['outgoing', 'incoming', 'both']).optional().describe('Relationship direction'), }); -export function createTaskTools(memory: MemoryManager, driftService?: DriftDetectionService) { +export function createTaskTools( + memory: MemoryManager, + driftService?: DriftDetectionService, + consensusService?: ConsensusService +) { return { task_create: { name: 'task_create', - description: 'Create a new task with optional drift detection', + description: 'Create a new task with optional drift detection and consensus checking', inputSchema: { type: 'object', properties: { @@ -57,6 +65,7 @@ export function createTaskTools(memory: MemoryManager, driftService?: DriftDetec input: { type: 'string', description: 'Task input/description' }, sessionId: { type: 'string', description: 'Session to associate with' }, parentTaskId: { type: 'string', description: 'Parent task ID for drift detection' }, + riskLevel: { type: 'string', enum: ['low', 'medium', 'high'], description: 'Risk level for consensus checking' }, }, required: ['agentType'], }, @@ -89,10 +98,74 @@ export function createTaskTools(memory: MemoryManager, driftService?: DriftDetec } } + // Check for consensus if service is available + if (consensusService && consensusService.isEnabled()) { + // Estimate or use provided risk level + const riskLevel: TaskRiskLevel = input.riskLevel || + consensusService.estimateRiskLevel(input.agentType, input.input); + + // Calculate depth + const depth = consensusService.calculateTaskDepth(input.parentTaskId); + + // Check if consensus is required + const consensusCheck = consensusService.requiresConsensus( + riskLevel, + depth, + input.parentTaskId + ); + + if (consensusCheck.requiresConsensus) { + // Create a checkpoint instead of the task + const subtaskId = randomUUID(); + const proposedSubtask: ProposedSubtask = { + id: subtaskId, + agentType: input.agentType, + input: input.input || '', + estimatedRiskLevel: riskLevel, + parentTaskId: input.parentTaskId || '', + }; + + // Create 
a temporary task ID for the checkpoint + const pendingTaskId = randomUUID(); + + const checkpoint = consensusService.createCheckpoint({ + taskId: pendingTaskId, + parentTaskId: input.parentTaskId, + proposedSubtasks: [proposedSubtask], + riskLevel, + }); + + return { + success: false, + consensusRequired: true, + checkpointId: checkpoint.id, + status: 'pending', + reason: consensusCheck.reason, + checkpoint: { + id: checkpoint.id, + taskId: checkpoint.taskId, + riskLevel: checkpoint.riskLevel, + status: checkpoint.status, + expiresAt: checkpoint.expiresAt.toISOString(), + }, + }; + } + } + + // Get risk level and depth for task creation + const riskLevel: TaskRiskLevel | undefined = input.riskLevel || + (consensusService ? consensusService.estimateRiskLevel(input.agentType, input.input) : undefined); + const depth = consensusService?.calculateTaskDepth(input.parentTaskId); + const task = memory.createTask( input.agentType, input.input, - input.sessionId + input.sessionId, + { + riskLevel, + parentTaskId: input.parentTaskId, + depth, + } ); // Index the task for future drift detection @@ -118,6 +191,8 @@ export function createTaskTools(memory: MemoryManager, driftService?: DriftDetec createdAt: task.createdAt.toISOString(), sessionId: task.sessionId, parentTaskId: input.parentTaskId, + riskLevel: task.riskLevel, + depth: task.depth, }, drift: driftResult ? 
{ isDrift: driftResult.isDrift, @@ -429,5 +504,275 @@ export function createTaskTools(memory: MemoryManager, driftService?: DriftDetec } }, }, + + // Consensus tools + consensus_check: { + name: 'consensus_check', + description: 'Check if a task would require consensus before creation', + inputSchema: { + type: 'object', + properties: { + agentType: { type: 'string', description: 'Agent type for this task' }, + input: { type: 'string', description: 'Task input/description' }, + parentTaskId: { type: 'string', description: 'Parent task ID' }, + riskLevel: { type: 'string', enum: ['low', 'medium', 'high'], description: 'Risk level override' }, + }, + required: ['agentType'], + }, + handler: async (params: Record) => { + if (!consensusService) { + return { + success: false, + error: 'Consensus service not available', + }; + } + + try { + const agentType = params.agentType as string; + const input = params.input as string | undefined; + const parentTaskId = params.parentTaskId as string | undefined; + const riskLevelOverride = params.riskLevel as TaskRiskLevel | undefined; + + const riskLevel = riskLevelOverride || + consensusService.estimateRiskLevel(agentType, input); + const depth = consensusService.calculateTaskDepth(parentTaskId); + const result = consensusService.requiresConsensus(riskLevel, depth, parentTaskId); + + return { + success: true, + requiresConsensus: result.requiresConsensus, + reason: result.reason, + riskLevel, + depth, + config: { + enabled: consensusService.isEnabled(), + requireForRiskLevels: consensusService.getConfig().requireForRiskLevels, + maxDepth: consensusService.getConfig().maxDepth, + }, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + }, + }, + + consensus_list_pending: { + name: 'consensus_list_pending', + description: 'List pending consensus checkpoints', + inputSchema: { + type: 'object', + properties: { + limit: { type: 'number', description: 'Max checkpoints to return' }, + offset: { type: 'number', description: 'Offset for pagination' }, + }, + }, + handler: async (params: Record) => { + if (!consensusService) { + return { + success: false, + error: 'Consensus service not available', + }; + } + + try { + const limit = params.limit as number | undefined; + const offset = params.offset as number | undefined; + const checkpoints = consensusService.listPendingCheckpoints({ limit, offset }); + + return { + success: true, + count: checkpoints.length, + checkpoints: checkpoints.map(cp => ({ + id: cp.id, + taskId: cp.taskId, + parentTaskId: cp.parentTaskId, + riskLevel: cp.riskLevel, + status: cp.status, + reviewerStrategy: cp.reviewerStrategy, + subtaskCount: cp.proposedSubtasks.length, + createdAt: cp.createdAt.toISOString(), + expiresAt: cp.expiresAt.toISOString(), + })), + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + }, + }, + + consensus_get: { + name: 'consensus_get', + description: 'Get a consensus checkpoint by ID', + inputSchema: { + type: 'object', + properties: { + checkpointId: { type: 'string', description: 'Checkpoint ID' }, + }, + required: ['checkpointId'], + }, + handler: async (params: Record) => { + if (!consensusService) { + return { + success: false, + error: 'Consensus service not available', + }; + } + + try { + const checkpointId = params.checkpointId as string; + const checkpoint = consensusService.getCheckpoint(checkpointId); + + if (!checkpoint) { + return { + success: false, + error: 'Checkpoint not found', + }; + } + + return { + success: true, + checkpoint: { + id: checkpoint.id, + taskId: checkpoint.taskId, + parentTaskId: checkpoint.parentTaskId, + proposedSubtasks: checkpoint.proposedSubtasks, + riskLevel: checkpoint.riskLevel, + status: checkpoint.status, + reviewerStrategy: checkpoint.reviewerStrategy, + reviewerId: checkpoint.reviewerId, + reviewerType: checkpoint.reviewerType, + decision: checkpoint.decision, + createdAt: checkpoint.createdAt.toISOString(), + expiresAt: checkpoint.expiresAt.toISOString(), + decidedAt: checkpoint.decidedAt?.toISOString(), + }, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + }, + }, + + consensus_approve: { + name: 'consensus_approve', + description: 'Approve a consensus checkpoint', + inputSchema: { + type: 'object', + properties: { + checkpointId: { type: 'string', description: 'Checkpoint ID' }, + reviewedBy: { type: 'string', description: 'Reviewer ID' }, + feedback: { type: 'string', description: 'Optional feedback' }, + }, + required: ['checkpointId', 'reviewedBy'], + }, + handler: async (params: Record) => { + if (!consensusService) { + return { + success: false, + error: 'Consensus service not available', + }; + } + + try { + const checkpointId = params.checkpointId as string; + const reviewedBy = params.reviewedBy as string; + const feedback = params.feedback as string | undefined; + + const result = consensusService.approveCheckpoint(checkpointId, reviewedBy, feedback); + + if (!result.success) { + return { + success: false, + error: result.error, + }; + } + + return { + success: true, + checkpoint: result.checkpoint ? { + id: result.checkpoint.id, + status: result.checkpoint.status, + decidedAt: result.checkpoint.decidedAt?.toISOString(), + } : undefined, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + }, + }, + + consensus_reject: { + name: 'consensus_reject', + description: 'Reject a consensus checkpoint', + inputSchema: { + type: 'object', + properties: { + checkpointId: { type: 'string', description: 'Checkpoint ID' }, + reviewedBy: { type: 'string', description: 'Reviewer ID' }, + feedback: { type: 'string', description: 'Rejection reason' }, + rejectedSubtaskIds: { + type: 'array', + items: { type: 'string' }, + description: 'Specific subtask IDs to reject (partial rejection)', + }, + }, + required: ['checkpointId', 'reviewedBy'], + }, + handler: async (params: Record) => { + if (!consensusService) { + return { + success: false, + error: 'Consensus service not available', + }; + } + + try { + const checkpointId = params.checkpointId as string; + const reviewedBy = params.reviewedBy as string; + const feedback = params.feedback as string | undefined; + const rejectedSubtaskIds = params.rejectedSubtaskIds as string[] | undefined; + + const result = consensusService.rejectCheckpoint( + checkpointId, + reviewedBy, + feedback, + rejectedSubtaskIds + ); + + if (!result.success) { + return { + success: false, + error: result.error, + }; + } + + return { + success: true, + checkpoint: result.checkpoint ? { + id: result.checkpoint.id, + status: result.checkpoint.status, + decidedAt: result.checkpoint.decidedAt?.toISOString(), + } : undefined, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + }; + } + }, + }, }; } diff --git a/src/memory/index.ts b/src/memory/index.ts index e76c806..1385360 100644 --- a/src/memory/index.ts +++ b/src/memory/index.ts @@ -515,8 +515,18 @@ export class MemoryManager { // ==================== Task Operations ==================== - createTask(agentType: string, input?: string, sessionId?: string): Task { - return this.sqliteStore.createTask(agentType, input, sessionId); + createTask( + agentType: string, + input?: string, + sessionId?: string, + options?: { + riskLevel?: 'low' | 'medium' | 'high'; + parentTaskId?: string; + depth?: number; + consensusCheckpointId?: string; + } + ): Task { + return this.sqliteStore.createTask(agentType, input, sessionId, options); } getTask(id: string): Task | null { diff --git a/src/memory/sqlite-store.ts b/src/memory/sqlite-store.ts index 836f10b..0c1777f 100644 --- a/src/memory/sqlite-store.ts +++ b/src/memory/sqlite-store.ts @@ -33,6 +33,12 @@ import type { ResourceExhaustionEvent, ResourceExhaustionAction, ResourceThresholds, + ConsensusCheckpoint, + ConsensusStatus, + TaskRiskLevel, + ReviewerStrategy, + ProposedSubtask, + ConsensusDecision, } from '../types.js'; import { logger } from '../utils/logger.js'; @@ -162,7 +168,13 @@ CREATE TABLE IF NOT EXISTS tasks ( output TEXT, created_at INTEGER NOT NULL, completed_at INTEGER, - FOREIGN KEY (session_id) REFERENCES sessions(id) + risk_level TEXT CHECK (risk_level IS NULL OR risk_level IN ('low', 'medium', 'high')), + parent_task_id TEXT, + depth INTEGER, + consensus_checkpoint_id TEXT, + FOREIGN KEY (session_id) REFERENCES sessions(id), + FOREIGN KEY (parent_task_id) REFERENCES tasks(id), + FOREIGN KEY (consensus_checkpoint_id) REFERENCES consensus_checkpoints(id) ); CREATE INDEX IF NOT EXISTS idx_tasks_session ON tasks(session_id); @@ -412,6 +424,42 @@ CREATE TABLE IF NOT EXISTS resource_exhaustion_events ( CREATE INDEX IF NOT EXISTS idx_resource_exhaustion_events_agent ON 
resource_exhaustion_events(agent_id); CREATE INDEX IF NOT EXISTS idx_resource_exhaustion_events_created ON resource_exhaustion_events(created_at DESC); + +-- Consensus Checkpoints +CREATE TABLE IF NOT EXISTS consensus_checkpoints ( + id TEXT PRIMARY KEY, + task_id TEXT NOT NULL, + parent_task_id TEXT, + proposed_subtasks TEXT NOT NULL, + risk_level TEXT NOT NULL CHECK (risk_level IN ('low', 'medium', 'high')), + status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending', 'approved', 'rejected', 'expired')), + reviewer_strategy TEXT NOT NULL CHECK (reviewer_strategy IN ('adversarial', 'different-model', 'human')), + reviewer_id TEXT, + reviewer_type TEXT CHECK (reviewer_type IN ('agent', 'human')), + decision TEXT, + created_at INTEGER NOT NULL, + expires_at INTEGER NOT NULL, + decided_at INTEGER +); + +CREATE INDEX IF NOT EXISTS idx_consensus_checkpoints_task ON consensus_checkpoints(task_id); +CREATE INDEX IF NOT EXISTS idx_consensus_checkpoints_status ON consensus_checkpoints(status); +CREATE INDEX IF NOT EXISTS idx_consensus_checkpoints_expires ON consensus_checkpoints(expires_at); + +-- Consensus Checkpoint Events (audit log) +CREATE TABLE IF NOT EXISTS consensus_checkpoint_events ( + id TEXT PRIMARY KEY, + checkpoint_id TEXT NOT NULL, + event_type TEXT NOT NULL CHECK (event_type IN ('created', 'review_started', 'approved', 'rejected', 'expired', 'subtask_rejected')), + actor_id TEXT, + actor_type TEXT CHECK (actor_type IN ('agent', 'human', 'system')), + details TEXT, + created_at INTEGER NOT NULL, + FOREIGN KEY (checkpoint_id) REFERENCES consensus_checkpoints(id) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_consensus_events_checkpoint ON consensus_checkpoint_events(checkpoint_id); +CREATE INDEX IF NOT EXISTS idx_consensus_events_created ON consensus_checkpoint_events(created_at DESC); `; export class SQLiteStore { @@ -1221,17 +1269,33 @@ export class SQLiteStore { createTask( agentType: string, input?: string, - sessionId?: string + 
sessionId?: string, + options?: { + riskLevel?: TaskRiskLevel; + parentTaskId?: string; + depth?: number; + consensusCheckpointId?: string; + } ): Task { const id = randomUUID(); const now = Date.now(); this.db .prepare(` - INSERT INTO tasks (id, session_id, agent_type, status, input, created_at) - VALUES (?, ?, ?, 'pending', ?, ?) + INSERT INTO tasks (id, session_id, agent_type, status, input, created_at, risk_level, parent_task_id, depth, consensus_checkpoint_id) + VALUES (?, ?, ?, 'pending', ?, ?, ?, ?, ?, ?) `) - .run(id, sessionId ?? null, agentType, input ?? null, now); + .run( + id, + sessionId ?? null, + agentType, + input ?? null, + now, + options?.riskLevel ?? null, + options?.parentTaskId ?? null, + options?.depth ?? null, + options?.consensusCheckpointId ?? null + ); return { id, @@ -1240,6 +1304,10 @@ export class SQLiteStore { status: 'pending', input, createdAt: new Date(now), + riskLevel: options?.riskLevel, + parentTaskId: options?.parentTaskId, + depth: options?.depth, + consensusCheckpointId: options?.consensusCheckpointId, }; } @@ -1299,6 +1367,10 @@ export class SQLiteStore { output: row.output ?? undefined, createdAt: new Date(row.created_at), completedAt: row.completed_at ? new Date(row.completed_at) : undefined, + riskLevel: row.risk_level as TaskRiskLevel | undefined, + parentTaskId: row.parent_task_id ?? undefined, + depth: row.depth ?? undefined, + consensusCheckpointId: row.consensus_checkpoint_id ?? 
undefined, }; } @@ -2561,6 +2633,254 @@ export class SQLiteStore { }; } + // ==================== Consensus Checkpoint Operations ==================== + + /** + * Create a new consensus checkpoint + */ + createConsensusCheckpoint(options: { + id: string; + taskId: string; + parentTaskId?: string; + proposedSubtasks: ProposedSubtask[]; + riskLevel: TaskRiskLevel; + reviewerStrategy: ReviewerStrategy; + timeout: number; + }): ConsensusCheckpoint { + const now = Date.now(); + const expiresAt = now + options.timeout; + const proposedSubtasksJson = JSON.stringify(options.proposedSubtasks); + + this.db + .prepare(` + INSERT INTO consensus_checkpoints ( + id, task_id, parent_task_id, proposed_subtasks, risk_level, + status, reviewer_strategy, created_at, expires_at + ) + VALUES (?, ?, ?, ?, ?, 'pending', ?, ?, ?) + `) + .run( + options.id, + options.taskId, + options.parentTaskId ?? null, + proposedSubtasksJson, + options.riskLevel, + options.reviewerStrategy, + now, + expiresAt + ); + + // Log event + this.logConsensusEvent(options.id, 'created', undefined, undefined, { + riskLevel: options.riskLevel, + subtaskCount: options.proposedSubtasks.length, + }); + + return { + id: options.id, + taskId: options.taskId, + parentTaskId: options.parentTaskId, + proposedSubtasks: options.proposedSubtasks, + riskLevel: options.riskLevel, + status: 'pending', + reviewerStrategy: options.reviewerStrategy, + createdAt: new Date(now), + expiresAt: new Date(expiresAt), + }; + } + + /** + * Get a consensus checkpoint by ID + */ + getConsensusCheckpoint(id: string): ConsensusCheckpoint | null { + const row = this.db + .prepare('SELECT * FROM consensus_checkpoints WHERE id = ?') + .get(id) as ConsensusCheckpointRow | undefined; + + return row ? 
this.rowToConsensusCheckpoint(row) : null; + } + + /** + * Get consensus checkpoint by task ID + */ + getConsensusCheckpointByTaskId(taskId: string): ConsensusCheckpoint | null { + const row = this.db + .prepare('SELECT * FROM consensus_checkpoints WHERE task_id = ? ORDER BY created_at DESC LIMIT 1') + .get(taskId) as ConsensusCheckpointRow | undefined; + + return row ? this.rowToConsensusCheckpoint(row) : null; + } + + /** + * Update consensus checkpoint status + */ + updateConsensusCheckpointStatus( + id: string, + status: ConsensusStatus, + decision?: ConsensusDecision + ): boolean { + const now = Date.now(); + const decisionJson = decision ? JSON.stringify(decision) : null; + + const result = this.db + .prepare(` + UPDATE consensus_checkpoints + SET status = ?, decision = ?, decided_at = ?, + reviewer_id = ?, reviewer_type = ? + WHERE id = ? + `) + .run( + status, + decisionJson, + status !== 'pending' ? now : null, + decision?.reviewedBy ?? null, + decision?.reviewerType ?? null, + id + ); + + if (result.changes > 0) { + // Log event + const eventType = status === 'approved' ? 'approved' : + status === 'rejected' ? 'rejected' : + status === 'expired' ? 'expired' : 'created'; + this.logConsensusEvent( + id, + eventType, + decision?.reviewedBy, + decision?.reviewerType, + { feedback: decision?.feedback } + ); + } + + return result.changes > 0; + } + + /** + * List pending consensus checkpoints + */ + listPendingCheckpoints(options?: { + limit?: number; + offset?: number; + }): ConsensusCheckpoint[] { + const limit = options?.limit ?? 50; + const offset = options?.offset ?? 0; + + const rows = this.db + .prepare(` + SELECT * FROM consensus_checkpoints + WHERE status = 'pending' + ORDER BY created_at ASC + LIMIT ? OFFSET ? 
+ `) + .all(limit, offset) as ConsensusCheckpointRow[]; + + return rows.map(row => this.rowToConsensusCheckpoint(row)); + } + + /** + * Expire old checkpoints + */ + expireOldCheckpoints(): number { + const now = Date.now(); + + // Get checkpoints to expire + const toExpire = this.db + .prepare(` + SELECT id FROM consensus_checkpoints + WHERE status = 'pending' AND expires_at < ? + `) + .all(now) as Array<{ id: string }>; + + // Update status + const result = this.db + .prepare(` + UPDATE consensus_checkpoints + SET status = 'expired', decided_at = ? + WHERE status = 'pending' AND expires_at < ? + `) + .run(now, now); + + // Log events for each expired checkpoint + for (const checkpoint of toExpire) { + this.logConsensusEvent(checkpoint.id, 'expired', undefined, 'system'); + } + + return result.changes; + } + + /** + * Get checkpoint events (audit log) + */ + getConsensusCheckpointEvents(checkpointId: string, limit: number = 100): Array<{ + id: string; + checkpointId: string; + eventType: string; + actorId?: string; + actorType?: string; + details?: Record; + createdAt: Date; + }> { + const rows = this.db + .prepare(` + SELECT * FROM consensus_checkpoint_events + WHERE checkpoint_id = ? + ORDER BY created_at DESC + LIMIT ? + `) + .all(checkpointId, limit) as ConsensusCheckpointEventRow[]; + + return rows.map(row => ({ + id: row.id, + checkpointId: row.checkpoint_id, + eventType: row.event_type, + actorId: row.actor_id ?? undefined, + actorType: row.actor_type ?? undefined, + details: row.details ? JSON.parse(row.details) as Record : undefined, + createdAt: new Date(row.created_at), + })); + } + + /** + * Log a consensus checkpoint event + */ + private logConsensusEvent( + checkpointId: string, + eventType: 'created' | 'review_started' | 'approved' | 'rejected' | 'expired' | 'subtask_rejected', + actorId?: string, + actorType?: 'agent' | 'human' | 'system', + details?: Record + ): void { + const id = randomUUID(); + const detailsJson = details ? 
JSON.stringify(details) : null; + + this.db + .prepare(` + INSERT INTO consensus_checkpoint_events ( + id, checkpoint_id, event_type, actor_id, actor_type, details, created_at + ) + VALUES (?, ?, ?, ?, ?, ?, ?) + `) + .run(id, checkpointId, eventType, actorId ?? null, actorType ?? null, detailsJson, Date.now()); + } + + private rowToConsensusCheckpoint(row: ConsensusCheckpointRow): ConsensusCheckpoint { + return { + id: row.id, + taskId: row.task_id, + parentTaskId: row.parent_task_id ?? undefined, + proposedSubtasks: JSON.parse(row.proposed_subtasks) as ProposedSubtask[], + riskLevel: row.risk_level as TaskRiskLevel, + status: row.status as ConsensusStatus, + reviewerStrategy: row.reviewer_strategy as ReviewerStrategy, + reviewerId: row.reviewer_id ?? undefined, + reviewerType: row.reviewer_type as 'agent' | 'human' | undefined, + decision: row.decision ? JSON.parse(row.decision) as ConsensusDecision : undefined, + createdAt: new Date(row.created_at), + expiresAt: new Date(row.expires_at), + decidedAt: row.decided_at ? 
new Date(row.decided_at) : undefined, + }; + } + // ==================== Cleanup ==================== /** @@ -2621,6 +2941,10 @@ interface TaskRow { output: string | null; created_at: number; completed_at: number | null; + risk_level: string | null; + parent_task_id: string | null; + depth: number | null; + consensus_checkpoint_id: string | null; } interface ProjectRow { @@ -2730,3 +3054,29 @@ interface ResourceExhaustionEventRow { triggered_by: string; created_at: number; } + +interface ConsensusCheckpointRow { + id: string; + task_id: string; + parent_task_id: string | null; + proposed_subtasks: string; + risk_level: string; + status: string; + reviewer_strategy: string; + reviewer_id: string | null; + reviewer_type: string | null; + decision: string | null; + created_at: number; + expires_at: number; + decided_at: number | null; +} + +interface ConsensusCheckpointEventRow { + id: string; + checkpoint_id: string; + event_type: string; + actor_id: string | null; + actor_type: string | null; + details: string | null; + created_at: number; +} diff --git a/src/tasks/consensus-service.ts b/src/tasks/consensus-service.ts new file mode 100644 index 0000000..879a372 --- /dev/null +++ b/src/tasks/consensus-service.ts @@ -0,0 +1,514 @@ +/** + * Consensus Service + * Manages consensus checkpoints for high-stakes tasks, preventing self-reinforcing agent cycles + */ + +import { randomUUID } from 'node:crypto'; +import type { SQLiteStore } from '../memory/sqlite-store.js'; +import type { + AgentStackConfig, + ConsensusConfig, + ConsensusCheckpoint, + ConsensusDecision, + ConsensusStatus, + ProposedSubtask, + TaskRiskLevel, + ReviewerStrategy, + Task, +} from '../types.js'; +import { logger } from '../utils/logger.js'; + +const log = logger.child('consensus'); + +const DEFAULT_CONFIG: ConsensusConfig = { + enabled: false, + requireForRiskLevels: ['high', 'medium'], + reviewerStrategy: 'adversarial', + timeout: 300000, // 5 minutes + maxDepth: 5, + autoReject: false, +}; + 
+export interface ConsensusCheckResult { + requiresConsensus: boolean; + reason?: string; + riskLevel?: TaskRiskLevel; + depth?: number; +} + +export interface CreateCheckpointOptions { + taskId: string; + parentTaskId?: string; + proposedSubtasks: ProposedSubtask[]; + riskLevel: TaskRiskLevel; +} + +export class ConsensusService { + private store: SQLiteStore; + private config: ConsensusConfig; + private expirationInterval: NodeJS.Timeout | null = null; + + constructor(store: SQLiteStore, appConfig: AgentStackConfig) { + this.store = store; + this.config = { ...DEFAULT_CONFIG, ...appConfig.consensus }; + + log.debug('Consensus service initialized', { + enabled: this.config.enabled, + requireForRiskLevels: this.config.requireForRiskLevels, + reviewerStrategy: this.config.reviewerStrategy, + timeout: this.config.timeout, + maxDepth: this.config.maxDepth, + }); + + // Start expiration check if enabled + if (this.config.enabled) { + this.startExpirationCheck(); + } + } + + /** + * Check if consensus is enabled + */ + isEnabled(): boolean { + return this.config.enabled; + } + + /** + * Get the current configuration + */ + getConfig(): ConsensusConfig { + return { ...this.config }; + } + + /** + * Check if a task requires consensus before creating subtasks + */ + requiresConsensus( + riskLevel: TaskRiskLevel, + depth: number, + parentTaskId?: string + ): ConsensusCheckResult { + // Disabled - no consensus required + if (!this.config.enabled) { + return { requiresConsensus: false }; + } + + // Check if risk level requires consensus + if (!this.config.requireForRiskLevels.includes(riskLevel)) { + return { + requiresConsensus: false, + reason: `Risk level '${riskLevel}' does not require consensus`, + riskLevel, + depth, + }; + } + + // Check depth limit + if (depth > this.config.maxDepth) { + return { + requiresConsensus: true, + reason: `Task depth ${depth} exceeds maximum allowed depth ${this.config.maxDepth}`, + riskLevel, + depth, + }; + } + + // Root tasks (no parent) 
don't require consensus + if (!parentTaskId) { + return { + requiresConsensus: false, + reason: 'Root tasks do not require consensus', + riskLevel, + depth, + }; + } + + return { + requiresConsensus: true, + reason: `Risk level '${riskLevel}' at depth ${depth} requires consensus`, + riskLevel, + depth, + }; + } + + /** + * Create a consensus checkpoint + */ + createCheckpoint(options: CreateCheckpointOptions): ConsensusCheckpoint { + const checkpointId = randomUUID(); + + log.info('Creating consensus checkpoint', { + checkpointId, + taskId: options.taskId, + riskLevel: options.riskLevel, + subtaskCount: options.proposedSubtasks.length, + }); + + const checkpoint = this.store.createConsensusCheckpoint({ + id: checkpointId, + taskId: options.taskId, + parentTaskId: options.parentTaskId, + proposedSubtasks: options.proposedSubtasks, + riskLevel: options.riskLevel, + reviewerStrategy: this.config.reviewerStrategy, + timeout: this.config.timeout, + }); + + return checkpoint; + } + + /** + * Get a checkpoint by ID + */ + getCheckpoint(checkpointId: string): ConsensusCheckpoint | null { + return this.store.getConsensusCheckpoint(checkpointId); + } + + /** + * Get checkpoint by task ID + */ + getCheckpointByTaskId(taskId: string): ConsensusCheckpoint | null { + return this.store.getConsensusCheckpointByTaskId(taskId); + } + + /** + * Start agent review process for a checkpoint + * Returns information about the reviewer that should be spawned + */ + startAgentReview(checkpointId: string): { + success: boolean; + checkpoint?: ConsensusCheckpoint; + reviewerConfig?: { + agentType: string; + prompt: string; + checkpointId: string; + }; + error?: string; + } { + const checkpoint = this.store.getConsensusCheckpoint(checkpointId); + + if (!checkpoint) { + return { success: false, error: 'Checkpoint not found' }; + } + + if (checkpoint.status !== 'pending') { + return { success: false, error: `Checkpoint is already ${checkpoint.status}` }; + } + + // Determine reviewer type based 
on strategy + let agentType = 'adversarial'; + if (checkpoint.reviewerStrategy === 'different-model') { + // Could implement model selection logic here + agentType = 'reviewer'; + } + + const subtaskSummary = checkpoint.proposedSubtasks + .map((s, i) => `${i + 1}. [${s.agentType}] ${s.input.substring(0, 100)}...`) + .join('\n'); + + const prompt = ` +You are reviewing a consensus checkpoint for high-stakes task execution. + +**Risk Level:** ${checkpoint.riskLevel} +**Task ID:** ${checkpoint.taskId} +**Parent Task ID:** ${checkpoint.parentTaskId || 'None (root task)'} + +**Proposed Subtasks:** +${subtaskSummary} + +Please analyze these subtasks for: +1. Scope creep or mission drift from the parent task +2. Potential for infinite recursion or self-reinforcing loops +3. Appropriate risk assessment +4. Security or safety concerns + +Respond with your decision in this JSON format: +{ + "approved": true/false, + "rejectedSubtaskIds": ["id1", "id2"], // optional - only if partially approving + "feedback": "Your detailed reasoning" +} +`.trim(); + + log.info('Starting agent review', { + checkpointId, + agentType, + reviewerStrategy: checkpoint.reviewerStrategy, + }); + + return { + success: true, + checkpoint, + reviewerConfig: { + agentType, + prompt, + checkpointId, + }, + }; + } + + /** + * Submit a decision for a checkpoint + */ + submitDecision( + checkpointId: string, + decision: ConsensusDecision + ): { + success: boolean; + checkpoint?: ConsensusCheckpoint; + error?: string; + } { + const checkpoint = this.store.getConsensusCheckpoint(checkpointId); + + if (!checkpoint) { + return { success: false, error: 'Checkpoint not found' }; + } + + if (checkpoint.status !== 'pending') { + return { success: false, error: `Checkpoint is already ${checkpoint.status}` }; + } + + // Check if checkpoint has expired + if (new Date() > checkpoint.expiresAt) { + this.store.updateConsensusCheckpointStatus(checkpointId, 'expired'); + return { success: false, error: 'Checkpoint has 
expired' }; + } + + const newStatus: ConsensusStatus = decision.approved ? 'approved' : 'rejected'; + + log.info('Decision submitted', { + checkpointId, + approved: decision.approved, + reviewedBy: decision.reviewedBy, + reviewerType: decision.reviewerType, + rejectedSubtasks: decision.rejectedSubtaskIds?.length ?? 0, + }); + + const success = this.store.updateConsensusCheckpointStatus( + checkpointId, + newStatus, + decision + ); + + if (!success) { + return { success: false, error: 'Failed to update checkpoint status' }; + } + + const updatedCheckpoint = this.store.getConsensusCheckpoint(checkpointId); + return { success: true, checkpoint: updatedCheckpoint ?? undefined }; + } + + /** + * Approve a checkpoint (convenience method for human approval) + */ + approveCheckpoint( + checkpointId: string, + reviewedBy: string, + feedback?: string + ): { + success: boolean; + checkpoint?: ConsensusCheckpoint; + error?: string; + } { + return this.submitDecision(checkpointId, { + approved: true, + feedback, + reviewedBy, + reviewerType: 'human', + }); + } + + /** + * Reject a checkpoint (convenience method for human rejection) + */ + rejectCheckpoint( + checkpointId: string, + reviewedBy: string, + feedback?: string, + rejectedSubtaskIds?: string[] + ): { + success: boolean; + checkpoint?: ConsensusCheckpoint; + error?: string; + } { + return this.submitDecision(checkpointId, { + approved: false, + rejectedSubtaskIds, + feedback, + reviewedBy, + reviewerType: 'human', + }); + } + + /** + * List pending checkpoints + */ + listPendingCheckpoints(options?: { + limit?: number; + offset?: number; + }): ConsensusCheckpoint[] { + return this.store.listPendingCheckpoints(options); + } + + /** + * Get checkpoint events (audit log) + */ + getCheckpointEvents(checkpointId: string, limit?: number) { + return this.store.getConsensusCheckpointEvents(checkpointId, limit); + } + + /** + * Expire old checkpoints + */ + expireCheckpoints(): number { + const count = 
this.store.expireOldCheckpoints(); + if (count > 0) { + log.info('Expired checkpoints', { count }); + } + return count; + } + + /** + * Calculate task depth from parent chain + */ + calculateTaskDepth(parentTaskId?: string): number { + if (!parentTaskId) { + return 0; + } + + let depth = 0; + let currentTaskId: string | undefined = parentTaskId; + + while (currentTaskId && depth < 100) { // Safety limit + const task = this.store.getTask(currentTaskId); + if (!task) break; + + depth++; + currentTaskId = task.parentTaskId; + } + + return depth; + } + + /** + * Estimate risk level based on agent type and input + */ + estimateRiskLevel(agentType: string, input?: string): TaskRiskLevel { + // High-risk agent types + const highRiskAgents = ['coder', 'devops', 'security-auditor']; + if (highRiskAgents.includes(agentType)) { + return 'high'; + } + + // Medium-risk agent types + const mediumRiskAgents = ['architect', 'coordinator', 'analyst']; + if (mediumRiskAgents.includes(agentType)) { + return 'medium'; + } + + // Check input for high-risk patterns + if (input) { + const loweredInput = input.toLowerCase(); + const highRiskPatterns = [ + 'delete', 'remove', 'drop', 'deploy', 'production', + 'credentials', 'secret', 'password', 'token', 'api key', + ]; + + for (const pattern of highRiskPatterns) { + if (loweredInput.includes(pattern)) { + return 'high'; + } + } + + const mediumRiskPatterns = [ + 'modify', 'update', 'change', 'configure', 'install', + ]; + + for (const pattern of mediumRiskPatterns) { + if (loweredInput.includes(pattern)) { + return 'medium'; + } + } + } + + return 'low'; + } + + /** + * Get approved subtasks from a checkpoint (filters out rejected ones) + */ + getApprovedSubtasks(checkpointId: string): ProposedSubtask[] { + const checkpoint = this.store.getConsensusCheckpoint(checkpointId); + + if (!checkpoint || checkpoint.status !== 'approved') { + return []; + } + + const rejectedIds = new Set(checkpoint.decision?.rejectedSubtaskIds ?? 
[]); + + return checkpoint.proposedSubtasks.filter(s => !rejectedIds.has(s.id)); + } + + /** + * Start periodic expiration check + */ + private startExpirationCheck(): void { + // Check every minute + this.expirationInterval = setInterval(() => { + this.expireCheckpoints(); + }, 60000); + } + + /** + * Stop periodic expiration check + */ + stopExpirationCheck(): void { + if (this.expirationInterval) { + clearInterval(this.expirationInterval); + this.expirationInterval = null; + } + } + + /** + * Cleanup + */ + destroy(): void { + this.stopExpirationCheck(); + } +} + +// Singleton instance management +let consensusService: ConsensusService | null = null; +let lastConfig: string | null = null; + +/** + * Get the consensus service singleton + */ +export function getConsensusService( + store: SQLiteStore, + config: AgentStackConfig, + forceNew = false +): ConsensusService { + const configHash = JSON.stringify(config.consensus); + + if (forceNew || !consensusService || lastConfig !== configHash) { + if (consensusService) { + consensusService.destroy(); + } + consensusService = new ConsensusService(store, config); + lastConfig = configHash; + } + + return consensusService; +} + +/** + * Reset the consensus service singleton + */ +export function resetConsensusService(): void { + if (consensusService) { + consensusService.destroy(); + consensusService = null; + lastConfig = null; + } +} diff --git a/src/types.ts b/src/types.ts index c424859..073c622 100644 --- a/src/types.ts +++ b/src/types.ts @@ -126,10 +126,62 @@ export interface Task { output?: string; createdAt: Date; completedAt?: Date; + riskLevel?: TaskRiskLevel; + consensusCheckpointId?: string; + parentTaskId?: string; + depth?: number; } export type TaskStatus = 'pending' | 'running' | 'completed' | 'failed'; +// Task Risk Levels and Consensus Types +export type TaskRiskLevel = 'low' | 'medium' | 'high'; +export type ConsensusStatus = 'pending' | 'approved' | 'rejected' | 'expired'; +export type ReviewerStrategy 
= 'adversarial' | 'different-model' | 'human'; + +// Consensus Configuration +export interface ConsensusConfig { + enabled: boolean; + requireForRiskLevels: TaskRiskLevel[]; + reviewerStrategy: ReviewerStrategy; + timeout: number; + maxDepth: number; + autoReject: boolean; +} + +// Consensus Checkpoint +export interface ConsensusCheckpoint { + id: string; + taskId: string; + parentTaskId?: string; + proposedSubtasks: ProposedSubtask[]; + riskLevel: TaskRiskLevel; + status: ConsensusStatus; + reviewerStrategy: ReviewerStrategy; + reviewerId?: string; + reviewerType?: 'agent' | 'human'; + decision?: ConsensusDecision; + createdAt: Date; + expiresAt: Date; + decidedAt?: Date; +} + +export interface ProposedSubtask { + id: string; + agentType: string; + input: string; + estimatedRiskLevel: TaskRiskLevel; + parentTaskId: string; +} + +export interface ConsensusDecision { + approved: boolean; + rejectedSubtaskIds?: string[]; + feedback?: string; + reviewedBy: string; + reviewerType: 'agent' | 'human'; +} + // Project types export interface Project { id: string; @@ -289,6 +341,7 @@ export interface AgentStackConfig { slack?: SlackConfig; driftDetection?: DriftDetectionConfig; resourceExhaustion?: ResourceExhaustionConfig; + consensus?: ConsensusConfig; } export interface MemoryConfig { diff --git a/src/utils/config.ts b/src/utils/config.ts index 3e37547..6c52e96 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -119,6 +119,15 @@ const ResourceExhaustionConfigSchema = z.object({ pauseOnIntervention: z.boolean().default(true), }); +const ConsensusConfigSchema = z.object({ + enabled: z.boolean().default(false), + requireForRiskLevels: z.array(z.enum(['low', 'medium', 'high'])).default(['high', 'medium']), + reviewerStrategy: z.enum(['adversarial', 'different-model', 'human']).default('adversarial'), + timeout: z.number().min(30000).max(3600000).default(300000), + maxDepth: z.number().min(1).max(10).default(5), + autoReject: z.boolean().default(false), +}); + 
const ConfigSchema = z.object({ version: z.string().default('1.0.0'), memory: MemoryConfigSchema.default({}), @@ -131,6 +140,7 @@ const ConfigSchema = z.object({ slack: SlackConfigSchema.default({}), driftDetection: DriftDetectionConfigSchema.default({}), resourceExhaustion: ResourceExhaustionConfigSchema.default({}), + consensus: ConsensusConfigSchema.default({}), }); const CONFIG_FILE_NAME = 'aistack.config.json'; diff --git a/src/web/routes/consensus.ts b/src/web/routes/consensus.ts new file mode 100644 index 0000000..8e80f7b --- /dev/null +++ b/src/web/routes/consensus.ts @@ -0,0 +1,288 @@ +/** + * Consensus checkpoint routes + */ + +import type { AgentStackConfig } from '../../types.js'; +import type { Router } from '../router.js'; +import { sendJson, sendPaginated } from '../router.js'; +import { badRequest, notFound } from '../middleware/error.js'; +import { getMemoryManager } from '../../memory/index.js'; +import { getConsensusService } from '../../tasks/consensus-service.js'; +import { agentEvents } from '../websocket/event-bridge.js'; + +export function registerConsensusRoutes(router: Router, config: AgentStackConfig): void { + const getManager = () => getMemoryManager(config); + const getService = () => { + const manager = getManager(); + return getConsensusService(manager.getStore(), config); + }; + + // GET /api/v1/consensus/config - Get consensus configuration + router.get('/api/v1/consensus/config', (_req, res) => { + const service = getService(); + const cfg = service.getConfig(); + + sendJson(res, { + enabled: cfg.enabled, + requireForRiskLevels: cfg.requireForRiskLevels, + reviewerStrategy: cfg.reviewerStrategy, + timeout: cfg.timeout, + maxDepth: cfg.maxDepth, + autoReject: cfg.autoReject, + }); + }); + + // GET /api/v1/consensus/pending - List pending checkpoints + router.get('/api/v1/consensus/pending', (_req, res, params) => { + const limit = parseInt(params.query.limit || '50', 10); + const offset = parseInt(params.query.offset || '0', 10); + 
+ const service = getService(); + const checkpoints = service.listPendingCheckpoints({ limit, offset }); + + sendPaginated( + res, + checkpoints.map(cp => ({ + id: cp.id, + taskId: cp.taskId, + parentTaskId: cp.parentTaskId, + riskLevel: cp.riskLevel, + status: cp.status, + reviewerStrategy: cp.reviewerStrategy, + proposedSubtaskCount: cp.proposedSubtasks.length, + createdAt: cp.createdAt.toISOString(), + expiresAt: cp.expiresAt.toISOString(), + })), + { limit, offset, total: checkpoints.length } + ); + }); + + // POST /api/v1/consensus/check - Check if consensus is required + router.post('/api/v1/consensus/check', (_req, res, params) => { + const body = params.body as { + riskLevel?: string; + depth?: number; + parentTaskId?: string; + agentType?: string; + input?: string; + } | undefined; + + const service = getService(); + + // Estimate risk level if not provided + let riskLevel = body?.riskLevel as 'low' | 'medium' | 'high' | undefined; + if (!riskLevel && body?.agentType) { + riskLevel = service.estimateRiskLevel(body.agentType, body.input); + } + if (!riskLevel) { + riskLevel = 'low'; + } + + // Calculate depth if not provided + let depth = body?.depth; + if (depth === undefined && body?.parentTaskId) { + depth = service.calculateTaskDepth(body.parentTaskId); + } + if (depth === undefined) { + depth = 0; + } + + const result = service.requiresConsensus(riskLevel, depth, body?.parentTaskId); + + sendJson(res, { + requiresConsensus: result.requiresConsensus, + reason: result.reason, + riskLevel: result.riskLevel || riskLevel, + depth: result.depth ?? 
depth, + config: { + enabled: service.isEnabled(), + requireForRiskLevels: service.getConfig().requireForRiskLevels, + maxDepth: service.getConfig().maxDepth, + }, + }); + }); + + // POST /api/v1/consensus/expire - Manually expire old checkpoints + router.post('/api/v1/consensus/expire', (_req, res) => { + const service = getService(); + const count = service.expireCheckpoints(); + + sendJson(res, { + success: true, + expiredCount: count, + }); + }); + + // Routes with :id parameter - more specific routes MUST come before less specific ones + + // GET /api/v1/consensus/:id/events - Get checkpoint audit log + router.get('/api/v1/consensus/:id/events', (_req, res, params) => { + const checkpointId = params.path[0]; + if (!checkpointId) { + throw badRequest('Checkpoint ID is required'); + } + + const limit = parseInt(params.query.limit || '100', 10); + + const service = getService(); + const checkpoint = service.getCheckpoint(checkpointId); + + if (!checkpoint) { + throw notFound('Consensus checkpoint'); + } + + const events = service.getCheckpointEvents(checkpointId, limit); + + sendJson(res, { + checkpointId, + events: events.map(e => ({ + id: e.id, + eventType: e.eventType, + actorId: e.actorId, + actorType: e.actorType, + details: e.details, + createdAt: e.createdAt.toISOString(), + })), + }); + }); + + // PUT /api/v1/consensus/:id/approve - Approve a checkpoint + router.put('/api/v1/consensus/:id/approve', (_req, res, params) => { + const checkpointId = params.path[0]; + if (!checkpointId) { + throw badRequest('Checkpoint ID is required'); + } + + const body = params.body as { + reviewedBy?: string; + feedback?: string; + } | undefined; + + if (!body?.reviewedBy) { + throw badRequest('reviewedBy is required'); + } + + const service = getService(); + const result = service.approveCheckpoint( + checkpointId, + body.reviewedBy, + body.feedback + ); + + if (!result.success) { + throw badRequest(result.error || 'Failed to approve checkpoint'); + } + + // Emit 
WebSocket event + agentEvents.emit('consensus:checkpoint:approved', { + checkpointId, + reviewedBy: body.reviewedBy, + }); + + sendJson(res, { + success: true, + checkpoint: result.checkpoint ? { + id: result.checkpoint.id, + status: result.checkpoint.status, + decidedAt: result.checkpoint.decidedAt?.toISOString(), + } : undefined, + }); + }); + + // PUT /api/v1/consensus/:id/reject - Reject a checkpoint + router.put('/api/v1/consensus/:id/reject', (_req, res, params) => { + const checkpointId = params.path[0]; + if (!checkpointId) { + throw badRequest('Checkpoint ID is required'); + } + + const body = params.body as { + reviewedBy?: string; + feedback?: string; + rejectedSubtaskIds?: string[]; + } | undefined; + + if (!body?.reviewedBy) { + throw badRequest('reviewedBy is required'); + } + + const service = getService(); + const result = service.rejectCheckpoint( + checkpointId, + body.reviewedBy, + body.feedback, + body.rejectedSubtaskIds + ); + + if (!result.success) { + throw badRequest(result.error || 'Failed to reject checkpoint'); + } + + // Emit WebSocket event + agentEvents.emit('consensus:checkpoint:rejected', { + checkpointId, + reviewedBy: body.reviewedBy, + rejectedSubtaskIds: body.rejectedSubtaskIds, + }); + + sendJson(res, { + success: true, + checkpoint: result.checkpoint ? 
{ + id: result.checkpoint.id, + status: result.checkpoint.status, + decidedAt: result.checkpoint.decidedAt?.toISOString(), + } : undefined, + }); + }); + + // POST /api/v1/consensus/:id/start-review - Start agent review + router.post('/api/v1/consensus/:id/start-review', (_req, res, params) => { + const checkpointId = params.path[0]; + if (!checkpointId) { + throw badRequest('Checkpoint ID is required'); + } + + const service = getService(); + const result = service.startAgentReview(checkpointId); + + if (!result.success) { + throw badRequest(result.error || 'Failed to start review'); + } + + sendJson(res, { + success: true, + reviewerConfig: result.reviewerConfig, + }); + }); + + // GET /api/v1/consensus/:id - Get checkpoint details (MUST be after more specific :id routes) + router.get('/api/v1/consensus/:id', (_req, res, params) => { + const checkpointId = params.path[0]; + if (!checkpointId) { + throw badRequest('Checkpoint ID is required'); + } + + const service = getService(); + const checkpoint = service.getCheckpoint(checkpointId); + + if (!checkpoint) { + throw notFound('Consensus checkpoint'); + } + + sendJson(res, { + id: checkpoint.id, + taskId: checkpoint.taskId, + parentTaskId: checkpoint.parentTaskId, + proposedSubtasks: checkpoint.proposedSubtasks, + riskLevel: checkpoint.riskLevel, + status: checkpoint.status, + reviewerStrategy: checkpoint.reviewerStrategy, + reviewerId: checkpoint.reviewerId, + reviewerType: checkpoint.reviewerType, + decision: checkpoint.decision, + createdAt: checkpoint.createdAt.toISOString(), + expiresAt: checkpoint.expiresAt.toISOString(), + decidedAt: checkpoint.decidedAt?.toISOString(), + }); + }); +} diff --git a/src/web/routes/index.ts b/src/web/routes/index.ts index 5dbc2e7..0f2ac65 100644 --- a/src/web/routes/index.ts +++ b/src/web/routes/index.ts @@ -14,3 +14,4 @@ export { registerSpecificationRoutes } from './specifications.js'; export { registerFilesystemRoutes } from './filesystem.js'; export { createAuthRoutes } 
from './auth.js'; export { registerIdentityRoutes } from './identities.js'; +export { registerConsensusRoutes } from './consensus.js'; diff --git a/src/web/server.ts b/src/web/server.ts index f1d6aaa..bddb836 100644 --- a/src/web/server.ts +++ b/src/web/server.ts @@ -25,6 +25,7 @@ import { registerFilesystemRoutes, createAuthRoutes, registerIdentityRoutes, + registerConsensusRoutes, } from './routes/index.js'; import type { WebConfig } from './types.js'; import { AuthService } from '../auth/service.js'; @@ -96,6 +97,7 @@ export class WebServer { registerProjectRoutes(this.router, this.config); registerSpecificationRoutes(this.router, this.config); registerFilesystemRoutes(this.router, this.config); + registerConsensusRoutes(this.router, this.config); // Root endpoint this.router.get('/api/v1', (_req, res) => { @@ -115,6 +117,7 @@ export class WebServer { '/api/v1/projects', '/api/v1/specs', '/api/v1/filesystem', + '/api/v1/consensus', ], }); }); diff --git a/src/web/websocket/event-bridge.ts b/src/web/websocket/event-bridge.ts index e4c8d4e..8ac9b28 100644 --- a/src/web/websocket/event-bridge.ts +++ b/src/web/websocket/event-bridge.ts @@ -112,6 +112,29 @@ export interface AgentProgressEvent { message?: string; } +// Consensus checkpoint events +export interface ConsensusCheckpointCreatedEvent { + checkpointId: string; + taskId: string; + riskLevel: string; + subtaskCount: number; +} + +export interface ConsensusCheckpointApprovedEvent { + checkpointId: string; + reviewedBy: string; +} + +export interface ConsensusCheckpointRejectedEvent { + checkpointId: string; + reviewedBy: string; + rejectedSubtaskIds?: string[]; +} + +export interface ConsensusCheckpointExpiredEvent { + checkpointId: string; +} + // Event names export type EventName = | 'agent:spawned' @@ -132,7 +155,11 @@ export type EventName = | 'project:task:created' | 'project:task:phase' | 'spec:created' - | 'spec:status'; + | 'spec:status' + | 'consensus:checkpoint:created' + | 
'consensus:checkpoint:approved' + | 'consensus:checkpoint:rejected' + | 'consensus:checkpoint:expired'; // Event bridge class for managing WebSocket subscriptions export class EventBridge { @@ -160,6 +187,10 @@ export class EventBridge { 'project:task:phase', 'spec:created', 'spec:status', + 'consensus:checkpoint:created', + 'consensus:checkpoint:approved', + 'consensus:checkpoint:rejected', + 'consensus:checkpoint:expired', ]; for (const event of events) { diff --git a/tests/unit/consensus-service.test.ts b/tests/unit/consensus-service.test.ts new file mode 100644 index 0000000..75e3402 --- /dev/null +++ b/tests/unit/consensus-service.test.ts @@ -0,0 +1,797 @@ +/** + * Consensus Service tests + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { SQLiteStore } from '../../src/memory/sqlite-store.js'; +import { + ConsensusService, + getConsensusService, + resetConsensusService, +} from '../../src/tasks/consensus-service.js'; +import type { AgentStackConfig, TaskRiskLevel } from '../../src/types.js'; + +// Helper to create test config +function createConfig(options: { + consensusEnabled?: boolean; + requireForRiskLevels?: TaskRiskLevel[]; + reviewerStrategy?: 'adversarial' | 'different-model' | 'human'; + timeout?: number; + maxDepth?: number; + autoReject?: boolean; +}): AgentStackConfig { + return { + version: '1.0.0', + memory: { + path: './data/memory.db', + defaultNamespace: 'default', + vectorSearch: { enabled: false }, + }, + providers: { default: 'anthropic' }, + agents: { maxConcurrent: 5, defaultTimeout: 300 }, + github: { enabled: false }, + plugins: { enabled: true, directory: './plugins' }, + mcp: { transport: 'stdio' }, + hooks: { sessionStart: true, sessionEnd: true, preTask: true, postTask: true }, + consensus: { + enabled: options.consensusEnabled ?? 
false, + requireForRiskLevels: options.requireForRiskLevels ?? ['high', 'medium'], + reviewerStrategy: options.reviewerStrategy ?? 'adversarial', + timeout: options.timeout ?? 300000, + maxDepth: options.maxDepth ?? 5, + autoReject: options.autoReject ?? false, + }, + }; +} + +describe('ConsensusService', () => { + let tmpDir: string; + let store: SQLiteStore; + + beforeEach(() => { + resetConsensusService(); + tmpDir = mkdtempSync(join(tmpdir(), 'consensus-test-')); + }); + + afterEach(() => { + if (store) { + store.close(); + } + rmSync(tmpDir, { recursive: true, force: true }); + }); + + describe('isEnabled', () => { + it('should return false when consensus is disabled', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: false }) + ); + + expect(service.isEnabled()).toBe(false); + }); + + it('should return true when consensus is enabled', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + expect(service.isEnabled()).toBe(true); + }); + }); + + describe('requiresConsensus', () => { + it('should return false when disabled', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: false }) + ); + + const result = service.requiresConsensus('high', 1, 'parent-id'); + + expect(result.requiresConsensus).toBe(false); + }); + + it('should return false for low risk when not in requireForRiskLevels', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + requireForRiskLevels: ['high', 'medium'], + }) + ); + + const result = service.requiresConsensus('low', 1, 'parent-id'); + + expect(result.requiresConsensus).toBe(false); + expect(result.reason).toContain("'low' does not require consensus"); 
+ }); + + it('should return true for high risk with parent', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + requireForRiskLevels: ['high', 'medium'], + }) + ); + + const result = service.requiresConsensus('high', 1, 'parent-id'); + + expect(result.requiresConsensus).toBe(true); + expect(result.riskLevel).toBe('high'); + expect(result.depth).toBe(1); + }); + + it('should return true for medium risk with parent', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + requireForRiskLevels: ['high', 'medium'], + }) + ); + + const result = service.requiresConsensus('medium', 2, 'parent-id'); + + expect(result.requiresConsensus).toBe(true); + }); + + it('should return false for root tasks (no parent)', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + requireForRiskLevels: ['high'], + }) + ); + + const result = service.requiresConsensus('high', 0); + + expect(result.requiresConsensus).toBe(false); + expect(result.reason).toContain('Root tasks do not require consensus'); + }); + + it('should return true when depth exceeds maxDepth', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + requireForRiskLevels: ['high'], + maxDepth: 3, + }) + ); + + const result = service.requiresConsensus('high', 4, 'parent-id'); + + expect(result.requiresConsensus).toBe(true); + expect(result.reason).toContain('exceeds maximum allowed depth'); + }); + }); + + describe('createCheckpoint', () => { + it('should create a checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true, timeout: 
60000 }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + parentTaskId: 'parent-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 'Write code', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + expect(checkpoint.id).toBeDefined(); + expect(checkpoint.taskId).toBe('task-1'); + expect(checkpoint.parentTaskId).toBe('parent-1'); + expect(checkpoint.riskLevel).toBe('high'); + expect(checkpoint.status).toBe('pending'); + expect(checkpoint.proposedSubtasks).toHaveLength(1); + expect(checkpoint.expiresAt.getTime()).toBeGreaterThan(checkpoint.createdAt.getTime()); + }); + + it('should store checkpoint in database', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-2', + proposedSubtasks: [], + riskLevel: 'medium', + }); + + const retrieved = service.getCheckpoint(checkpoint.id); + + expect(retrieved).not.toBeNull(); + expect(retrieved?.taskId).toBe('task-2'); + expect(retrieved?.riskLevel).toBe('medium'); + }); + }); + + describe('getCheckpoint', () => { + it('should return null for non-existent checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.getCheckpoint('non-existent'); + + expect(result).toBeNull(); + }); + }); + + describe('approveCheckpoint', () => { + it('should approve a pending checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + const result = service.approveCheckpoint(checkpoint.id, 'reviewer-1', 'LGTM'); + 
+ expect(result.success).toBe(true); + expect(result.checkpoint?.status).toBe('approved'); + expect(result.checkpoint?.decidedAt).toBeDefined(); + }); + + it('should fail for non-existent checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.approveCheckpoint('non-existent', 'reviewer-1'); + + expect(result.success).toBe(false); + expect(result.error).toBe('Checkpoint not found'); + }); + + it('should fail for already approved checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + service.approveCheckpoint(checkpoint.id, 'reviewer-1'); + const result = service.approveCheckpoint(checkpoint.id, 'reviewer-2'); + + expect(result.success).toBe(false); + expect(result.error).toBe('Checkpoint is already approved'); + }); + }); + + describe('rejectCheckpoint', () => { + it('should reject a pending checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 'Risky operation', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + const result = service.rejectCheckpoint( + checkpoint.id, + 'reviewer-1', + 'Too risky', + ['subtask-1'] + ); + + expect(result.success).toBe(true); + expect(result.checkpoint?.status).toBe('rejected'); + }); + }); + + describe('listPendingCheckpoints', () => { + it('should return only pending checkpoints', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const 
service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + // Create multiple checkpoints + const cp1 = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + service.createCheckpoint({ + taskId: 'task-2', + proposedSubtasks: [], + riskLevel: 'medium', + }); + + // Approve one + service.approveCheckpoint(cp1.id, 'reviewer-1'); + + const pending = service.listPendingCheckpoints(); + + expect(pending).toHaveLength(1); + expect(pending[0].taskId).toBe('task-2'); + }); + + it('should respect limit and offset', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + // Create multiple checkpoints + for (let i = 0; i < 5; i++) { + service.createCheckpoint({ + taskId: `task-${i}`, + proposedSubtasks: [], + riskLevel: 'high', + }); + } + + const page1 = service.listPendingCheckpoints({ limit: 2, offset: 0 }); + const page2 = service.listPendingCheckpoints({ limit: 2, offset: 2 }); + + expect(page1).toHaveLength(2); + expect(page2).toHaveLength(2); + }); + }); + + describe('expireCheckpoints', () => { + it('should expire old checkpoints', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + timeout: 1, // 1ms timeout + }) + ); + + // Create checkpoint that will expire immediately + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + // Wait a bit for expiration + const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); + return delay(10).then(() => { + const expired = service.expireCheckpoints(); + + expect(expired).toBe(1); + + const retrieved = service.getCheckpoint(checkpoint.id); + expect(retrieved?.status).toBe('expired'); + }); + }); + + it('should not expire non-pending checkpoints', () => { + 
store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + timeout: 1, + }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + // Approve before expiration + service.approveCheckpoint(checkpoint.id, 'reviewer-1'); + + const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); + return delay(10).then(() => { + const expired = service.expireCheckpoints(); + + expect(expired).toBe(0); + + const retrieved = service.getCheckpoint(checkpoint.id); + expect(retrieved?.status).toBe('approved'); + }); + }); + }); + + describe('estimateRiskLevel', () => { + it('should return high for coder agent type', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('coder'); + + expect(result).toBe('high'); + }); + + it('should return high for devops agent type', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('devops'); + + expect(result).toBe('high'); + }); + + it('should return medium for architect agent type', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('architect'); + + expect(result).toBe('medium'); + }); + + it('should return low for researcher agent type', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('researcher'); + + expect(result).toBe('low'); + }); + + it('should detect high risk from 
input patterns', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('researcher', 'Delete all files in production'); + + expect(result).toBe('high'); + }); + + it('should detect medium risk from input patterns', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.estimateRiskLevel('researcher', 'Modify the configuration'); + + expect(result).toBe('medium'); + }); + }); + + describe('calculateTaskDepth', () => { + it('should return 0 for no parent', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const depth = service.calculateTaskDepth(); + + expect(depth).toBe(0); + }); + + it('should return correct depth for task chain', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + // Create task chain + const grandparent = store.createTask('coder', 'Grandparent'); + const parent = store.createTask('coder', 'Parent', undefined, { + parentTaskId: grandparent.id, + }); + const child = store.createTask('coder', 'Child', undefined, { + parentTaskId: parent.id, + }); + + const depth = service.calculateTaskDepth(child.id); + + // If creating a new task with child as parent: + // grandparent (depth 0) -> parent (depth 1) -> child (depth 2) -> new task (depth 3) + // So passing child.id returns 3 (the depth of a new task with child as parent) + expect(depth).toBe(3); + }); + }); + + describe('startAgentReview', () => { + it('should return reviewer config for pending checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, 
+ createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 'Write some code', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + const result = service.startAgentReview(checkpoint.id); + + expect(result.success).toBe(true); + expect(result.reviewerConfig).toBeDefined(); + expect(result.reviewerConfig?.agentType).toBe('adversarial'); + expect(result.reviewerConfig?.prompt).toContain('Risk Level'); + expect(result.reviewerConfig?.checkpointId).toBe(checkpoint.id); + }); + + it('should fail for non-existent checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const result = service.startAgentReview('non-existent'); + + expect(result.success).toBe(false); + expect(result.error).toBe('Checkpoint not found'); + }); + + it('should fail for already decided checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + service.approveCheckpoint(checkpoint.id, 'reviewer-1'); + + const result = service.startAgentReview(checkpoint.id); + + expect(result.success).toBe(false); + expect(result.error).toBe('Checkpoint is already approved'); + }); + }); + + describe('getApprovedSubtasks', () => { + it('should return all subtasks for fully approved checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 
'Task 1', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + { + id: 'subtask-2', + agentType: 'tester', + input: 'Task 2', + estimatedRiskLevel: 'low', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + service.approveCheckpoint(checkpoint.id, 'reviewer-1'); + + const approved = service.getApprovedSubtasks(checkpoint.id); + + expect(approved).toHaveLength(2); + }); + + it('should filter out rejected subtasks', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 'Task 1', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + { + id: 'subtask-2', + agentType: 'tester', + input: 'Task 2', + estimatedRiskLevel: 'low', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + // Reject with one subtask rejected + service.submitDecision(checkpoint.id, { + approved: true, + rejectedSubtaskIds: ['subtask-1'], + reviewedBy: 'reviewer-1', + reviewerType: 'human', + }); + + const approved = service.getApprovedSubtasks(checkpoint.id); + + expect(approved).toHaveLength(1); + expect(approved[0].id).toBe('subtask-2'); + }); + + it('should return empty for rejected checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [ + { + id: 'subtask-1', + agentType: 'coder', + input: 'Task 1', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }, + ], + riskLevel: 'high', + }); + + service.rejectCheckpoint(checkpoint.id, 'reviewer-1', 'Not allowed'); + + const approved = service.getApprovedSubtasks(checkpoint.id); + + expect(approved).toHaveLength(0); + }); + }); + + 
describe('getCheckpointEvents', () => { + it('should return audit events for a checkpoint', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + service.approveCheckpoint(checkpoint.id, 'reviewer-1', 'LGTM'); + + const events = service.getCheckpointEvents(checkpoint.id); + + expect(events.length).toBeGreaterThanOrEqual(2); // created + approved + expect(events.some(e => e.eventType === 'created')).toBe(true); + expect(events.some(e => e.eventType === 'approved')).toBe(true); + }); + }); + + describe('getConsensusService (singleton)', () => { + it('should return the same instance for same config', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const config = createConfig({ consensusEnabled: true }); + + const service1 = getConsensusService(store, config); + const service2 = getConsensusService(store, config); + + expect(service1).toBe(service2); + }); + + it('should create new instance when config changes', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const config1 = createConfig({ consensusEnabled: true, maxDepth: 5 }); + const config2 = createConfig({ consensusEnabled: true, maxDepth: 10 }); + + const service1 = getConsensusService(store, config1); + const service2 = getConsensusService(store, config2); + + expect(service1).not.toBe(service2); + expect(service1.getConfig().maxDepth).toBe(5); + expect(service2.getConfig().maxDepth).toBe(10); + }); + + it('should create new instance when forceNew is true', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const config = createConfig({ consensusEnabled: true }); + + const service1 = getConsensusService(store, config); + const service2 = getConsensusService(store, config, true); + + expect(service1).not.toBe(service2); + }); + }); +}); diff --git 
a/tests/unit/web/consensus-routes.test.ts b/tests/unit/web/consensus-routes.test.ts new file mode 100644 index 0000000..5d3b48a --- /dev/null +++ b/tests/unit/web/consensus-routes.test.ts @@ -0,0 +1,408 @@ +/** + * Consensus Routes tests + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import type { IncomingMessage, ServerResponse } from 'node:http'; +import { registerConsensusRoutes } from '../../../src/web/routes/consensus.js'; +import { Router } from '../../../src/web/router.js'; +import type { AgentStackConfig } from '../../../src/types.js'; +import { resetMemoryManager, getMemoryManager } from '../../../src/memory/index.js'; +import { resetConsensusService, getConsensusService } from '../../../src/tasks/consensus-service.js'; + +// Mock config +const mockConfig: AgentStackConfig = { + version: '1.0.0', + memory: { + path: ':memory:', + defaultNamespace: 'default', + vectorSearch: { enabled: false }, + }, + providers: { default: 'anthropic' }, + agents: { maxConcurrent: 5, defaultTimeout: 300 }, + github: { enabled: false }, + plugins: { enabled: false, directory: './plugins' }, + mcp: { transport: 'stdio' }, + hooks: { sessionStart: false, sessionEnd: false, preTask: false, postTask: false }, + consensus: { + enabled: true, + requireForRiskLevels: ['high', 'medium'], + reviewerStrategy: 'adversarial', + timeout: 300000, + maxDepth: 5, + autoReject: false, + }, +}; + +// Mock response +function createMockResponse() { + let body = ''; + const res = { + writeHead: vi.fn(), + end: vi.fn((data: string) => { body = data; }), + headersSent: false, + getBody: () => body ? 
JSON.parse(body) : null, + } as unknown as ServerResponse & { getBody: () => unknown }; + return res; +} + +// Mock request +function createMockRequest( + method: string, + url: string, + body?: unknown, + headers: Record = {} +): IncomingMessage { + const req = { + method, + url, + headers, + on: vi.fn((event: string, callback: (data?: Buffer) => void) => { + if (event === 'data' && body) { + callback(Buffer.from(JSON.stringify(body))); + } + if (event === 'end') { + callback(); + } + return req; + }), + } as unknown as IncomingMessage; + return req; +} + +describe('Consensus Routes', () => { + let router: Router; + + beforeEach(() => { + resetMemoryManager(); + resetConsensusService(); + router = new Router(); + registerConsensusRoutes(router, mockConfig); + }); + + afterEach(() => { + resetMemoryManager(); + resetConsensusService(); + }); + + describe('GET /api/v1/consensus/config', () => { + it('should return consensus configuration', async () => { + const req = createMockRequest('GET', '/api/v1/consensus/config'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.enabled).toBe(true); + expect(body.data.requireForRiskLevels).toEqual(['high', 'medium']); + expect(body.data.reviewerStrategy).toBe('adversarial'); + expect(body.data.timeout).toBe(300000); + expect(body.data.maxDepth).toBe(5); + }); + }); + + describe('GET /api/v1/consensus/pending', () => { + it('should return empty list when no pending checkpoints', async () => { + const req = createMockRequest('GET', '/api/v1/consensus/pending'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data).toEqual([]); + expect(body.pagination.total).toBe(0); + }); + + it('should return pending checkpoints with pagination', async () => { + const req = createMockRequest('GET', 
'/api/v1/consensus/pending?limit=10&offset=0'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data).toBeDefined(); + expect(body.pagination).toBeDefined(); + expect(body.pagination.limit).toBe(10); + expect(body.pagination.offset).toBe(0); + }); + }); + + describe('POST /api/v1/consensus/check', () => { + it('should check if consensus is required for high-risk agent', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/check', { + agentType: 'coder', + input: 'Write some code', + parentTaskId: 'some-parent-id', + }); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.requiresConsensus).toBeDefined(); + expect(body.data.riskLevel).toBe('high'); // coder is high risk + }); + + it('should estimate low risk for researcher agent', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/check', { + agentType: 'researcher', + }); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.riskLevel).toBe('low'); // researcher is low risk + }); + + it('should use provided risk level override', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/check', { + agentType: 'researcher', + riskLevel: 'high', + }); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.riskLevel).toBe('high'); + }); + + it('should return consensus config in response', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/check', { + agentType: 'coder', + }); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + 
expect(body.success).toBe(true); + expect(body.data.config).toBeDefined(); + expect(body.data.config.enabled).toBe(true); + expect(body.data.config.requireForRiskLevels).toEqual(['high', 'medium']); + expect(body.data.config.maxDepth).toBe(5); + }); + }); + + describe('POST /api/v1/consensus/expire', () => { + it('should expire old checkpoints', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/expire'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.success).toBe(true); + expect(body.data.expiredCount).toBe(0); + }); + }); + + describe('GET /api/v1/consensus/:id', () => { + it('should return 404 for non-existent checkpoint', async () => { + const req = createMockRequest('GET', '/api/v1/consensus/non-existent-id'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(false); + expect(body.error).toContain('not found'); + }); + + it('should return checkpoint details when exists', async () => { + // Create a checkpoint first + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ + id: 'subtask-1', + agentType: 'coder', + input: 'Test', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }], + riskLevel: 'high', + }); + + const req = createMockRequest('GET', `/api/v1/consensus/${checkpoint.id}`); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.id).toBe(checkpoint.id); + expect(body.data.taskId).toBe('task-1'); + expect(body.data.riskLevel).toBe('high'); + expect(body.data.status).toBe('pending'); + }); + }); + + describe('PUT /api/v1/consensus/:id/approve', () => { + 
it('should approve a checkpoint', async () => { + // Create a checkpoint first + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + const req = createMockRequest('PUT', `/api/v1/consensus/${checkpoint.id}/approve`, { + reviewedBy: 'user-1', + feedback: 'Looks good', + }); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.success).toBe(true); + expect(body.data.checkpoint.status).toBe('approved'); + }); + + it('should fail when reviewedBy is missing', async () => { + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + const req = createMockRequest('PUT', `/api/v1/consensus/${checkpoint.id}/approve`, {}); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(false); + expect(body.error).toContain('reviewedBy is required'); + }); + }); + + describe('PUT /api/v1/consensus/:id/reject', () => { + it('should reject a checkpoint', async () => { + // Create a checkpoint first + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ + id: 'subtask-1', + agentType: 'coder', + input: 'Risky', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }], + riskLevel: 'high', + }); + + const req = createMockRequest('PUT', `/api/v1/consensus/${checkpoint.id}/reject`, { + reviewedBy: 'user-1', + feedback: 'Too risky', + rejectedSubtaskIds: ['subtask-1'], + }); + 
const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.success).toBe(true); + expect(body.data.checkpoint.status).toBe('rejected'); + }); + }); + + describe('POST /api/v1/consensus/:id/start-review', () => { + it('should start agent review for a checkpoint', async () => { + // Create a checkpoint first + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ + id: 'subtask-1', + agentType: 'coder', + input: 'Test', + estimatedRiskLevel: 'high', + parentTaskId: 'task-1', + }], + riskLevel: 'high', + }); + + const req = createMockRequest('POST', `/api/v1/consensus/${checkpoint.id}/start-review`); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.success).toBe(true); + expect(body.data.reviewerConfig).toBeDefined(); + expect(body.data.reviewerConfig.agentType).toBe('adversarial'); + expect(body.data.reviewerConfig.checkpointId).toBe(checkpoint.id); + }); + + it('should fail for non-existent checkpoint', async () => { + const req = createMockRequest('POST', '/api/v1/consensus/non-existent/start-review'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(false); + expect(body.error).toContain('Checkpoint not found'); + }); + }); + + describe('GET /api/v1/consensus/:id/events', () => { + it('should return checkpoint events', async () => { + // Create a checkpoint first + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + const checkpoint = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [], + riskLevel: 'high', + }); + + // Approve 
it to generate events + service.approveCheckpoint(checkpoint.id, 'user-1', 'LGTM'); + + const req = createMockRequest('GET', `/api/v1/consensus/${checkpoint.id}/events`); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.checkpointId).toBe(checkpoint.id); + expect(body.data.events.length).toBeGreaterThanOrEqual(2); // created + approved + expect(body.data.events.some((e: any) => e.eventType === 'created')).toBe(true); + expect(body.data.events.some((e: any) => e.eventType === 'approved')).toBe(true); + }); + + it('should return 404 for non-existent checkpoint', async () => { + const req = createMockRequest('GET', '/api/v1/consensus/non-existent/events'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(false); + expect(body.error).toContain('not found'); + }); + }); +}); From 2afd6b8cfac034214881b9d8e00ee3e313630415 Mon Sep 17 00:00:00 2001 From: Alessio Rocchi Date: Thu, 29 Jan 2026 01:52:39 +0100 Subject: [PATCH 3/3] fix: improve consensus checkpoints based on code review - Fix pagination total count: add countPendingCheckpoints() to return actual total instead of page size - Make risk estimation configurable: add highRiskAgentTypes, mediumRiskAgentTypes, highRiskPatterns, mediumRiskPatterns to config - Use unref() on expiration interval to prevent keeping process alive - Add warning log when creating checkpoint with empty proposedSubtasks - Add tests for new functionality (7 new test cases) Co-Authored-By: Claude Opus 4.5 --- src/memory/sqlite-store.ts | 11 ++ src/tasks/consensus-service.ts | 61 ++++++++--- src/types.ts | 5 + src/utils/config.ts | 10 ++ src/web/routes/consensus.ts | 3 +- tests/unit/consensus-service.test.ts | 133 ++++++++++++++++++++++++ tests/unit/web/consensus-routes.test.ts | 34 ++++++ 7 files changed, 241 insertions(+), 16 deletions(-) diff 
--git a/src/memory/sqlite-store.ts b/src/memory/sqlite-store.ts index 0c1777f..22b7d98 100644 --- a/src/memory/sqlite-store.ts +++ b/src/memory/sqlite-store.ts @@ -2777,6 +2777,17 @@ export class SQLiteStore { return rows.map(row => this.rowToConsensusCheckpoint(row)); } + /** + * Count total pending consensus checkpoints + */ + countPendingCheckpoints(): number { + const row = this.db + .prepare('SELECT COUNT(*) as count FROM consensus_checkpoints WHERE status = ?') + .get('pending') as { count: number }; + + return row.count; + } + /** * Expire old checkpoints */ diff --git a/src/tasks/consensus-service.ts b/src/tasks/consensus-service.ts index 879a372..24af422 100644 --- a/src/tasks/consensus-service.ts +++ b/src/tasks/consensus-service.ts @@ -27,6 +27,14 @@ const DEFAULT_CONFIG: ConsensusConfig = { timeout: 300000, // 5 minutes maxDepth: 5, autoReject: false, + // Risk estimation defaults + highRiskAgentTypes: ['coder', 'devops', 'security-auditor'], + mediumRiskAgentTypes: ['architect', 'coordinator', 'analyst'], + highRiskPatterns: [ + 'delete', 'remove', 'drop', 'deploy', 'production', + 'credentials', 'secret', 'password', 'token', 'api key', + ], + mediumRiskPatterns: ['modify', 'update', 'change', 'configure', 'install'], }; export interface ConsensusCheckResult { @@ -50,7 +58,17 @@ export class ConsensusService { constructor(store: SQLiteStore, appConfig: AgentStackConfig) { this.store = store; - this.config = { ...DEFAULT_CONFIG, ...appConfig.consensus }; + // Merge config carefully to preserve defaults when values are undefined + const userConfig = appConfig.consensus ?? {}; + this.config = { + ...DEFAULT_CONFIG, + ...userConfig, + // Preserve defaults for optional array fields when not explicitly set + highRiskAgentTypes: userConfig.highRiskAgentTypes ?? DEFAULT_CONFIG.highRiskAgentTypes, + mediumRiskAgentTypes: userConfig.mediumRiskAgentTypes ?? DEFAULT_CONFIG.mediumRiskAgentTypes, + highRiskPatterns: userConfig.highRiskPatterns ?? 
DEFAULT_CONFIG.highRiskPatterns, + mediumRiskPatterns: userConfig.mediumRiskPatterns ?? DEFAULT_CONFIG.mediumRiskPatterns, + }; log.debug('Consensus service initialized', { enabled: this.config.enabled, @@ -137,6 +155,15 @@ export class ConsensusService { createCheckpoint(options: CreateCheckpointOptions): ConsensusCheckpoint { const checkpointId = randomUUID(); + // Warn if creating a checkpoint with no subtasks + if (options.proposedSubtasks.length === 0) { + log.warn('Creating consensus checkpoint with no proposed subtasks', { + checkpointId, + taskId: options.taskId, + riskLevel: options.riskLevel, + }); + } + log.info('Creating consensus checkpoint', { checkpointId, taskId: options.taskId, @@ -350,6 +377,13 @@ Respond with your decision in this JSON format: return this.store.listPendingCheckpoints(options); } + /** + * Count total pending checkpoints + */ + countPendingCheckpoints(): number { + return this.store.countPendingCheckpoints(); + } + /** * Get checkpoint events (audit log) */ @@ -394,38 +428,33 @@ Respond with your decision in this JSON format: * Estimate risk level based on agent type and input */ estimateRiskLevel(agentType: string, input?: string): TaskRiskLevel { - // High-risk agent types - const highRiskAgents = ['coder', 'devops', 'security-auditor']; + // High-risk agent types (configurable) + const highRiskAgents = this.config.highRiskAgentTypes ?? []; if (highRiskAgents.includes(agentType)) { return 'high'; } - // Medium-risk agent types - const mediumRiskAgents = ['architect', 'coordinator', 'analyst']; + // Medium-risk agent types (configurable) + const mediumRiskAgents = this.config.mediumRiskAgentTypes ?? 
[]; if (mediumRiskAgents.includes(agentType)) { return 'medium'; } - // Check input for high-risk patterns + // Check input for high-risk patterns (configurable) if (input) { const loweredInput = input.toLowerCase(); - const highRiskPatterns = [ - 'delete', 'remove', 'drop', 'deploy', 'production', - 'credentials', 'secret', 'password', 'token', 'api key', - ]; + const highRiskPatterns = this.config.highRiskPatterns ?? []; for (const pattern of highRiskPatterns) { - if (loweredInput.includes(pattern)) { + if (loweredInput.includes(pattern.toLowerCase())) { return 'high'; } } - const mediumRiskPatterns = [ - 'modify', 'update', 'change', 'configure', 'install', - ]; + const mediumRiskPatterns = this.config.mediumRiskPatterns ?? []; for (const pattern of mediumRiskPatterns) { - if (loweredInput.includes(pattern)) { + if (loweredInput.includes(pattern.toLowerCase())) { return 'medium'; } } @@ -457,6 +486,8 @@ Respond with your decision in this JSON format: this.expirationInterval = setInterval(() => { this.expireCheckpoints(); }, 60000); + // Don't keep the process alive just for expiration checks + this.expirationInterval.unref(); } /** diff --git a/src/types.ts b/src/types.ts index 073c622..510cdf8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -147,6 +147,11 @@ export interface ConsensusConfig { timeout: number; maxDepth: number; autoReject: boolean; + // Risk estimation configuration + highRiskAgentTypes?: string[]; + mediumRiskAgentTypes?: string[]; + highRiskPatterns?: string[]; + mediumRiskPatterns?: string[]; } // Consensus Checkpoint diff --git a/src/utils/config.ts b/src/utils/config.ts index 6c52e96..fc49499 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -126,6 +126,16 @@ const ConsensusConfigSchema = z.object({ timeout: z.number().min(30000).max(3600000).default(300000), maxDepth: z.number().min(1).max(10).default(5), autoReject: z.boolean().default(false), + // Risk estimation configuration + highRiskAgentTypes: 
z.array(z.string()).default(['coder', 'devops', 'security-auditor']), + mediumRiskAgentTypes: z.array(z.string()).default(['architect', 'coordinator', 'analyst']), + highRiskPatterns: z.array(z.string()).default([ + 'delete', 'remove', 'drop', 'deploy', 'production', + 'credentials', 'secret', 'password', 'token', 'api key', + ]), + mediumRiskPatterns: z.array(z.string()).default([ + 'modify', 'update', 'change', 'configure', 'install', + ]), }); const ConfigSchema = z.object({ diff --git a/src/web/routes/consensus.ts b/src/web/routes/consensus.ts index 8e80f7b..97f2e15 100644 --- a/src/web/routes/consensus.ts +++ b/src/web/routes/consensus.ts @@ -39,6 +39,7 @@ export function registerConsensusRoutes(router: Router, config: AgentStackConfig const service = getService(); const checkpoints = service.listPendingCheckpoints({ limit, offset }); + const total = service.countPendingCheckpoints(); sendPaginated( res, @@ -53,7 +54,7 @@ export function registerConsensusRoutes(router: Router, config: AgentStackConfig createdAt: cp.createdAt.toISOString(), expiresAt: cp.expiresAt.toISOString(), })), - { limit, offset, total: checkpoints.length } + { limit, offset, total } ); }); diff --git a/tests/unit/consensus-service.test.ts b/tests/unit/consensus-service.test.ts index 75e3402..78fe3f4 100644 --- a/tests/unit/consensus-service.test.ts +++ b/tests/unit/consensus-service.test.ts @@ -22,6 +22,10 @@ function createConfig(options: { timeout?: number; maxDepth?: number; autoReject?: boolean; + highRiskAgentTypes?: string[]; + mediumRiskAgentTypes?: string[]; + highRiskPatterns?: string[]; + mediumRiskPatterns?: string[]; }): AgentStackConfig { return { version: '1.0.0', @@ -43,6 +47,10 @@ function createConfig(options: { timeout: options.timeout ?? 300000, maxDepth: options.maxDepth ?? 5, autoReject: options.autoReject ?? 
false, + highRiskAgentTypes: options.highRiskAgentTypes, + mediumRiskAgentTypes: options.mediumRiskAgentTypes, + highRiskPatterns: options.highRiskPatterns, + mediumRiskPatterns: options.mediumRiskPatterns, }, }; } @@ -523,6 +531,131 @@ describe('ConsensusService', () => { expect(result).toBe('medium'); }); + + it('should use custom high risk agent types when configured', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + highRiskAgentTypes: ['custom-risky-agent'], + mediumRiskAgentTypes: [], + }) + ); + + expect(service.estimateRiskLevel('custom-risky-agent')).toBe('high'); + expect(service.estimateRiskLevel('coder')).toBe('low'); // Not in custom list + }); + + it('should use custom medium risk agent types when configured', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + highRiskAgentTypes: [], + mediumRiskAgentTypes: ['custom-medium-agent'], + }) + ); + + expect(service.estimateRiskLevel('custom-medium-agent')).toBe('medium'); + expect(service.estimateRiskLevel('architect')).toBe('low'); // Not in custom list + }); + + it('should use custom high risk patterns when configured', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + highRiskAgentTypes: [], + highRiskPatterns: ['dangerous-keyword'], + mediumRiskPatterns: [], + }) + ); + + expect(service.estimateRiskLevel('researcher', 'This is a dangerous-keyword test')).toBe('high'); + expect(service.estimateRiskLevel('researcher', 'Delete everything')).toBe('low'); // Not in custom list + }); + + it('should use custom medium risk patterns when configured', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ + consensusEnabled: true, + 
highRiskAgentTypes: [], + highRiskPatterns: [], + mediumRiskPatterns: ['slightly-risky'], + }) + ); + + expect(service.estimateRiskLevel('researcher', 'This is slightly-risky')).toBe('medium'); + expect(service.estimateRiskLevel('researcher', 'Modify something')).toBe('low'); // Not in custom list + }); + }); + + describe('countPendingCheckpoints', () => { + it('should return 0 when no checkpoints exist', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + expect(service.countPendingCheckpoints()).toBe(0); + }); + + it('should return correct count of pending checkpoints', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + // Create 3 checkpoints + service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ id: 's1', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-1' }], + riskLevel: 'high', + }); + service.createCheckpoint({ + taskId: 'task-2', + proposedSubtasks: [{ id: 's2', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-2' }], + riskLevel: 'high', + }); + service.createCheckpoint({ + taskId: 'task-3', + proposedSubtasks: [{ id: 's3', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-3' }], + riskLevel: 'high', + }); + + expect(service.countPendingCheckpoints()).toBe(3); + }); + + it('should not count approved checkpoints', () => { + store = new SQLiteStore(join(tmpDir, 'test.db')); + const service = new ConsensusService( + store, + createConfig({ consensusEnabled: true }) + ); + + const checkpoint1 = service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ id: 's1', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-1' }], + riskLevel: 'high', + }); + service.createCheckpoint({ + taskId: 
'task-2', + proposedSubtasks: [{ id: 's2', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-2' }], + riskLevel: 'high', + }); + + // Approve first checkpoint + service.approveCheckpoint(checkpoint1.id, 'user-1'); + + expect(service.countPendingCheckpoints()).toBe(1); + }); }); describe('calculateTaskDepth', () => { diff --git a/tests/unit/web/consensus-routes.test.ts b/tests/unit/web/consensus-routes.test.ts index 5d3b48a..8517f43 100644 --- a/tests/unit/web/consensus-routes.test.ts +++ b/tests/unit/web/consensus-routes.test.ts @@ -128,6 +128,40 @@ describe('Consensus Routes', () => { expect(body.pagination.limit).toBe(10); expect(body.pagination.offset).toBe(0); }); + + it('should return accurate total count independent of limit', async () => { + // Create 3 checkpoints + const manager = getMemoryManager(mockConfig); + const service = getConsensusService(manager.getStore(), mockConfig); + + service.createCheckpoint({ + taskId: 'task-1', + proposedSubtasks: [{ id: 's1', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-1' }], + riskLevel: 'high', + }); + service.createCheckpoint({ + taskId: 'task-2', + proposedSubtasks: [{ id: 's2', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-2' }], + riskLevel: 'high', + }); + service.createCheckpoint({ + taskId: 'task-3', + proposedSubtasks: [{ id: 's3', agentType: 'coder', input: 'Test', estimatedRiskLevel: 'high', parentTaskId: 'task-3' }], + riskLevel: 'high', + }); + + // Request with limit=1 should still show total=3 + const req = createMockRequest('GET', '/api/v1/consensus/pending?limit=1&offset=0'); + const res = createMockResponse(); + + await router.handle(req, res); + + const body = res.getBody() as any; + expect(body.success).toBe(true); + expect(body.data.length).toBe(1); // Only 1 returned due to limit + expect(body.pagination.total).toBe(3); // But total is accurate + expect(body.pagination.limit).toBe(1); + }); 
}); describe('POST /api/v1/consensus/check', () => {