diff --git a/.github/workflows/monitoring.yml b/.github/workflows/monitoring.yml new file mode 100644 index 0000000..14140f4 --- /dev/null +++ b/.github/workflows/monitoring.yml @@ -0,0 +1,39 @@ +name: Public Monitoring + +on: + schedule: + # Run every 5 minutes + - cron: '*/5 * * * *' + workflow_dispatch: + # Allow manual triggers + +permissions: + contents: write + +jobs: + monitor: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Run monitoring check + run: node monitoring.mjs --once + + - name: Commit and push monitoring data + run: | + git config user.name "Monitoring Bot" + git config user.email "bot@deep-assistant.com" + git add monitoring-data/ + if git diff --staged --quiet; then + echo "No changes to commit" + else + git commit -m "Update monitoring data [skip ci]" + git push + fi diff --git a/.gitignore b/.gitignore index 1170717..7409b8d 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,6 @@ dist .yarn/build-state.yml .yarn/install-state.gz .pnp.* + +# Monitoring test data +test-monitoring-data/ diff --git a/MONITORING.md b/MONITORING.md new file mode 100644 index 0000000..4adf651 --- /dev/null +++ b/MONITORING.md @@ -0,0 +1,229 @@ +# Public Monitoring System + +This repository includes a public monitoring system for tracking the health and latency of Deep Assistant services. + +## Overview + +The monitoring system tracks: +- **API Latency**: Minimum response times for the smallest possible requests +- **Service Availability**: Uptime percentage and operational status +- **Historical Data**: Trends over time (up to 24 hours of history) + +## Components + +### 1. Monitoring Service (`monitoring.mjs`) + +A Node.js script that periodically checks service endpoints and measures latency. + +**Features:** +- HTTP endpoint health checks +- Ping-style connectivity tests +- Configurable check intervals +- JSON data output for easy integration +- Historical data tracking (up to 1440 records = 24 hours at 1-minute intervals) + +**Usage:** + +```bash +# Run continuous monitoring (checks every 60 seconds) +node monitoring.mjs + +# Run a single check and exit +node monitoring.mjs --once + +# Show help +node monitoring.mjs --help +``` + +### 2. Status Dashboard (`status.html`) + +A static HTML page that displays real-time monitoring data in a user-friendly format. + +**Features:** +- Real-time status indicators +- Latency metrics (current, average, minimum) +- Success rate percentages +- Auto-refresh every 30 seconds +- Responsive design for mobile and desktop + +**Usage:** + +Simply open `status.html` in a web browser. The page will automatically load monitoring data from `monitoring-data/status.json`. + +For production deployment, serve this HTML file along with the `monitoring-data` directory using any static file server. + +### 3. Test Suite (`monitoring.test.mjs`) + +Automated tests to verify monitoring functionality. + +**Usage:** + +```bash +node monitoring.test.mjs +``` + +## Configuration + +Edit the `MONITOR_CONFIG` object in `monitoring.mjs` to customize: + +```javascript +const MONITOR_CONFIG = { + // Services to monitor + services: [ + { + name: 'API Gateway', + url: 'https://api.deep-assistant.com/health', + type: 'http', + method: 'GET', + timeout: 10000, + }, + // Add more services... 
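+    // For instance, a hypothetical ping-style entry (the URL below is illustrative,
+    // not a real Deep Assistant endpoint):
+    // {
+    //   name: 'Example Service',
+    //   url: 'https://example.deep-assistant.com',
+    //   type: 'ping',     // checked with a HEAD request for minimal overhead
+    //   timeout: 10000,   // milliseconds
+    // },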
+ ], + + // Check interval (milliseconds) + checkInterval: 60000, // 1 minute + + // Historical records to keep + historyLimit: 1440, // 24 hours at 1-minute intervals + + // Output directory + outputDir: './monitoring-data', +}; +``` + +## Data Format + +### Status Data (`monitoring-data/status.json`) + +```json +{ + "lastUpdate": "2025-10-30T04:42:00.000Z", + "services": [ + { + "name": "API Gateway", + "status": "operational", + "latency": 45, + "avgLatency": 48, + "minLatency": 42, + "successRate": 100, + "lastCheck": "2025-10-30T04:42:00.000Z" + } + ] +} +``` + +### Historical Data (`monitoring-data/history.json`) + +```json +{ + "API Gateway": [ + { + "timestamp": "2025-10-30T04:42:00.000Z", + "success": true, + "latency": 45, + "status": 200 + } + ] +} +``` + +## Deployment + +### Option 1: GitHub Pages + +1. Enable GitHub Pages for this repository +2. Configure monitoring service to run on a server (e.g., GitHub Actions scheduled workflow) +3. Commit monitoring data to the repository (or use GitHub Actions artifacts) +4. Access the status page at: `https://deep-assistant.github.io/master-plan/status.html` + +### Option 2: Standalone Server + +1. Deploy `monitoring.mjs` to a server or cloud function +2. Run it in continuous mode or as a scheduled task +3. Serve `status.html` and `monitoring-data/` via a web server (nginx, Apache, etc.) + +### Option 3: GitHub Actions (Recommended) + +Create `.github/workflows/monitoring.yml`: + +```yaml +name: Public Monitoring + +on: + schedule: + - cron: '*/5 * * * *' # Every 5 minutes + workflow_dispatch: # Allow manual triggers + +jobs: + monitor: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Run monitoring check + run: node monitoring.mjs --once + + - name: Commit monitoring data + run: | + git config user.name "Monitoring Bot" + git config user.email "bot@deep-assistant.com" + git add monitoring-data/ + git commit -m "Update monitoring data" || exit 0 + git push +``` + +## Monitored Services + +Currently monitoring: +- **API Gateway**: OpenAI-compatible API gateway with multi-provider failover +- **Telegram Bot**: Deep Assistant Telegram bot +- **Web Capture**: Web page capture microservice + +## Metrics Explained + +- **Current Latency**: Time taken for the most recent check (in milliseconds) +- **Average Latency**: Average response time over the last 10 checks +- **Min Latency**: Fastest response time over the last 10 checks +- **Success Rate**: Percentage of successful checks out of the last 10 attempts +- **Status**: + - `operational`: Service is responding normally + - `down`: Service is not responding or returning errors + +## Troubleshooting + +### Monitoring service not starting + +- Ensure Node.js v18+ is installed +- Check that all service URLs are accessible +- Review timeout settings if services are slow to respond + +### Status page not loading data + +- Verify `monitoring-data/status.json` exists and is valid JSON +- Check browser console for errors +- Ensure the page is served via HTTP/HTTPS (not `file://`) + +### High latency values + +- Check network connectivity +- Verify service endpoints are correct +- Consider adjusting timeout values +- Review service logs for performance issues + +## Contributing + +To add new services to monitor: + +1. Edit `MONITOR_CONFIG.services` in `monitoring.mjs` +2. Add service details (name, URL, type, method, timeout) +3. Test with `node monitoring.mjs --once` +4. 
Update this documentation + +## License + +Same as the main repository. diff --git a/README.md b/README.md index f2900fe..3d129f5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ The repository to host: * [issues](https://github.com/deep-assistant/master-plan/issues) that related to entire organization; -* [community discussions](https://github.com/deep-assistant/master-plan/discussions). +* [community discussions](https://github.com/deep-assistant/master-plan/discussions); +* [public monitoring](status.html) of Deep Assistant services. ## Architecture Documentation @@ -13,6 +14,17 @@ Detailed architecture documentation for each project: - **[GPTutor](https://github.com/deep-assistant/GPTutor/blob/main/ARCHITECTURE.md)** - Multi-platform educational AI (VK/Telegram mini apps) - **[web-capture](https://github.com/deep-assistant/web-capture/blob/main/ARCHITECTURE.md)** - Web page capture microservice (HTML/Markdown/PNG) +## Public Monitoring + +Real-time monitoring of Deep Assistant services is available at [status.html](status.html). + +The monitoring system tracks: +- **API Latency**: Minimum response times for minimal requests +- **Service Availability**: Uptime percentage and operational status +- **Historical Data**: Performance trends over time + +For more information, see [MONITORING.md](MONITORING.md). + # End Goal / Mission * Personal AI assistant that is available at any your device and can be hosted on your hardware or on in the cloud with easy migrations and synchronization between them. diff --git a/monitoring.mjs b/monitoring.mjs new file mode 100755 index 0000000..45ff916 --- /dev/null +++ b/monitoring.mjs @@ -0,0 +1,327 @@ +#!/usr/bin/env node + +/** + * Public Monitoring Service for Deep Assistant APIs + * + * This service monitors API latency by sending minimal requests to various endpoints + * and tracks response times for public visibility. 
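+ *
+ * Usage:
+ *   node monitoring.mjs          Run in continuous mode (checks every 60 seconds)
+ *   node monitoring.mjs --once   Run a single check and exit
+ *   node monitoring.mjs --help   Show usage information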
+ */ + +import { performance } from 'perf_hooks'; +import { writeFile, readFile, mkdir } from 'fs/promises'; +import { join } from 'path'; + +const MONITOR_CONFIG = { + // Services to monitor + services: [ + { + name: 'API Gateway', + url: 'https://api.deep-assistant.com/health', + type: 'http', + method: 'GET', + timeout: 10000, + }, + { + name: 'Telegram Bot', + url: 'https://t.me/deep_assistant_bot', + type: 'ping', + timeout: 10000, + }, + { + name: 'Web Capture', + url: 'https://web-capture.deep-assistant.com/health', + type: 'http', + method: 'GET', + timeout: 10000, + }, + ], + + // How often to check (in milliseconds) + checkInterval: 60000, // 1 minute + + // How many historical records to keep + historyLimit: 1440, // 24 hours at 1-minute intervals + + // Output directory for status data + outputDir: './monitoring-data', +}; + +/** + * Measures HTTP request latency + */ +async function measureHttpLatency(service) { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), service.timeout); + + try { + const startTime = performance.now(); + + const response = await fetch(service.url, { + method: service.method || 'GET', + signal: controller.signal, + headers: { + 'User-Agent': 'Deep-Assistant-Monitor/1.0', + }, + }); + + const endTime = performance.now(); + clearTimeout(timeoutId); + + return { + success: response.ok, + latency: Math.round(endTime - startTime), + status: response.status, + timestamp: new Date().toISOString(), + }; + } catch (error) { + clearTimeout(timeoutId); + return { + success: false, + latency: null, + error: error.message, + timestamp: new Date().toISOString(), + }; + } +} + +/** + * Measures ping latency (for services without HTTP endpoints) + */ +async function measurePingLatency(service) { + // For services that don't have a dedicated health endpoint, + // we attempt a basic connection test + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), service.timeout); + + try { + const startTime = performance.now(); + + // Try to fetch the URL with HEAD method for minimal overhead + const response = await fetch(service.url, { + method: 'HEAD', + signal: controller.signal, + headers: { + 'User-Agent': 'Deep-Assistant-Monitor/1.0', + }, + }); + + const endTime = performance.now(); + clearTimeout(timeoutId); + + return { + success: true, // Connection successful + latency: Math.round(endTime - startTime), + status: response.status, + timestamp: new Date().toISOString(), + }; + } catch (error) { + clearTimeout(timeoutId); + return { + success: false, + latency: null, + error: error.message, + timestamp: new Date().toISOString(), + }; + } +} + +/** + * Check a single service + */ +async function checkService(service) { + console.log(`Checking ${service.name}...`); + + let result; + if (service.type === 'http') { + result = await measureHttpLatency(service); + } else if (service.type === 'ping') { + result = await measurePingLatency(service); + } else { + throw new Error(`Unknown service type: ${service.type}`); + } + + return { + service: service.name, + ...result, + }; +} + +/** + * Load historical monitoring data + */ +async function loadHistory() { + const historyPath = join(MONITOR_CONFIG.outputDir, 'history.json'); + try { + const data = await readFile(historyPath, 'utf-8'); + return JSON.parse(data); + } catch (error) { + // File doesn't exist or is invalid, start fresh + return {}; + } +} + +/** + * Save monitoring results + */ +async function saveResults(results, 
history) { + await mkdir(MONITOR_CONFIG.outputDir, { recursive: true }); + + // Update history for each service + for (const result of results) { + if (!history[result.service]) { + history[result.service] = []; + } + + history[result.service].push({ + timestamp: result.timestamp, + success: result.success, + latency: result.latency, + status: result.status, + error: result.error, + }); + + // Keep only the last N records + if (history[result.service].length > MONITOR_CONFIG.historyLimit) { + history[result.service] = history[result.service].slice(-MONITOR_CONFIG.historyLimit); + } + } + + // Save history + await writeFile( + join(MONITOR_CONFIG.outputDir, 'history.json'), + JSON.stringify(history, null, 2) + ); + + // Calculate and save current status + const status = { + lastUpdate: new Date().toISOString(), + services: results.map(result => { + const serviceHistory = history[result.service] || []; + const recentChecks = serviceHistory.slice(-10); // Last 10 checks + const successRate = recentChecks.length > 0 + ? (recentChecks.filter(c => c.success).length / recentChecks.length) * 100 + : 0; + + const recentLatencies = recentChecks + .filter(c => c.success && c.latency !== null) + .map(c => c.latency); + + const avgLatency = recentLatencies.length > 0 + ? Math.round(recentLatencies.reduce((a, b) => a + b, 0) / recentLatencies.length) + : null; + + const minLatency = recentLatencies.length > 0 + ? Math.min(...recentLatencies) + : null; + + return { + name: result.service, + status: result.success ? 'operational' : 'down', + latency: result.latency, + avgLatency, + minLatency, + successRate: Math.round(successRate), + lastCheck: result.timestamp, + error: result.error, + }; + }), + }; + + await writeFile( + join(MONITOR_CONFIG.outputDir, 'status.json'), + JSON.stringify(status, null, 2) + ); + + return status; +} + +/** + * Run a single monitoring check for all services + */ +async function runMonitoringCheck() { + console.log(`\n=== Monitoring Check: ${new Date().toISOString()} ===`); + + const results = await Promise.all( + MONITOR_CONFIG.services.map(service => checkService(service)) + ); + + const history = await loadHistory(); + const status = await saveResults(results, history); + + // Display results + console.log('\nCurrent Status:'); + for (const service of status.services) { + const statusIcon = service.status === 'operational' ? '✅' : '❌'; + const latencyInfo = service.latency !== null + ? `${service.latency}ms (avg: ${service.avgLatency}ms, min: ${service.minLatency}ms)` + : 'N/A'; + console.log(`${statusIcon} ${service.name}: ${service.status} - ${latencyInfo} (${service.successRate}% uptime)`); + if (service.error) { + console.log(` Error: ${service.error}`); + } + } + + return status; +} + +/** + * Run monitoring in continuous mode + */ +async function runContinuousMonitoring() { + console.log('Starting Deep Assistant Public Monitoring Service...'); + console.log(`Checking every ${MONITOR_CONFIG.checkInterval / 1000} seconds`); + console.log(`Monitoring ${MONITOR_CONFIG.services.length} services`); + + // Run initial check + await runMonitoringCheck(); + + // Schedule periodic checks + setInterval(async () => { + try { + await runMonitoringCheck(); + } catch (error) { + console.error('Error during monitoring check:', error); + } + }, MONITOR_CONFIG.checkInterval); + + console.log('\nMonitoring service is running. 
Press Ctrl+C to stop.'); +} + +/** + * Main entry point + */ +async function main() { + const args = process.argv.slice(2); + + if (args.includes('--once')) { + // Run a single check and exit + await runMonitoringCheck(); + console.log('\nSingle check completed.'); + } else if (args.includes('--help')) { + console.log(` +Deep Assistant Public Monitoring Service + +Usage: + node monitoring.mjs [options] + +Options: + --once Run a single monitoring check and exit + --help Show this help message + +Without options, runs in continuous monitoring mode. + `); + } else { + // Run in continuous mode + await runContinuousMonitoring(); + } +} + +// Run if executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + main().catch(error => { + console.error('Fatal error:', error); + process.exit(1); + }); +} + +export { runMonitoringCheck, MONITOR_CONFIG }; diff --git a/monitoring.test.mjs b/monitoring.test.mjs new file mode 100755 index 0000000..563335f --- /dev/null +++ b/monitoring.test.mjs @@ -0,0 +1,167 @@ +#!/usr/bin/env node + +/** + * Tests for the monitoring service + */ + +import { strict as assert } from 'assert'; +import { runMonitoringCheck, MONITOR_CONFIG } from './monitoring.mjs'; +import { readFile, rm } from 'fs/promises'; +import { join } from 'path'; + +const TEST_OUTPUT_DIR = './test-monitoring-data'; + +// Override output directory for tests +MONITOR_CONFIG.outputDir = TEST_OUTPUT_DIR; + +// Use a simple test service that should respond quickly +MONITOR_CONFIG.services = [ + { + name: 'Test Service', + url: 'https://httpbin.org/status/200', + type: 'http', + method: 'GET', + timeout: 5000, + }, +]; + +/** + * Clean up test data + */ +async function cleanup() { + try { + await rm(TEST_OUTPUT_DIR, { recursive: true, force: true }); + } catch (error) { + // Ignore errors if directory doesn't exist + } +} + +/** + * Test: Monitoring check runs successfully + */ +async function testMonitoringCheckRuns() { + console.log('Test: Monitoring check runs successfully'); + + await cleanup(); + + const status = await runMonitoringCheck(); + + assert.ok(status, 'Status should be returned'); + assert.ok(status.lastUpdate, 'Status should have lastUpdate'); + assert.ok(Array.isArray(status.services), 'Status should have services array'); + assert.strictEqual(status.services.length, 1, 'Should have one service'); + + console.log('✅ Test passed'); +} + +/** + * Test: Status file is created + */ +async function testStatusFileCreated() { + console.log('Test: Status file is created'); + + await cleanup(); + + await runMonitoringCheck(); + + const statusPath = join(TEST_OUTPUT_DIR, 'status.json'); + const statusContent = await readFile(statusPath, 'utf-8'); + const status = JSON.parse(statusContent); + + assert.ok(status.lastUpdate, 'Status file should contain lastUpdate'); + assert.ok(status.services, 'Status file should contain services'); + + console.log('✅ Test passed'); +} + +/** + * Test: History file is created + */ +async function testHistoryFileCreated() { + console.log('Test: History file is created'); + + await cleanup(); + + await runMonitoringCheck(); + + const historyPath = join(TEST_OUTPUT_DIR, 'history.json'); + const historyContent = await readFile(historyPath, 'utf-8'); + const history = JSON.parse(historyContent); + + assert.ok(history['Test Service'], 'History should contain Test Service'); + assert.ok(Array.isArray(history['Test Service']), 'Service history should be an array'); + assert.ok(history['Test Service'].length > 0, 'Service history should have at least one 
entry'); + + console.log('✅ Test passed'); +} + +/** + * Test: Service metrics are tracked + */ +async function testServiceMetricsTracked() { + console.log('Test: Service metrics are tracked'); + + await cleanup(); + + const status = await runMonitoringCheck(); + const service = status.services[0]; + + assert.ok(service.name, 'Service should have a name'); + assert.ok(service.status, 'Service should have a status'); + assert.ok(typeof service.latency === 'number' || service.latency === null, 'Service should have latency'); + assert.ok(typeof service.successRate === 'number', 'Service should have success rate'); + assert.ok(service.lastCheck, 'Service should have lastCheck timestamp'); + + console.log('✅ Test passed'); +} + +/** + * Test: Multiple checks accumulate history + */ +async function testMultipleChecksAccumulateHistory() { + console.log('Test: Multiple checks accumulate history'); + + await cleanup(); + + // Run three checks + await runMonitoringCheck(); + await runMonitoringCheck(); + await runMonitoringCheck(); + + const historyPath = join(TEST_OUTPUT_DIR, 'history.json'); + const historyContent = await readFile(historyPath, 'utf-8'); + const history = JSON.parse(historyContent); + + assert.strictEqual(history['Test Service'].length, 3, 'Should have three historical records'); + + console.log('✅ Test passed'); +} + +/** + * Run all tests + */ +async function runTests() { + console.log('=== Running Monitoring Service Tests ===\n'); + + try { + await testMonitoringCheckRuns(); + await testStatusFileCreated(); + await testHistoryFileCreated(); + await testServiceMetricsTracked(); + await testMultipleChecksAccumulateHistory(); + + console.log('\n=== All tests passed! ==='); + await cleanup(); + process.exit(0); + } catch (error) { + console.error('\n❌ Test failed:', error.message); + console.error(error.stack); + await cleanup(); + process.exit(1); + } +} + +// Run tests if executed directly +if (import.meta.url === `file://${process.argv[1]}`) { + runTests(); +} diff --git a/status.html b/status.html new file mode 100644 index 0000000..3cbaa5b --- /dev/null +++ b/status.html @@ -0,0 +1,293 @@ + + +
+<!-- Public Status Dashboard: static status page that loads monitoring-data/status.json
+     and auto-refreshes every 30 seconds; the full markup (293 lines) is not reproduced here. -->
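+<!-- Illustrative sketch only, not the actual markup of status.html: it shows the
+     behaviour described in MONITORING.md (fetch monitoring-data/status.json and
+     refresh every 30 seconds). The element id below is an assumption. -->
+<div id="services"></div>
+<script>
+  async function loadStatus() {
+    const response = await fetch('monitoring-data/status.json');
+    const status = await response.json();
+    // One entry per service: name, status, and most recent latency.
+    document.getElementById('services').textContent = status.services
+      .map(s => `${s.name}: ${s.status} (${s.latency ?? 'N/A'} ms)`)
+      .join(' | ');
+  }
+  loadStatus();
+  setInterval(loadStatus, 30000); // auto-refresh every 30 seconds
+</script>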