diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..0a301ce
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,31 @@
+name: CI
+
+on:
+  push:
+    branches: [ main, central-dev, silicon-dev, tokyo-dev ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Typecheck
+        run: npm run typecheck
+
+      - name: Build
+        run: npm run build
+
+      - name: Run tests
+        run: npm run test
diff --git a/fsc/fsc-gateway-daemon.ts b/fsc/fsc-gateway-daemon.ts
new file mode 100644
--- /dev/null
+++ b/fsc/fsc-gateway-daemon.ts
@@ -0,0 +1,388 @@
+#!/usr/bin/env bun
+/**
+ * FSC Gateway Daemon (central scheduler)
+ *
+ * Responsibilities:
+ * - Accept tasks (from API/CLI; only the CLI path is implemented here)
+ * - Dispatch tasks onto a Redis queue
+ * - Collect results
+ * - Session management (not implemented in this file)
+ * - CLI interface: submit / query / status
+ *
+ * With no arguments the process runs as the daemon; with arguments it runs
+ * one CLI command and exits.
+ */
+
+import { createClient } from 'redis';
+import winston from 'winston';
+import { randomUUID } from 'crypto';
+
+// ============ Configuration ============
+/**
+ * Parse the REDIS_PORT environment value.
+ *
+ * @param raw Raw environment value; unset or empty selects the default.
+ * @returns The port number (6379 when `raw` is unset or empty).
+ * @throws Error when `raw` is not an integer in 1-65535, so a typo fails fast
+ *   instead of handing NaN to the Redis client.
+ */
+function parseRedisPort(raw: string | undefined): number {
+  if (!raw) {
+    return 6379;
+  }
+  const port = Number(raw);
+  if (!Number.isInteger(port) || port < 1 || port > 65535) {
+    throw new Error(`Invalid REDIS_PORT: ${raw}`);
+  }
+  return port;
+}
+
+const REDIS_HOST = process.env.REDIS_HOST || '127.0.0.1';
+const REDIS_PORT = parseRedisPort(process.env.REDIS_PORT);
+const TASK_QUEUE = 'fsc:task_queue';
+const RESULT_QUEUE = 'fsc:result_queue';
+const FAILED_QUEUE = 'fsc:failed_tasks';
+const TASK_STORE_PREFIX = 'fsc:task:';
+const RESULT_STORE_PREFIX = 'fsc:result:';
+const HEALTH_KEY = 'fsc:gateway:health';
+const HEALTH_INTERVAL_MS = 30000;
+
+// ============ Logger ============
+const logger = winston.createLogger({
+  level: 'info',
+  format: winston.format.combine(
+    winston.format.timestamp(),
+    winston.format.json()
+  ),
+  transports: [
+    new winston.transports.Console({
+      format: winston.format.combine(
+        winston.format.colorize(),
+        winston.format.simple()
+      )
+    }),
+    new winston.transports.File({ filename: 'fsc-gateway.log' })
+  ]
+});
+
+// ============ Redis Client ============
+const redis = createClient({
+  socket: {
+    host: REDIS_HOST,
+    port: REDIS_PORT,
+    // Linear backoff capped at 3 s; give up after 10 attempts.
+    reconnectStrategy: (retries) => {
+      if (retries > 10) {
+        logger.error('Redis reconnect failed after 10 attempts');
+        return new Error('Max reconnect attempts reached');
+      }
+      return Math.min(retries * 100, 3000);
+    }
+  }
+});
+
+redis.on('error', (err) => logger.error('Redis error:', err));
+redis.on('connect', () => logger.info('Redis connected'));
+redis.on('reconnecting', () => logger.warn('Redis reconnecting...'));
+
+// ============ Task types ============
+/** A unit of work placed on TASK_QUEUE. */
+interface Task {
+  id: string;
+  image: string;
+  commands: string[];
+  timeoutSeconds?: number;
+}
+
+/** Outcome message read from RESULT_QUEUE. */
+interface TaskResult {
+  taskId: string;
+  status: 'success' | 'failure' | 'timeout';
+  output?: string;
+  error?: string;
+  timestamp: number;
+}
+
+/** Record persisted under TASK_STORE_PREFIX + task id. */
+interface StoredTask extends Task {
+  createdAt: number;
+  status: 'queued' | TaskResult['status'];
+  completedAt?: number;
+}
+
+/** Lengths of the three FSC lists, as reported by `status` and the health key. */
+interface QueueDepths {
+  task: number;
+  result: number;
+  failed: number;
+}
+
+// ============ Task and result storage ============
+/** Persist `task` with a creation timestamp and status `queued`. */
+async function storeTask(task: Task): Promise<void> {
+  const record: StoredTask = { ...task, createdAt: Date.now(), status: 'queued' };
+  await redis.set(`${TASK_STORE_PREFIX}${task.id}`, JSON.stringify(record));
+}
+
+/** Load the stored record for `taskId`, or null when the key does not exist. */
+async function getTask(taskId: string): Promise<StoredTask | null> {
+  const data = await redis.get(`${TASK_STORE_PREFIX}${taskId}`);
+  // The value is only ever written by storeTask/storeResult, hence the cast.
+  return data ? (JSON.parse(data) as StoredTask) : null;
+}
+
+/**
+ * Persist `result` and, when the task record exists, copy the final status
+ * and completion time onto it.
+ */
+async function storeResult(result: TaskResult): Promise<void> {
+  await redis.set(`${RESULT_STORE_PREFIX}${result.taskId}`, JSON.stringify(result));
+  // Update the task status. This is a read-modify-write, not an atomic update.
+  const task = await getTask(result.taskId);
+  if (task) {
+    const updated: StoredTask = {
+      ...task,
+      status: result.status,
+      completedAt: result.timestamp
+    };
+    await redis.set(`${TASK_STORE_PREFIX}${result.taskId}`, JSON.stringify(updated));
+  }
+}
+
+/** Load the stored result for `taskId`, or null when the key does not exist. */
+async function getResult(taskId: string): Promise<TaskResult | null> {
+  const data = await redis.get(`${RESULT_STORE_PREFIX}${taskId}`);
+  return data ? (JSON.parse(data) as TaskResult) : null;
+}
+
+/** Read the current length of the task, result and failed lists. */
+async function readQueueDepths(): Promise<QueueDepths> {
+  const task = await redis.lLen(TASK_QUEUE);
+  const result = await redis.lLen(RESULT_QUEUE);
+  const failed = await redis.lLen(FAILED_QUEUE);
+  return { task, result, failed };
+}
+
+// ============ Task submission ============
+/**
+ * Store `task` and append it to TASK_QUEUE.
+ *
+ * @returns The id of the submitted task.
+ */
+async function submitTask(task: Task): Promise<string> {
+  logger.info(`[Gateway] Submitting task ${task.id}`);
+  await storeTask(task);
+  await redis.rPush(TASK_QUEUE, JSON.stringify(task));
+  return task.id;
+}
+
+// ============ Result collector loop ============
+let isShuttingDown = false;
+
+/**
+ * Decode one RESULT_QUEUE element.
+ *
+ * @returns The parsed result, or null when `raw` is not JSON for an object
+ *   carrying a non-empty string `taskId` (such input would otherwise be stored
+ *   under the key `fsc:result:undefined`).
+ */
+function parseTaskResult(raw: string): TaskResult | null {
+  let value: unknown;
+  try {
+    value = JSON.parse(raw);
+  } catch {
+    return null;
+  }
+  if (typeof value !== 'object' || value === null) {
+    return null;
+  }
+  const taskId = (value as { taskId?: unknown }).taskId;
+  if (typeof taskId !== 'string' || taskId.length === 0) {
+    return null;
+  }
+  return value as TaskResult;
+}
+
+/** Pop results off RESULT_QUEUE and persist them until shutdown is requested. */
+async function resultCollectorLoop(): Promise<void> {
+  logger.info('Result collector starting...');
+
+  while (!isShuttingDown) {
+    try {
+      // 5 s timeout so the loop re-checks isShuttingDown regularly.
+      // NOTE(review): this blocking pop shares the connection used by every
+      // other command; consider a dedicated client (redis.duplicate()).
+      const popped = await redis.blPop(RESULT_QUEUE, 5);
+
+      if (!popped) {
+        continue;
+      }
+
+      const taskResult = parseTaskResult(popped.element);
+      if (!taskResult) {
+        logger.warn(`[Gateway] Dropping malformed result: ${popped.element}`);
+        continue;
+      }
+      logger.info(`[Gateway] Received result for task ${taskResult.taskId}: ${taskResult.status}`);
+
+      // Persist the result.
+      await storeResult(taskResult);
+    } catch (error) {
+      if (isShuttingDown) {
+        // The client is being closed underneath us; not worth reporting.
+        break;
+      }
+      logger.error('Result collector error:', error);
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+    }
+  }
+
+  logger.info('Result collector exited');
+}
+
+// ============ CLI interface ============
+/**
+ * Execute one CLI command against an already connected client.
+ *
+ * @param args Command name followed by its arguments.
+ * @returns The process exit code: 0 on success, 1 on a usage error.
+ */
+async function runCliCommand(args: string[]): Promise<number> {
+  const [command, ...rest] = args;
+
+  switch (command) {
+    case 'submit': {
+      // submit <image> <command...>
+      const [image, ...commands] = rest;
+      if (!image || commands.length === 0) {
+        console.error('Usage: submit <image> <command...>');
+        return 1;
+      }
+
+      const task: Task = { id: randomUUID(), image, commands, timeoutSeconds: 300 };
+      await submitTask(task);
+      console.log(`Task submitted: ${task.id}`);
+      return 0;
+    }
+    case 'query': {
+      // query <taskId>
+      const taskId = rest[0];
+      if (!taskId) {
+        console.error('Usage: query <taskId>');
+        return 1;
+      }
+
+      const task = await getTask(taskId);
+      const result = await getResult(taskId);
+      console.log('Task:', task);
+      console.log('Result:', result);
+      return 0;
+    }
+    case 'status': {
+      const depths = await readQueueDepths();
+      console.log(`Queue: ${depths.task} pending, ${depths.result} results, ${depths.failed} failed`);
+      return 0;
+    }
+    default:
+      console.error('Unknown command:', command);
+      console.error('Commands: submit | query | status');
+      return 1;
+  }
+}
+
+/**
+ * Entry point: run the daemon when no arguments are given, otherwise run a
+ * single CLI command.
+ *
+ * The exit code is set through `process.exitCode` rather than `process.exit()`
+ * inside the `try`, because `process.exit()` terminates immediately and would
+ * skip the `finally` block that closes the Redis connection.
+ */
+async function handleCli(): Promise<void> {
+  const args = process.argv.slice(2);
+
+  if (args.length === 0) {
+    // Daemon mode.
+    await main();
+    return;
+  }
+
+  // CLI command mode.
+  await redis.connect();
+  try {
+    process.exitCode = await runCliCommand(args);
+  } finally {
+    await redis.quit();
+  }
+}
+
+// ============ Health check ============
+let healthTimer: ReturnType<typeof setInterval> | undefined;
+
+/** Write the queue depths to HEALTH_KEY with a 60 s expiry. */
+async function publishHealth(): Promise<void> {
+  try {
+    const queues = await readQueueDepths();
+    await redis.set(HEALTH_KEY, JSON.stringify({
+      timestamp: Date.now(),
+      queues
+    }), { EX: 60 });
+  } catch (error) {
+    logger.error('Health check failed:', error);
+  }
+}
+
+/**
+ * Start the periodic health report. Called from daemon mode only: a
+ * module-level timer would keep one-shot CLI commands from exiting.
+ */
+function startHealthCheck(): void {
+  healthTimer = setInterval(() => {
+    void publishHealth();
+  }, HEALTH_INTERVAL_MS);
+}
+
+// ============ Graceful shutdown ============
+/** Stop the collector and health timer, close Redis, then exit. */
+async function shutdown(signal: string): Promise<void> {
+  if (isShuttingDown) {
+    // A second signal must not call redis.quit() again.
+    return;
+  }
+  logger.info(`Received ${signal}, shutting down gracefully...`);
+  isShuttingDown = true;
+  if (healthTimer) {
+    clearInterval(healthTimer);
+  }
+  try {
+    if (redis.isOpen) {
+      await redis.quit();
+    }
+    logger.info('Shutdown complete');
+    process.exit(0);
+  } catch (error) {
+    logger.error('Shutdown failed:', error);
+    process.exit(1);
+  }
+}
+
+process.on('SIGTERM', () => void shutdown('SIGTERM'));
+process.on('SIGINT', () => void shutdown('SIGINT'));
+
+// ============ Startup ============
+/** Connect to Redis, then start the health report and the result collector. */
+async function main(): Promise<void> {
+  logger.info('FSC Gateway Daemon starting...');
+  await redis.connect();
+  startHealthCheck();
+  resultCollectorLoop().catch((error) => {
+    logger.error('Fatal error in result collector:', error);
+    process.exit(1);
+  });
+}
+
+// ============ Entry point ============
+handleCli().catch((error) => {
+  logger.error('Fatal error:', error);
+  process.exit(1);
+});
diff --git a/monitoring/grafana/dashboard.json b/monitoring/grafana/dashboard.json
new file mode 100644
--- /dev/null
+++ b/monitoring/grafana/dashboard.json
@@ -0,0 +1,391 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "panels": [],
+      "title": "FSC Mesh Overview",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 50
+              },
+              {
+                "color": "red",
+                "value": 100
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 1
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "expr": "redis_list_length{queue=\"fsc:task_queue\"}",
+          "legendFormat": "Task Queue Depth",
+          "refId": "A"
+        }
+      ],
+      "title": "Queue Depth",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 5
+              },
+              {
+                "color": "red",
+                "value": 10
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 1
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "expr": "fsc_worker_running_tasks",
+          "legendFormat": "Worker Load - {{instance}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Worker Load",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 9
+      },
+      "id": 4,
+      "panels": [],
+      "title": "Alerts & Health",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "loki",
+        "uid": "${DS_LOKI}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "custom": {
+            "align": "auto",
+            "displayMode": "auto",
+            "inspect": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 10
+      },
+      "id": 5,
+      "options": {
+        "footer": {
+          "fields": "",
+          "reducer": [
+            "sum"
+          ],
+          "show": false
+        },
+        "showHeader": true
+      },
+      "pluginVersion": "10.0.0",
+      "targets": [
+        {
+          "expr": "{job=\"fsc-mesh\"} |= `dead_letter`",
+          "legendFormat": "{{level}} - {{message}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Dead Letter Queue",
+      "type": "table"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 120
+              },
+              {
+                "color": "red",
+                "value": 180
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 10
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "expr": "time() - wireguard_latest_handshake_seconds",
+          "legendFormat": "WG Handshake - {{peer}}",
+          "refId": "A"
+        }
+      ],
+      "title": "WireGuard Health",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 38,
+  "style": "dark",
+  "tags": [
+    "fsc-mesh"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "FSC Mesh Dashboard",
+  "uid": "fsc-mesh-dashboard",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/scripts/mem-gc.sh b/scripts/mem-gc.sh
new file mode 100755
--- /dev/null
+++ b/scripts/mem-gc.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# MemoV .mem/ GC script
+# Runs when .mem/ reaches 1GB: expires reflog entries and prunes unreachable
+# objects older than 30 days. Reachable commits are never removed.
+#
+# Usage: mem-gc.sh [MEM_DIR]   (MEM_DIR defaults to .mem)
+
+set -euo pipefail
+
+MEM_DIR="${1:-.mem}"
+MAX_SIZE_GB=1
+KEEP_DAYS=30
+
+echo "=== MemoV GC starting ==="
+echo "Mem dir: $MEM_DIR"
+echo "Max size: ${MAX_SIZE_GB}GB"
+echo "Keep days: $KEEP_DAYS"
+
+# Check if mem dir exists
+if [ ! -d "$MEM_DIR" ]; then
+  echo "Error: Mem dir $MEM_DIR not found"
+  exit 1
+fi
+
+# Resolve to an absolute path: the script changes directory below, after which
+# a relative MEM_DIR (including the default) would no longer point at it.
+MEM_DIR="$(cd "$MEM_DIR" && pwd -P)"
+
+# Print a size in MB as GB.
+to_gb() {
+  awk -v mb="$1" 'BEGIN { print mb / 1024 }'
+}
+
+# Size of the mem dir in whole MB.
+dir_size_mb() {
+  du -sm "$MEM_DIR" | awk '{ print $1 }'
+}
+
+# Calculate current size
+MAX_SIZE_MB=$((MAX_SIZE_GB * 1024))
+CURRENT_SIZE_MB=$(dir_size_mb)
+echo "Current size: $(to_gb "$CURRENT_SIZE_MB")GB"
+
+# Check if GC needed (integer comparison in MB, no bc required)
+if [ "$CURRENT_SIZE_MB" -lt "$MAX_SIZE_MB" ]; then
+  echo "Size below threshold, no GC needed"
+  exit 0
+fi
+
+echo "Size exceeds threshold, starting GC..."
+
+# Git GC
+cd "$MEM_DIR" || exit 1
+
+# Refuse to continue unless MEM_DIR itself holds the repository; otherwise git
+# would walk up and rewrite an enclosing repository instead.
+REPO_GIT_DIR="$(git rev-parse --absolute-git-dir 2>/dev/null || true)"
+case "$REPO_GIT_DIR" in
+  "$MEM_DIR" | "$MEM_DIR"/*) ;;
+  *)
+    echo "Error: $MEM_DIR is not a git repository" >&2
+    exit 1
+    ;;
+esac
+
+# Expire old reflogs
+git reflog expire --expire="${KEEP_DAYS} days" --all
+
+# Prune loose objects
+git gc --prune="${KEEP_DAYS} days" --aggressive
+
+# Cleanup worktrees if any
+git worktree prune
+
+# Verify size after GC
+NEW_SIZE_MB=$(dir_size_mb)
+echo "GC complete!"
+echo "New size: $(to_gb "$NEW_SIZE_MB")GB"
+echo "Freed: $(to_gb "$((CURRENT_SIZE_MB - NEW_SIZE_MB))")GB"