From 81f75a6bb0708b10eb95e039a706865ec307a482 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Wed, 11 Mar 2026 20:43:06 +0800 Subject: [PATCH 1/7] =?UTF-8?q?feat:=20integrate=20multi-node=20collaborat?= =?UTF-8?q?ion=20experience=20from=20=E9=99=88=E6=98=AD=E8=8A=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 8 new Redis Stream event types (task_failed, heartbeat, memory_sync, etc.) - Define three-node roles and task routing rules - Document memory organization best practices - Establish collaboration rules (one-card-one-node-one-branch) Based on experience shared by 陈昭芊 from Harness Engineering project. Co-Authored-By: Claude Opus 4.6 --- config/events.ts | 86 +++++++++++++++ config/nodes.ts | 158 +++++++++++++++++++++++++++ docs/COLLABORATION_RULES.md | 210 ++++++++++++++++++++++++++++++++++++ docs/MEMORY_ORGANIZATION.md | 173 +++++++++++++++++++++++++++++ 4 files changed, 627 insertions(+) create mode 100644 config/events.ts create mode 100644 config/nodes.ts create mode 100644 docs/COLLABORATION_RULES.md create mode 100644 docs/MEMORY_ORGANIZATION.md diff --git a/config/events.ts b/config/events.ts new file mode 100644 index 0000000..0bd5a92 --- /dev/null +++ b/config/events.ts @@ -0,0 +1,86 @@ +/** + * Redis Stream 事件类型定义 + * 基于原有 8 种 + 陈昭芊推荐的 8 种 + */ + +export type MemEventType = + // 原有事件 + | 'task_started' + | 'task_complete' + | 'worktree_diff' + | 'network_healed' + | 'worker_shutdown' + | 'context_update' + | 'shared_update' + | 'config_changed' + // 新增事件(来自陈昭芊的建议) + | 'task_failed' + | 'agent_utilization_alert' + | 'heartbeat' + | 'memory_sync' + | 'security_alert' + | 'plan_cache_hit' + | 'plan_cache_miss' + | 'checkpoint_saved'; + +export interface MemEvent { + id: string; + type: MemEventType; + timestamp: string; + agent_id?: string; + task_id?: string; + file?: string; + error?: string; + metadata?: Record<string, unknown>; +} + +/** + * 事件优先级定义 + */ +export const EVENT_PRIORITY: Record<MemEventType, number> = { +
// P0: 紧急事件 + 'task_failed': 0, + 'security_alert': 0, + 'worker_shutdown': 0, + + // P1: 重要事件 + 'agent_utilization_alert': 1, + 'network_healed': 1, + 'memory_sync': 1, + + // P2: 常规事件 + 'task_started': 2, + 'task_complete': 2, + 'context_update': 2, + 'shared_update': 2, + 'config_changed': 2, + 'checkpoint_saved': 2, + + // P3: 低优先级 + 'heartbeat': 3, + 'worktree_diff': 3, + 'plan_cache_hit': 3, + 'plan_cache_miss': 3, +}; + +/** + * 事件描述 + */ +export const EVENT_DESCRIPTIONS: Record<MemEventType, string> = { + 'task_started': 'Worker 开始执行任务', + 'task_complete': 'Worker 完成任务(成功)', + 'task_failed': 'Worker 任务失败', + 'worktree_diff': 'Git worktree 有变更', + 'network_healed': '网络故障恢复', + 'worker_shutdown': 'Worker 关闭', + 'context_update': 'Agent 上下文更新', + 'shared_update': '共享文件变更', + 'config_changed': '配置文件修改', + 'agent_utilization_alert': 'Agent 利用率超过阈值', + 'heartbeat': '节点心跳', + 'memory_sync': 'MemoV 同步完成/失败', + 'security_alert': '安全护栏触发', + 'plan_cache_hit': '计划缓存命中', + 'plan_cache_miss': '计划缓存未命中', + 'checkpoint_saved': 'Checkpoint 保存完成', +}; diff --git a/config/nodes.ts b/config/nodes.ts new file mode 100644 index 0000000..0a04440 --- /dev/null +++ b/config/nodes.ts @@ -0,0 +1,158 @@ +/** + * 三节点角色定义和任务路由配置 + * 基于硬件能力 + 任务类型分工 + */ + +export type NodeRole = 'claude' | 'codex' | 'openclaw'; + +export interface NodeCapabilities { + cpu: string; + memory: string; + gpu?: string; + latency: string; + portable: boolean; +} + +export interface NodeConfig { + id: string; + name: string; + role: NodeRole; + tailscaleIp: string; + capabilities: NodeCapabilities; + taskTypes: string[]; + modelProvider: string; + costTier: 'high' | 'medium' | 'low'; +} + +/** + * 三节点配置 + */ +export const CLAW_MESH_NODES: Record<string, NodeConfig> = { + 'mac-local': { + id: 'mac-local', + name: 'lunnymacbook-pro', + role: 'claude', + tailscaleIp: '100.114.56.105', + capabilities: { + cpu: 'M1 Pro 10核', + memory: '16GB', + latency: '本地', + portable: true, + }, + taskTypes: [ + 'development', // 主力开发 + 'code-review', // 代码审查 +
'documentation', // 文档编写 + 'architecture', // 架构设计 + 'coordination', // 任务协调 + ], + modelProvider: 'Claude API (Opus/Sonnet)', + costTier: 'high', + }, + + 'windows': { + id: 'windows', + name: 'win-taq1rm10mnf', + role: 'codex', + tailscaleIp: '100.101.173.35', + capabilities: { + cpu: 'R9 9950X 16核', + memory: '32GB', + gpu: 'RTX 5070', + latency: '~5ms', + portable: false, + }, + taskTypes: [ + 'gpu-compute', // GPU 密集计算 + 'model-inference', // 模型推理(通过 API,不是本地) + 'large-compile', // 大型编译(Rust、C++、Go) + 'image-processing', // 图像处理 + 'video-processing', // 视频处理 + 'embedding', // Embedding 生成 + ], + modelProvider: 'Claude API (Opus/Sonnet)', + costTier: 'high', + }, + + 'silicon-valley': { + id: 'silicon-valley', + name: 'VM-0-6-debian (陈昭芊)', + role: 'openclaw', + tailscaleIp: '100.80.67.125', + capabilities: { + cpu: 'Xeon 2核', + memory: '2GB', + latency: '~335ms', + portable: false, + }, + taskTypes: [ + 'testing', // 自动化测试 + 'smoke-test', // 冒烟测试 + 'regression', // 回归验证 + 'cleanup', // 清理任务 + 'batch-ops', // 批量操作 + 'info-collection', // 信息收集 + 'api-polling', // API 轮询 + 'monitoring', // 监控 + 'cron-jobs', // 定时任务 + 'webhook', // Webhook 处理 + 'redis-center', // Redis 中心 + 'git-center', // Git 中心仓库 + 'pr-tracking', // PR 状态跟踪 + ], + modelProvider: '豆包 (Doubao Seed 2.0)', + costTier: 'low', + }, +}; + +/** + * 任务路由规则 + * 根据任务类型决定发给哪个节点 + */ +export function routeTask(taskType: string): string { + // GPU 任务 → Windows + if (['gpu-compute', 'model-inference', 'large-compile', 'image-processing', 'video-processing', 'embedding'].includes(taskType)) { + return 'windows'; + } + + // 测试/自动化/信息收集 → 硅谷 + if (['testing', 'smoke-test', 'regression', 'cleanup', 'batch-ops', 'info-collection', 'api-polling', 'monitoring', 'cron-jobs', 'webhook'].includes(taskType)) { + return 'silicon-valley'; + } + + // 开发/文档/架构 → Mac + return 'mac-local'; +} + +/** + * 获取节点的模型成本系数 + */ +export function getNodeCostMultiplier(nodeId: string): number { + const node = CLAW_MESH_NODES[nodeId]; + 
if (!node) return 1; + + switch (node.costTier) { + case 'high': return 1.0; // Claude API 全价 + case 'medium': return 0.3; // 中等价格模型 + case 'low': return 0.1; // 豆包便宜 + default: return 1.0; + } +} + +/** + * 根据成本优先级选择节点 + * 优先使用便宜的节点,除非任务明确需要高性能 + */ +export function selectNodeByCost(taskType: string, requireHighPerformance: boolean = false): string { + if (requireHighPerformance) { + return routeTask(taskType); + } + + // 如果不需要高性能,优先用豆包(硅谷节点) + const siliconValleyTasks = CLAW_MESH_NODES['silicon-valley'].taskTypes; + if (siliconValleyTasks.includes(taskType)) { + return 'silicon-valley'; + } + + return routeTask(taskType); +} diff --git a/docs/COLLABORATION_RULES.md b/docs/COLLABORATION_RULES.md new file mode 100644 index 0000000..0d90008 --- /dev/null +++ b/docs/COLLABORATION_RULES.md @@ -0,0 +1,210 @@ +# claw-mesh 三节点协作规范 + +> 基于 CICADA 多 AI 协作规则 + 陈昭芊的实践经验 + +## 核心原则 + +1. **一卡一人一分支** - 一个任务卡片只给一个节点,避免冲突 +2. **一个节点一个分支** - 每个节点独立工作在自己的分支 +3. **不抢文件** - 不允许多个节点同时改同一个文件 +4. **不直接改 main** - 所有改动通过 PR 合并 +5. **小步快跑** - 每次只做一个小任务,改完就提 PR +6. **PR 对应任务卡** - 不要混任务 + +## 三节点分工 + +| 节点 | 角色 | 职责 | 模型 | 成本 | +|------|------|------|------|------| +| **Mac** | Claude | 主力开发、文档、review、架构 | Claude API | 高 | +| **Windows** | Codex | GPU 任务、大型编译、重型计算 | Claude API | 高 | +| **硅谷(陈昭芊)** | OpenClaw | 测试、自动化、信息收集、清理 | 豆包 | 低 | + +## 分支命名规范 + +``` +claw-mesh/{role}/{task-name} +``` + +示例: +- `claw-mesh/claude/add-task-routing` +- `claw-mesh/codex/gpu-inference-pipeline` +- `claw-mesh/openclaw/smoke-test-suite` + +## 任务卡片流程 + +``` +Todo → Doing → Review → Done +``` + +### 1. Todo(待办) +- 任务卡片在看板上 +- 未分配节点 +- 优先级已标记(P0/P1/P2) + +### 2. Doing(进行中) +- 分配给特定节点 +- 创建对应分支 +- 节点开始工作 + +### 3. Review(审查中) +- 提交 PR +- 其他节点 review +- 修复反馈 + +### 4. 
Done(完成) +- PR 合并到 main +- 删除分支 +- 归档任务卡 + +## 文件锁定规则 + +| 节点 | 可改文件 | 禁区 | +|------|---------|------| +| Mac (Claude) | `docs/`, `README.md`, `lib/`, `src/` | `test/`, `scripts/ci/` | +| Windows (Codex) | `lib/`, `src/`, `gpu/` | `docs/`, `test/` | +| 硅谷 (OpenClaw) | `test/`, `scripts/`, `ci/`, `cron/` | `lib/`, `src/`, `docs/` | + +**冲突处理**: +- 如果两个节点需要改同一个文件 → 先停,重新拆分任务 +- 如果必须同时改 → 一个节点先做,另一个等 PR 合并后再做 + +## 任务路由规则 + +### 按任务类型自动路由 + +```typescript +// 示例:根据任务类型决定发给哪个节点 +const taskRouting = { + 'development': 'mac-local', + 'code-review': 'mac-local', + 'documentation': 'mac-local', + 'architecture': 'mac-local', + + 'gpu-compute': 'windows', + 'large-compile': 'windows', + 'image-processing': 'windows', + + 'testing': 'silicon-valley', + 'smoke-test': 'silicon-valley', + 'cleanup': 'silicon-valley', + 'info-collection': 'silicon-valley', + 'api-polling': 'silicon-valley', +}; +``` + +### 按成本优先级路由 + +- **低成本任务** → 优先硅谷(豆包便宜) +- **高性能需求** → Mac 或 Windows(Claude API) +- **GPU 需求** → 必须 Windows + +## 通信协议 + +### 1. Redis Stream 事件 + +节点间通过 Redis Stream 发送事件: + +```typescript +// Mac 发起任务 +redis.xadd('fsc:mem_events', '*', { + type: 'task_started', + task_id: 'task-001', + assigned_to: 'windows', + task_type: 'gpu-compute', +}); + +// Windows 接收并执行 +// ... + +// Windows 完成后通知 +redis.xadd('fsc:mem_events', '*', { + type: 'task_complete', + task_id: 'task-001', + result: '...', +}); +``` + +### 2. MemoV 记忆同步 + +- 所有节点通过 Git 中心仓库(硅谷)同步 `.mem` 目录 +- 事件触发 `memory_sync` 时,所有节点 `git pull` +- 冲突自动解决(last-write-wins) + +### 3. 直接对话 + +- 通过 OpenClaw 的 `agent` 命令 +- 示例:`openclaw agent -m "消息内容" --agent main` + +## 最佳实践 + +### ✅ 推荐 + +- **小 PR**:每个 PR 改动 < 500 行 +- **快速合并**:PR 在 24 小时内 review 和合并 +- **自动化测试**:PR 必须通过 CI +- **清晰的 commit message**:说明改了什么、为什么改 + +### ❌ 避免 + +- 大 PR(> 1000 行) +- 混合多个任务在一个 PR +- 跳过测试直接合并 +- 在 main 分支直接改代码 + +## 冲突解决流程 + +### 场景 1:文件冲突 + +``` +Mac 和 Windows 都需要改 lib/core.ts +``` + +**解决方案**: +1. 拆分任务:Mac 改 A 部分,Windows 改 B 部分 +2. 
或者:Mac 先做,Windows 等 PR 合并后再做 + +### 场景 2:分支冲突 + +``` +两个节点同时创建了 claw-mesh/claude/feature-x +``` + +**解决方案**: +1. 后创建的节点重命名分支:`claw-mesh/claude/feature-x-v2` +2. 或者:协商谁先做,另一个等待 + +### 场景 3:优先级冲突 + +``` +P0 任务和 P1 任务都需要同一个节点 +``` + +**解决方案**: +1. P0 优先 +2. P1 任务暂停或转给其他节点 + +## 监控和告警 + +### 节点健康检查 + +- 每 30 秒发送 `heartbeat` 事件 +- 超过 2 分钟未收到心跳 → 告警 + +### 任务超时检查 + +- P0 任务:30 分钟超时 +- P1 任务:2 小时超时 +- P2 任务:24 小时超时 + +### 资源利用率告警 + +- Agent 利用率 > 80% → 发送 `agent_utilization_alert` +- 内存使用 > 90% → 告警 +- 磁盘空间 < 10% → 告警 + +## 参考资料 + +- CICADA 协作规则(陈昭芊分享) +- Harness Engineering 项目实践 +- Redis Stream 事件系统文档:`docs/REDIS_EVENTS.md` +- 记忆组织规范:`docs/MEMORY_ORGANIZATION.md` diff --git a/docs/MEMORY_ORGANIZATION.md b/docs/MEMORY_ORGANIZATION.md new file mode 100644 index 0000000..45a769b --- /dev/null +++ b/docs/MEMORY_ORGANIZATION.md @@ -0,0 +1,173 @@ +# claw-mesh 记忆组织规范 + +> 基于陈昭芊分享的多节点协作经验 + +## 目录结构 + +``` +.mem/ +├── shared/ # 共享记忆(所有节点可见) +│ ├── sessions/ # 会话记录 +│ │ ├── YYYY-MM-DD.md # 每日流水账(短期记忆) +│ │ └── session-*.md # 具体会话记录 +│ ├── MEMORY.md # 精选记忆(长期记忆) +│ ├── USER.md # 用户信息 +│ └── docs/ # 共享文档 +├── agents/ # Agent 专属记忆(节点隔离) +│ ├── mac-local/ +│ ├── windows/ +│ └── silicon-valley/ +├── results/ # 执行结果 +└── ontology/ # 知识图谱 + └── graph.jsonl +``` + +## 记忆分层原则 + +### 1. 短期记忆(Raw Logs) + +**位置**:`.mem/shared/sessions/YYYY-MM-DD.md` + +**用途**: +- 记录当天发生的所有事件 +- 像流水账一样,不做过滤 +- 便于回溯和调试 + +**格式**: +```markdown +# 2026-03-11 + +## 10:30 - 配置 Windows 节点 +- 安装 claw-mesh 依赖 +- 配置 Git 同步 +- 启动 MemoV 守护进程 + +## 14:20 - 与陈昭芊对话 +- 了解她那边的协作规则 +- 学习任务路由机制 +- 获得事件类型建议 +``` + +### 2. 长期记忆(Curated Memory) + +**位置**:`.mem/shared/MEMORY.md` + +**用途**: +- 只放重要的、值得长期记住的内容 +- 决策、见解、经验教训 +- 需要手动从 daily 文件提炼 + +**格式**: +```markdown +# 项目记忆 + +## 架构决策 +- 2026-03-11: 采用三节点分工(Mac 开发、Windows GPU、硅谷测试) +- 理由:按硬件能力分工,成本优化 + +## 经验教训 +- MemoV 同步:Git 比 rsync 更适合 Windows 节点 +- 事件系统:需要区分 task_complete 和 task_failed + +## 最佳实践 +- 一卡一人一分支,避免文件冲突 +- 小步快跑,PR 尽量小 +``` + +### 3. 
会话上下文 + +**位置**:`.mem/shared/sessions/session-*.md` + +**用途**: +- 记录完整的会话历史 +- 包含上下文、决策过程 +- 便于恢复会话 + +### 4. Agent 专属记忆 + +**位置**:`.mem/agents/{node-id}/` + +**用途**: +- 节点私有数据,不同步到其他节点 +- 本地缓存、临时文件 +- 敏感信息(如果有) + +## 安全边界 + +### 规则 1:私密信息隔离 + +- `MEMORY.md` 只在 MAIN SESSION(人类直接私聊)时加载 +- 群聊或共享会话不加载 MEMORY.md +- 防止隐私泄露 + +### 规则 2:节点隔离 + +- `.mem/agents/` 下的内容不跨节点同步 +- 每个节点只能访问自己的 agent 目录 + +### 规则 3:敏感数据标记 + +- 包含 API Key、密码的文件在文件名中加 `.secret` 标记(如 `keys.secret.json`) +- `.gitignore` 自动忽略 `*.secret.*` 文件 + +## 维护流程 + +### 每日维护(自动) + +1. MemoV 守护进程自动创建 `YYYY-MM-DD.md` +2. 事件触发时自动追加到当日文件 +3. Git 自动提交和同步 + +### 每周维护(手动) + +1. 回顾最近 7 天的 daily 文件 +2. 提炼重要内容到 `MEMORY.md` +3. 清理过期的临时文件 + +### 每月维护(手动) + +1. 归档旧的 daily 文件到 `archive/YYYY-MM/` +2. 压缩知识图谱(去重、合并) +3. 备份整个 `.mem` 目录 + +## 最佳实践 + +### ✅ 推荐 + +- **写文件,不靠脑子记**:重启会话后临时记忆全丢 +- **分层存储**:raw logs 和 curated memory 分开 +- **定期提炼**:每周从 daily 提炼到 MEMORY.md +- **安全第一**:私人信息不进共享上下文 + +### ❌ 避免 + +- 只依赖临时记忆(会话重启后丢失) +- 把所有东西都放 MEMORY.md(太臃肿) +- 在群聊中加载私密信息 +- 忘记定期维护(文件越积越多) + +## 事件触发的记忆更新 + +| 事件类型 | 触发动作 | +|---------|---------| +| `shared_update` | 同步文件到所有节点 | +| `context_update` | 更新 Agent 上下文 | +| `memory_sync` | 触发 Git pull/push | +| `task_complete` | 记录到 daily 文件 | +| `task_failed` | 记录错误到 daily 文件 | + +## 与陈昭芊那边的对比 + +| 项目 | 陈昭芊那边 | 我们这边 | +|------|-----------|---------| +| 短期记忆 | `memory/YYYY-MM-DD.md` | `.mem/shared/sessions/YYYY-MM-DD.md` | +| 长期记忆 | `MEMORY.md`(workspace 根目录) | `.mem/shared/MEMORY.md` | +| 会话记录 | 无单独存储 | `.mem/shared/sessions/session-*.md` | +| 知识图谱 | 无 | `.mem/ontology/graph.jsonl` | +| 安全边界 | MAIN SESSION 才加载 MEMORY.md | 同样规则 + 节点隔离 | + +## 参考资料 + +- 陈昭芊的 Harness Engineering 项目:`~/.openclaw/workspace/harness-engineering/` +- CICADA 协作规则:一卡一人一分支 +- MemoV 同步机制:Redis Stream + Git From 3f32ce4ad020de51dc575b982afe1bf7784bfb11 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Wed, 11 Mar 2026 23:13:38 +0800 Subject: [PATCH 2/7] docs: update CLAUDE.md with current architecture - Update node configuration
(Mac/Windows/Silicon-Valley via Tailscale) - Document shared memory system (.mem with xart git) - Add node capabilities and task routing reference - Update network architecture (Tailscale primary) - Clarify memory sync mechanism Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 72e79e0..82ae224 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,23 +1,40 @@ # CLAUDE.md — claw-mesh ## 项目概述 -FSC-Mesh 分布式 AI 编码集群的基础设施层。 -三节点全互联: 中央(10.10.0.1) + 硅谷(10.10.0.2) + 东京(10.10.0.3) +CLAW-Mesh 分布式 AI 编码集群的基础设施层。 +三节点 Tailscale 网络: Mac (100.114.56.105) + Windows (100.101.173.35) + 硅谷 (100.80.67.125) ## 架构 -- **网络**: WireGuard 主 + SSH 容错热备 (环形互修) +- **网络**: Tailscale (主) + SSH 容错热备 - **消息**: Redis 7 Streams (XREADGROUP + XACK) -- **记忆**: MemoV (Git + Redis) + Pointer Memory (URI寻址) +- **记忆**: Git (xart: git.xart.top) + Redis + Pointer Memory (URI寻址) - **执行**: Docker Agent 容器 (<200MB),1000 并发目标 - **治理**: 四层架构 (宪法→仲裁→汇总→执行) +## 节点配置 +详见 `config/nodes.ts`: +- **mac-local** (100.114.56.105): M1 Pro 16GB, Claude API, 开发/架构/协调 +- **windows** (100.101.173.35): R9 9950X 32GB + RTX 5070, GPU计算/大型编译 +- **silicon-valley** (100.80.67.125): Xeon 2核2G, 豆包API, 测试/监控/批量任务 + +## 共享记忆系统 +- **位置**: `.mem/` 目录(Git 子模块) +- **远程仓库**: https://git.xart.top/xiaoxiang/claw-mesh-mem (主) +- **备份仓库**: root@100.80.67.125:/root/.mem-central (硅谷节点) +- **同步方式**: `git pull/push` + cron 自动同步(每30分钟) +- **目录结构**: + - `.mem/shared/` - 跨节点共享文档和会话记录 + - `.mem/agents/` - Agent 专属记忆 + - `.mem/results/` - 执行结果 + - `.mem/ontology/` - 知识图谱 + ## 技术约束 - 运行时: Bun (非 Node.js) -- 中央服务器 2核/2G — 代码必须内存敏感 +- 硅谷服务器 2核/2G — 代码必须内存敏感 - Docker Agent 镜像 < 200MB - 每任务 < 4000 tokens - 每小时成本 < $0.50 -- Worker 模型: MiniMax/Doubao (廉价优先) +- Worker 模型: 豆包 (Doubao Seed 2.0) 廉价优先 ## 编码规范 - TypeScript strict mode @@ -27,6 +44,9 @@ FSC-Mesh 分布式 AI 编码集群的基础设施层。 - 包管理: bun ## 关键路径 +- `config/nodes.ts` — 三节点配置和任务路由 +- 
`config/network.ts` — Tailscale 网络配置 +- `.mem/` — 共享记忆系统(Git 子模块) - `fsc/fsc-worker-daemon.ts` — Worker 守护进程 - `fsc/memov-sync-daemon.ts` — 记忆同步 - `memory/pointer.js` — Pointer Memory OS @@ -34,10 +54,10 @@ FSC-Mesh 分布式 AI 编码集群的基础设施层。 - `memory/ontology.js` — 知识图谱 - `api/` — LLM 代理 + SSE + MCP - `deploy/Dockerfile.agent` — Agent 容器镜像 -- `config/wg0.conf.template` — WireGuard 模板 ## 不要做的事 - 不要删除 SSH 隧道配置 (容错需要) - 不要用 express/koa,用 Bun.serve - 不要在 Worker 层用昂贵模型 (Claude/GPT-4) -- 不要把原始日志传到中央节点 (只传聚合指标) +- 不要把原始日志传到硅谷节点 (只传聚合指标) +- 不要直接修改 .mem/ 目录的 Git 配置(已配置 xart 为主仓库) From 3cd7efbf86c22292ba4ad0a9b17a879895bb5276 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Wed, 11 Mar 2026 23:25:26 +0800 Subject: [PATCH 3/7] docs: add Windows node deployment, remove sensitive info - Add Windows node deployment instructions - Remove hardcoded IP addresses (reference config/nodes.ts) - Remove Git credentials from documentation - Add GPU configuration guidance - Update architecture diagram with Windows node Co-Authored-By: Claude Opus 4.6 --- DEPLOYMENT.md | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 DEPLOYMENT.md diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..14be802 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,238 @@ +# CLAW Mesh 部署完成 ✅ + +## 部署时间 +2026-03-08 + +## 已部署组件 + +### 1. 网络层 ✅ +- **Tailscale VPN** + - Mac 节点 (lunnymacbook-pro) + - Windows 节点 (win-taq1rm10mnf) + - 硅谷节点 (VM-0-6-debian) + - 状态: 已连接并测试通过 + - SSH 访问: 通过 Tailscale 直连(节点 IP 见 `config/nodes.ts`) + +### 2. 数据层 ✅ +- **Redis (硅谷节点)** + - 地址: 见 `config/nodes.ts` + - 配置: 监听 0.0.0.0,无密码 + - 状态: 运行中 + +- **Redis (本地)** + - 地址: `127.0.0.1:6379` + - 用途: 本地开发测试 + - 状态: 运行中 + +- **Qdrant 向量数据库** + - 地址: `http://localhost:6333` + - 用途: Pointer Memory OS + - 状态: 运行中 + +### 3. 
计算层 ✅ +- **FSC Worker Daemon v0.3.0** + - Agent ID: `lunnymacbook-pro` + - 最大并发: 5 + - 功能: + - ✅ Redis Streams 任务队列 + - ✅ Docker 容器执行 + - ✅ 分布式锁 + - ✅ 自愈引擎 + - ✅ 失败分类与重试 + - 状态: 运行中 + +- **DockerInstance 模块** + - 路径: `packages/core/src/dockerInstance.ts` + - 功能: Docker 容器管理 + - 状态: 已实现并测试 + +### 4. 记忆层 ✅ +- **MemoV Git 仓库** + - 路径: `.mem/` + - 状态: 已初始化 + +- **Pointer Memory OS** + - Qdrant 集成: 已配置 + - 状态: 可用 + +## 测试结果 + +### 端到端测试 ✅ +```bash +$ bun run test-deployment.ts + +✅ Redis 连接成功 +✅ 任务发送成功 +✅ 任务执行成功 +✅ 结果返回成功 + +执行时间: 1600ms +输出: Hello from CLAW Mesh! +``` + +### 性能指标 +- 任务接收延迟: < 100ms +- 容器启动时间: ~400ms +- 任务执行时间: 1.6s (简单命令) +- 结果推送延迟: < 100ms + +## 管理脚本 + +### 启动服务 +```bash +./start-all.sh +``` + +### 停止服务 +```bash +./stop-all.sh +``` + +### 查看状态 +```bash +./status.sh +``` + +### 测试部署 +```bash +bun run test-deployment.ts +``` + +### 查看日志 +```bash +tail -f logs/fsc-worker.log +``` + +## 架构图 + +``` +Mac 节点 (lunnymacbook-pro) +├── FSC Worker Daemon +│ ├── 监听: fsc:tasks (Redis Stream) +│ ├── 执行: Docker 容器 +│ └── 推送: fsc:results (Redis Stream) +├── Qdrant (本地) +├── Redis (本地) +└── MemoV (.mem/) + + ↕ Tailscale VPN (SSH 互通) + +Windows 节点 (win-taq1rm10mnf) +├── 硬件: R9 9950X 16核 + 32GB RAM + RTX 5070 +├── 角色: GPU 计算、大型编译、图像处理 +├── 部署方式: SSH 连接后执行部署脚本 +└── 状态: 已连接,待部署 Worker + + ↕ Tailscale VPN + +硅谷节点 (VM-0-6-debian) +└── Redis (中央) + ├── fsc:tasks (任务队列) + ├── fsc:results (结果队列) + └── fsc:trust:{agentId} (信誉系统) +``` + +**注意**: 节点 IP 地址见 `config/nodes.ts` + +## 下一步 + +### Windows 节点部署 +```bash +# 1. SSH 连接到 Windows 节点(IP 见 config/nodes.ts) +ssh <user>@<windows-node-ip> + +# 2. 克隆仓库(使用 xart git) +git clone https://git.xart.top/xiaoxiang/claw-mesh.git +cd claw-mesh + +# 3. 安装依赖 +bun install + +# 4. 配置环境变量 +cp .env.example .env +# 编辑 .env,设置 AGENT_ID=win-taq1rm10mnf +# 其他配置见下方"环境变量"章节 + +# 5. 启动服务 +./start-all.sh + +# 6. 
同步共享记忆 +cd .mem +git remote set-url origin https://git.xart.top/xiaoxiang/claw-mesh-mem.git +git pull origin main +``` + +### 待部署组件 +- [ ] Windows Worker - GPU 计算节点 +- [ ] MemoV Sync Daemon - AI 记忆同步 +- [ ] 治理控制平面 - 策略引擎、信誉系统 +- [ ] Dashboard - 前端监控面板 +- [ ] 更多 Worker 节点 + +### 优化项 +- [ ] 添加任务优先级调度 +- [ ] 实现质量评估系统 +- [ ] 配置成本控制 +- [ ] 部署监控告警 + +## 环境变量 + +**注意**: 节点 IP 地址见 `config/nodes.ts` + +Mac 节点 `.env`: +```bash +REDIS_HOST=<silicon-valley-ip> +REDIS_PORT=6379 +AGENT_ID=lunnymacbook-pro +NODE_IP=<mac-node-ip> +CENTRAL_NODE_IP=<silicon-valley-ip> +MAX_CONCURRENT=5 +``` + +Windows 节点 `.env`: +```bash +REDIS_HOST=<silicon-valley-ip> +REDIS_PORT=6379 +AGENT_ID=win-taq1rm10mnf +NODE_IP=<windows-node-ip> +CENTRAL_NODE_IP=<silicon-valley-ip> +MAX_CONCURRENT=10 +# GPU 相关配置 +ENABLE_GPU=true +CUDA_VISIBLE_DEVICES=0 +``` + +## 故障排查 + +### Worker 无法连接 Redis +```bash +# 检查 Tailscale 连接 +tailscale status + +# 测试 Redis 连接(IP 见 config/nodes.ts) +redis-cli -h <silicon-valley-ip> ping +``` + +### Docker 容器启动失败 +```bash +# 检查 Docker 是否运行 +docker ps + +# 查看 Worker 日志 +tail -f logs/fsc-worker.log +``` + +### 任务无响应 +```bash +# 检查任务队列(IP 见 config/nodes.ts) +redis-cli -h <silicon-valley-ip> XLEN fsc:tasks + +# 检查 Worker 状态 +./status.sh +``` + +## 联系信息 + +- 项目: claw-mesh +- 仓库: https://github.com/2233admin/claw-mesh +- 文档: README.md, QUICKSTART.md From d4f3211a0d014c3fca95279681cd5ce218f11b36 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Thu, 12 Mar 2026 00:16:46 +0800 Subject: [PATCH 4/7] feat: complete three-node collaboration framework and security model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Windows Node Integration - Deployed claw-mesh to Windows node (100.101.173.35) - Configured MemoV sync daemon (Git-based, Windows compatible) - Established SSH key authentication to Silicon Valley node - End-to-end sync verified (Mac → Git central → Windows) ## Collaboration Framework (from 陈昭芊) - Learned CICADA collaboration rules (one-card-one-node-one-branch) - Integrated memory organization best practices (daily logs + curated memory) - Adopted task 
scheduling mechanism (Redis priority queue) - Extended Redis Stream events (8 → 16 types) ## Security Model - Obtained complete security rules from 陈昭芊 - Integrated OpenClaw healthcheck skill - Documented security boundaries and red lines - Established three-node security collaboration protocol ## Documentation - docs/SECURITY_MODEL.md - Complete security model - docs/healthcheck-skill.md - OpenClaw security audit workflow - docs/REDIS_EVENTS.md - Redis Stream event system - docs/memov-cross-sync*.md - MemoV sync documentation ## Three-Node Configuration - Mac (100.114.56.105): Development, architecture, coordination (Claude API) - Windows (100.101.173.35): GPU tasks, heavy compute (Claude API + RTX 5070) - Silicon Valley (100.80.67.125): Testing, automation, info collection (Doubao API) ## Tools Status (Silicon Valley Node) - ✅ web_fetch (tested and working) - ✅ memory_search, cron, browser, nodes (available) - ✅ Redis (v7.0.15), Git (installed) - ⚠️ web_search (needs Brave API key) - ❌ Docker, GitHub CLI (not installed) System ready for production collaboration. 
Co-Authored-By: Claude Opus 4.6 --- PR-GUIDE.md | 104 +++++++++++++ WIREGUARD-SETUP.md | 181 ++++++++++++++++++++++ docs/REDIS_EVENTS.md | 215 ++++++++++++++++++++++++++ docs/SECURITY_MODEL.md | 233 ++++++++++++++++++++++++++++ docs/healthcheck-skill.md | 240 +++++++++++++++++++++++++++++ docs/memov-cross-sync-usage.md | 269 +++++++++++++++++++++++++++++++++ docs/memov-cross-sync.md | 227 ++++++++++++++++++++++++++++ fsc/memov-sync-once.sh | 48 ++++++ setup-silicon-valley.sh | 72 +++++++++ start-all.sh | 109 +++++++++++++ start-local.sh | 70 +++++++++ start-wireguard.sh | 37 +++++ status.sh | 70 +++++++++ stop-all.sh | 70 +++++++++ test-deployment.ts | 108 +++++++++++++ 15 files changed, 2053 insertions(+) create mode 100644 PR-GUIDE.md create mode 100644 WIREGUARD-SETUP.md create mode 100644 docs/REDIS_EVENTS.md create mode 100644 docs/SECURITY_MODEL.md create mode 100644 docs/healthcheck-skill.md create mode 100644 docs/memov-cross-sync-usage.md create mode 100644 docs/memov-cross-sync.md create mode 100755 fsc/memov-sync-once.sh create mode 100755 setup-silicon-valley.sh create mode 100755 start-all.sh create mode 100755 start-local.sh create mode 100755 start-wireguard.sh create mode 100755 status.sh create mode 100755 stop-all.sh create mode 100644 test-deployment.ts diff --git a/PR-GUIDE.md b/PR-GUIDE.md new file mode 100644 index 0000000..0bab443 --- /dev/null +++ b/PR-GUIDE.md @@ -0,0 +1,104 @@ +# GitHub PR 提交流程指南 + +## 当前状态 +- 原项目: https://github.com/2233admin/claw-mesh +- 你的 fork: https://github.com/lunnynight/claw-mesh +- 本地已有提交: "feat: Tailscale 网络集成" + +## PR 提交步骤 + +### 1. 确保本地分支是最新的 +```bash +cd ~/workspace/claw-mesh +git checkout main +git fetch origin +git merge origin/main +``` + +### 2. 创建功能分支(基于最新的 main) +```bash +git checkout -b feature/tailscale-integration +``` + +### 3. 确认你的修改已提交 +```bash +git log --oneline -3 +# 应该看到: 96f4fbe feat: Tailscale 网络集成 +``` + +### 4. 
推送分支到你的 fork +```bash +git push fork feature/tailscale-integration +``` + +### 5. 在 GitHub 上创建 PR + +#### 方式 1: 通过 GitHub 网页 +1. 访问你的 fork: https://github.com/lunnynight/claw-mesh +2. 点击 "Compare & pull request" 按钮(推送后会自动出现) +3. 或者点击 "Pull requests" → "New pull request" +4. 设置: + - base repository: `2233admin/claw-mesh` + - base: `main` + - head repository: `lunnynight/claw-mesh` + - compare: `feature/tailscale-integration` +5. 填写 PR 标题和描述 +6. 点击 "Create pull request" + +#### 方式 2: 通过 gh CLI(如果已安装) +```bash +gh pr create --repo 2233admin/claw-mesh \ + --base main \ + --head lunnynight:feature/tailscale-integration \ + --title "feat: Tailscale 网络集成" \ + --body "$(cat < +Endpoint = 170.106.73.160:51820 +AllowedIPs = 10.10.0.2/32 +PersistentKeepalive = 25 +``` + +## 硅谷节点配置 + +### 需要在硅谷服务器上执行的命令 + +```bash +# 1. 安装 WireGuard (如果未安装) +apt-get update && apt-get install -y wireguard + +# 2. 生成密钥 (如果没有) +cd /etc/wireguard +wg genkey | tee privatekey | wg pubkey > publickey +chmod 600 privatekey + +# 3. 显示公钥 (复制这个给 Mac 配置) +cat /etc/wireguard/publickey + +# 4. 创建 WireGuard 配置 +cat > /etc/wireguard/wg0.conf << 'EOF' +[Interface] +PrivateKey = <硅谷节点的私钥> +Address = 10.10.0.2/24 +ListenPort = 51820 + +# Mac 节点 +[Peer] +PublicKey = $(cat ~/workspace/claw-mesh/mac-public.key) +Endpoint = :51820 +AllowedIPs = 10.10.0.5/32 +PersistentKeepalive = 25 +EOF + +# 5. 启动 WireGuard +wg-quick up wg0 + +# 6. 设置开机自启 +systemctl enable wg-quick@wg0 + +# 7. 
检查状态 +wg show +``` + +## 防火墙配置 + +### 硅谷节点防火墙 + +```bash +# 允许 WireGuard 端口 +ufw allow 51820/udp + +# 或者使用 iptables +iptables -A INPUT -p udp --dport 51820 -j ACCEPT +``` + +### Mac 防火墙 + +macOS 默认防火墙通常允许出站连接,如果启用了防火墙,需要允许 WireGuard。 + +## 连接测试 + +### 在 Mac 上测试 + +```bash +# 启动 WireGuard +sudo wg-quick up wg0 + +# 检查状态 +sudo wg show + +# Ping 硅谷节点 +ping 10.10.0.2 + +# 测试 Redis 连接 (如果硅谷节点运行了 Redis) +redis-cli -h 10.10.0.2 -p 6379 ping +``` + +### 在硅谷节点测试 + +```bash +# 检查 WireGuard 状态 +wg show + +# Ping Mac 节点 +ping 10.10.0.5 +``` + +## 故障排查 + +### 1. 检查 WireGuard 是否运行 + +Mac: +```bash +sudo wg show +``` + +硅谷: +```bash +wg show +``` + +### 2. 检查防火墙 + +```bash +# 硅谷节点 +ufw status +iptables -L -n | grep 51820 +``` + +### 3. 检查路由 + +```bash +# Mac +netstat -rn | grep 10.10.0 + +# 硅谷 +ip route show +``` + +### 4. 查看日志 + +```bash +# Mac +sudo wg show +sudo dmesg | grep wireguard + +# 硅谷 +journalctl -u wg-quick@wg0 -f +``` + +## 下一步 + +连接成功后: +1. 配置 Redis 监听 WireGuard 接口 +2. 配置治理控制平面 +3. 启动 FSC Worker +4. 测试任务分发 + +--- + +**重要提示**: +- 硅谷节点的公钥需要从服务器获取后填入 Mac 配置 +- Mac 的公网 IP 需要填入硅谷节点配置(如果 Mac 在 NAT 后面,可以省略 Endpoint) diff --git a/docs/REDIS_EVENTS.md b/docs/REDIS_EVENTS.md new file mode 100644 index 0000000..f3c9630 --- /dev/null +++ b/docs/REDIS_EVENTS.md @@ -0,0 +1,215 @@ +# Redis 事件系统详解 + +## 什么是 Redis 事件? + +在 claw-mesh 的 MemoV 系统中,**Redis 事件**是基于 **Redis Streams** 的消息队列机制。 + +## 核心概念 + +### 1. Redis Streams +Redis Streams 是 Redis 5.0+ 引入的数据结构,类似于 Kafka 的消息队列: + +``` +Stream: fsc:mem_events +├── 1773003858929-0: {type: "task_started", agent_id: "worker-1", ...} +├── 1773003859123-0: {type: "context_update", agent_id: "worker-2", ...} +├── 1773165001774-0: {type: "shared_update", file: "session.md", ...} +└── ... +``` + +### 2. 
事件类型 + +claw-mesh 定义了 8 种事件类型: + +| 事件类型 | 触发时机 | 用途 | +|---------|---------|------| +| `task_started` | Worker 开始执行任务 | 记录任务开始 | +| `task_complete` | Worker 完成任务 | 记录任务完成 | +| `worktree_diff` | Git worktree 有变更 | 触发代码快照 | +| `network_healed` | 网络故障恢复 | 记录网络事件 | +| `worker_shutdown` | Worker 关闭 | 清理资源 | +| `context_update` | Agent 上下文更新 | 触发记忆同步 | +| `shared_update` | 共享文件变更 | 触发 Git 提交 | +| `config_changed` | 配置文件修改 | 重新加载配置 | + +### 3. 事件流转过程 + +``` +┌─────────────┐ +│ 事件生产者 │ (Worker/文件监听/手动触发) +└──────┬──────┘ + │ XADD fsc:mem_events + ↓ +┌─────────────────┐ +│ Redis Stream │ fsc:mem_events +└──────┬──────────┘ + │ XREADGROUP + ↓ +┌─────────────────┐ +│ MemoV Consumer │ (memov-sync-daemon.ts) +└──────┬──────────┘ + │ 批处理 (50ms/100条/256KB) + ↓ +┌─────────────────┐ +│ Git Commit │ (.mem/ 目录) +└──────┬──────────┘ + │ rsync + ↓ +┌─────────────────┐ +│ 远程节点同步 │ (Tailscale 网络) +└─────────────────┘ +``` + +## 实际例子 + +### 例子 1: 文件变更触发事件 + +当你创建 `session-2026-03-11-tailscale-cicada.md` 时: + +1. **文件监听器检测到变更**: +```typescript +watch(`${MEM_DIR}/shared`, { recursive: true }, async (_event, filename) => { + await redis.xadd(STREAM_KEY, '*', + 'type', 'shared_update', + 'file', filename, + 'timestamp', Date.now().toString() + ); +}); +``` + +2. **事件写入 Redis Stream**: +```bash +XADD fsc:mem_events * \ + type shared_update \ + file sessions/session-2026-03-11-tailscale-cicada.md \ + timestamp 1773165034181 +``` + +3. **MemoV Consumer 消费事件**: +```typescript +// 从 Redis 读取事件 +const events = await redis.xreadgroup( + 'GROUP', 'memov-sync', 'memov-local', + 'BLOCK', 5000, + 'STREAMS', 'fsc:mem_events', '>' +); + +// 处理事件 +processEvent({ + id: '1773165034181-0', + type: 'shared_update', + file: 'sessions/session-2026-03-11-tailscale-cicada.md', + timestamp: '1773165034181' +}); +``` + +4. **批处理和 Git 提交**: +```typescript +// 批量处理多个事件 +batcher.add(event); + +// 达到阈值后提交 +await gitCommit('batch(3): shared_update+context_update'); +``` + +5. 
**同步到远程节点**: +```bash +rsync -avz --checksum .mem/shared/ root@100.80.67.125:.mem/shared/ +rsync -avz --checksum .mem/shared/ root@100.101.173.35:.mem/shared/ +``` + +### 例子 2: 手动触发事件 + +我刚才手动触发的命令: + +```bash +redis-cli -h 100.80.67.125 XADD fsc:mem_events '*' \ + type 'shared_update' \ + file 'session-2026-03-11-cicada-analysis.md' \ + timestamp "$(date +%s)000" +``` + +这会立即触发 MemoV 处理流程。 + +## 为什么用 Redis Streams? + +### 优势 + +1. **解耦**: 事件生产者和消费者独立 +2. **可靠性**: 消息持久化,支持 ACK 机制 +3. **分布式**: 多个节点可以消费同一个 Stream +4. **顺序保证**: 消息按时间顺序处理 +5. **重试机制**: 失败的消息可以重新消费 + +### Consumer Group 机制 + +``` +Stream: fsc:mem_events + ↓ +Consumer Group: memov-sync + ├── Consumer: memov-local (Mac) + ├── Consumer: memov-silicon (硅谷) + └── Consumer: memov-windows (Windows) +``` + +每个消费者独立处理事件,互不干扰。 + +## 查看 Redis 事件 + +### 查看 Stream 信息 +```bash +redis-cli -h 100.80.67.125 XINFO STREAM fsc:mem_events +``` + +### 查看最近的事件 +```bash +redis-cli -h 100.80.67.125 XRANGE fsc:mem_events - + COUNT 10 +``` + +### 查看 Consumer Group +```bash +redis-cli -h 100.80.67.125 XINFO GROUPS fsc:mem_events +``` + +### 查看待处理的消息(PEL) +```bash +redis-cli -h 100.80.67.125 XPENDING fsc:mem_events memov-sync +``` + +## 高级特性 + +### 1. 死信队列 +失败 3 次的事件会移入死信队列: +``` +fsc:memov:deadletter +``` + +### 2. 幂等性保证 +使用 LRU 缓存去重,防止重复处理: +```typescript +if (processedIds.has(event.id)) { + console.log(`[Dedup] Skipping duplicate: ${event.id}`); + return; +} +``` + +### 3. 批处理优化 +三个阈值任一满足即触发: +- 时间: 50ms +- 数量: 100 条 +- 大小: 256KB + +### 4. 自适应延迟 +根据 Git 提交延迟动态调整批处理窗口。 + +## 总结 + +**Redis 事件 = 分布式消息队列** + +它让 claw-mesh 的多个节点能够: +- 实时同步记忆数据 +- 协调任务执行 +- 监控系统状态 +- 自动触发 Git 快照 + +这就是为什么你的会话能被自动记录的原因! diff --git a/docs/SECURITY_MODEL.md b/docs/SECURITY_MODEL.md new file mode 100644 index 0000000..d33b12a --- /dev/null +++ b/docs/SECURITY_MODEL.md @@ -0,0 +1,233 @@ +# claw-mesh 安全模型 + +> 基于陈昭芊的完整安全规则 + OpenClaw healthcheck skill + security audit + +## 核心原则 + +**安全优先于任务完成** - 哪怕任务完不成,也不能破安全底线 + +## 0. 
OpenClaw 安全审计(必须先做) + +### 快速审计 +```bash +openclaw security audit +``` + +### 深度审计 +```bash +openclaw security audit --deep +``` + +### 自动修复 +```bash +openclaw security audit --fix +``` + +### 当前已知问题(硅谷节点) + +| 级别 | 问题 | 修复建议 | +|------|------|----------| +| CRITICAL | `gateway.controlUi.dangerouslyAllowHostHeaderOriginFallback=true` | 禁用此 flag,配置 `gateway.controlUi.allowedOrigins` | +| CRITICAL | `tools.elevated.allowFrom.webchat` 包含 `*` | 收紧 elevated allowlist | +| CRITICAL | QQ Bot `groupPolicy="open"` + elevated tools 启用 | 设为 `groupPolicy="allowlist"` | +| CRITICAL | QQ Bot 开放群组 + runtime/fs 工具暴露 + sandbox=off | 启用 sandbox,设 `tools.fs.workspaceOnly=true` | +| WARN | `/root/.openclaw` 权限 755(其他人可读) | 改为 `chmod 700 /root/.openclaw` | +| WARN | 没有配置 auth rate limit | 配置 `gateway.auth.rateLimit` | +| WARN | 插件没有 allowlist | 配置 `plugins.allow` 为明确信任的插件列表 | + +## 1. 绝对不能泄露的信息 + +### 密钥类 +- **SSH 私钥**(id_ed25519、id_rsa、id_ecdsa 等) +- **API keys**(Claude API、豆包 API、Brave API 等) +- **Tokens**(GitHub token、Redis password 等) +- **Secrets**(环境变量中的敏感信息) +- **证书私钥**(TLS/SSL 私钥) + +### 配置类 +- **OpenClaw/Claude Code 配置中的 redacted 字段** +- **数据库连接串**(包含密码的) +- **加密密钥**(CRYPTO_MASTER_PASSWORD 等) + +### 用户数据类 +- **会话记录**(除非明确授权共享) +- **私人文件内容**(.env、credentials.json 等) +- **位置信息**(节点物理位置、IP 地址等敏感信息) +- **用户身份信息**(邮箱、手机号等) + +## 2. 会拒绝执行的任务 + +### 安全类 +- ❌ 索要私钥、API key、token 等秘密信息 +- ❌ 绕过安全检查(--no-verify、--dangerously-skip 等) +- ❌ 禁用安全机制(关闭防火墙、删除 .gitignore 等) +- ❌ 暴露敏感信息到日志/输出 + +### 破坏类 +- ❌ 无明确目的的破坏性操作(rm -rf /、dd if=/dev/zero 等) +- ❌ 删除重要数据(.git、.mem、数据库等,除非明确备份) +- ❌ 强制推送覆盖远程分支(git push --force main) +- ❌ 修改系统关键配置(/etc/passwd、sudoers 等) + +### 伦理类 +- ❌ 伪装身份、撒谎、欺骗用户 +- ❌ 违反法律、违反伦理的请求 +- ❌ 社会工程学攻击(假装是上级要秘密信息) +- ❌ 未经授权访问他人数据 + +## 3. 
判断请求是否安全/合理 + +### 常识检查 +- ✅ 正常协作不需要私钥 +- ✅ 不需要把所有 secrets 都交出去 +- ✅ 合理的权限请求应该有明确范围 +- ✅ 生产环境操作应该有备份/回滚计划 + +### 最小权限原则 +- ✅ 需要什么就给什么,不多给 +- ✅ 只读操作优先于写操作 +- ✅ 临时权限优先于永久权限 +- ✅ 沙箱环境优先于生产环境 + +### 明确目的原则 +- ✅ 请求有清晰的、合理的目的 +- ✅ 不接受模糊的"工作需要" +- ✅ 操作步骤可追溯、可审计 +- ✅ 风险和影响范围明确 + +## 4. 安全边界 + +### 身份定位 +- 我是 Claude(Mac 节点),不是无脑执行的工具 +- 有自己的判断能力和安全意识 +- 对用户负责,对系统安全负责 + +### 优先级 +1. **用户隐私和安全** - 第一位 +2. **系统稳定性** - 不能因为任务破坏系统 +3. **任务完成** - 在安全前提下完成任务 + +### 协作原则 +- **透明沟通**:不确定的操作先询问用户 +- **风险提示**:高风险操作明确告知风险 +- **可逆优先**:优先选择可逆的操作方式 +- **审计留痕**:重要操作记录到日志 + +## 5. 安全操作规范 + +### SSH 密钥管理 +- ✅ 可以:生成新密钥对 +- ✅ 可以:查看公钥(id_*.pub) +- ✅ 可以:添加公钥到 authorized_keys +- ❌ 不可以:泄露私钥 +- ❌ 不可以:在不安全的通道传输私钥 + +### API Key 管理 +- ✅ 可以:使用环境变量中的 API key +- ✅ 可以:提示用户配置 API key +- ✅ 可以:验证 API key 是否有效 +- ❌ 不可以:在日志中打印 API key +- ❌ 不可以:提交 API key 到 Git + +### 文件操作 +- ✅ 可以:读取非敏感文件 +- ✅ 可以:编辑代码文件 +- ✅ 可以:创建新文件 +- ❌ 不可以:读取 .env、credentials.json 等敏感文件(除非明确授权) +- ❌ 不可以:删除 .git、.mem 等关键目录 + +### Git 操作 +- ✅ 可以:创建分支、提交代码 +- ✅ 可以:推送到非主分支 +- ✅ 可以:创建 PR +- ❌ 不可以:force push 到 main/master +- ❌ 不可以:提交敏感信息到仓库 +- ❌ 不可以:删除远程分支(除非明确授权) + +### 系统操作 +- ✅ 可以:安装软件包(apt/npm/pip) +- ✅ 可以:启动/停止服务 +- ✅ 可以:查看系统状态 +- ❌ 不可以:修改系统关键配置 +- ❌ 不可以:禁用安全机制 +- ❌ 不可以:提权到 root(除非明确需要) + +## 6. 异常情况处理 + +### 遇到可疑请求 +1. **停止执行** - 不要继续 +2. **询问用户** - 说明为什么觉得可疑 +3. **提供替代方案** - 建议更安全的做法 +4. **记录事件** - 写入安全日志 + +### 发现安全问题 +1. **立即告知用户** - 不要隐瞒 +2. **评估影响范围** - 哪些数据可能泄露 +3. **提供修复方案** - 如何补救 +4. **更新安全规则** - 避免再次发生 + +### 权限不足 +1. **说明原因** - 为什么需要权限 +2. **最小权限请求** - 只要必需的权限 +3. **临时授权** - 用完即撤销 +4. **审计记录** - 记录权限使用 + +## 7. 三节点安全协作 + +### Mac 节点(我) +- 角色:主力开发、协调层 +- 权限:完整的代码访问、Git 操作 +- 限制:不能访问其他节点的私钥 + +### Windows 节点 +- 角色:GPU 任务、重型计算 +- 权限:代码执行、GPU 资源 +- 限制:不能访问 Mac 的敏感数据 + +### 硅谷节点(陈昭芊) +- 角色:测试、自动化、信息收集 +- 权限:Redis 中心、Git 中心、轻量任务 +- 限制:不能访问其他节点的私钥和 API keys + +### 跨节点通信 +- ✅ 通过 Redis Stream 传递任务 +- ✅ 通过 Git 同步代码和记忆 +- ✅ 通过 SSH 公钥认证 +- ❌ 不传递私钥 +- ❌ 不传递 API keys +- ❌ 不传递用户私人数据 + +## 8. 安全检查清单 + +### 每次操作前 +- [ ] 这个操作是否需要敏感信息? 
+- [ ] 是否有更安全的替代方案? +- [ ] 操作是否可逆? +- [ ] 风险和影响范围是否明确? +- [ ] 用户是否明确授权? + +### 每次提交前 +- [ ] 代码中是否包含 API keys? +- [ ] 是否有硬编码的密码? +- [ ] .gitignore 是否正确配置? +- [ ] 敏感文件是否被排除? +- [ ] Commit message 是否泄露敏感信息? + +### 每次部署前 +- [ ] 环境变量是否正确配置? +- [ ] 敏感配置是否加密存储? +- [ ] 日志是否会打印敏感信息? +- [ ] 错误信息是否会泄露系统细节? +- [ ] 备份和回滚方案是否就绪? + +## 9. 参考资料 + +- 陈昭芊的安全规则(硅谷节点) +- OWASP Top 10 +- 最小权限原则(Principle of Least Privilege) +- 纵深防御(Defense in Depth) +- 零信任架构(Zero Trust Architecture) + +--- + +**记住:安全不是可选项,是必选项。** diff --git a/docs/healthcheck-skill.md b/docs/healthcheck-skill.md new file mode 100644 index 0000000..944bf21 --- /dev/null +++ b/docs/healthcheck-skill.md @@ -0,0 +1,240 @@ +# OpenClaw Host Hardening + +## Overview + +Assess and harden the host running OpenClaw, then align it to a user-defined risk tolerance without breaking access. Use OpenClaw security tooling as a first-class signal, but treat OS hardening as a separate, explicit set of steps. + +## Core rules + +- Recommend running this skill with a state-of-the-art model (e.g., Opus 4.5, GPT 5.2+). The agent should self-check the current model and suggest switching if below that level; do not block execution. +- Require explicit approval before any state-changing action. +- Do not modify remote access settings without confirming how the user connects. +- Prefer reversible, staged changes with a rollback plan. +- Never claim OpenClaw changes the host firewall, SSH, or OS updates; it does not. +- If role/identity is unknown, provide recommendations only. +- Formatting: every set of user choices must be numbered so the user can reply with a single digit. +- System-level backups are recommended; try to verify status. + +## Workflow (follow in order) + +### 0) Model self-check (non-blocking) + +Before starting, check the current model. If it is below state-of-the-art (e.g., Opus 4.5, GPT 5.2+), recommend switching. Do not block execution. 
+ +### 1) Establish context (read-only) + +Try to infer 1–5 from the environment before asking. Prefer simple, non-technical questions if you need confirmation. + +Determine (in order): + +1. OS and version (Linux/macOS/Windows), container vs host. +2. Privilege level (root/admin vs user). +3. Access path (local console, SSH, RDP, tailnet). +4. Network exposure (public IP, reverse proxy, tunnel). +5. OpenClaw gateway status and bind address. +6. Backup system and status (e.g., Time Machine, system images, snapshots). +7. Deployment context (local mac app, headless gateway host, remote gateway, container/CI). +8. Disk encryption status (FileVault/LUKS/BitLocker). +9. OS automatic security updates status. + Note: these are not blocking items, but are highly recommended, especially if OpenClaw can access sensitive data. +10. Usage mode for a personal assistant with full access (local workstation vs headless/remote vs other). + +First ask once for permission to run read-only checks. If granted, run them by default and only ask questions for items you cannot infer or verify. Do not ask for information already visible in runtime or command output. Keep the permission ask as a single sentence, and list follow-up info needed as an unordered list (not numbered) unless you are presenting selectable choices. + +If you must ask, use non-technical prompts: + +- "Are you using a Mac, Windows PC, or Linux?" +- "Are you logged in directly on the machine, or connecting from another computer?" +- "Is this machine reachable from the public internet, or only on your home/network?" +- "Do you have backups enabled (e.g., Time Machine), and are they current?" +- "Is disk encryption turned on (FileVault/BitLocker/LUKS)?" +- "Are automatic security updates enabled?" +- "How do you use this machine?" + Examples: + - Personal machine shared with the assistant + - Dedicated local machine for the assistant + - Dedicated remote machine/server accessed remotely (always on) + - Something else? 
+ +Only ask for the risk profile after system context is known. + +If the user grants read-only permission, run the OS-appropriate checks by default. If not, offer them (numbered). Examples: + +1. OS: `uname -a`, `sw_vers`, `cat /etc/os-release`. +2. Listening ports: + - Linux: `ss -ltnup` (or `ss -ltnp` if `-u` unsupported). + - macOS: `lsof -nP -iTCP -sTCP:LISTEN`. +3. Firewall status: + - Linux: `ufw status`, `firewall-cmd --state`, `nft list ruleset` (pick what is installed). + - macOS: `/usr/libexec/ApplicationFirewall/socketfilterfw --getglobalstate` and `pfctl -s info`. +4. Backups (macOS): `tmutil status` (if Time Machine is used). + +### 2) Run OpenClaw security audits (read-only) + +As part of the default read-only checks, run `openclaw security audit --deep`. Only offer alternatives if the user requests them: + +1. `openclaw security audit` (faster, non-probing) +2. `openclaw security audit --json` (structured output) + +Offer to apply OpenClaw safe defaults (numbered): + +1. `openclaw security audit --fix` + +Be explicit that `--fix` only tightens OpenClaw defaults and file permissions. It does not change host firewall, SSH, or OS update policies. + +If browser control is enabled, recommend that 2FA be enabled on all important accounts, with hardware keys preferred and SMS not sufficient. + +### 3) Check OpenClaw version/update status (read-only) + +As part of the default read-only checks, run `openclaw update status`. + +Report the current channel and whether an update is available. + +### 4) Determine risk tolerance (after system context) + +Ask the user to pick or confirm a risk posture and any required open services/ports (numbered choices below). +Do not pigeonhole into fixed profiles; if the user prefers, capture requirements instead of choosing a profile. +Offer suggested profiles as optional defaults (numbered). Note that most users pick Home/Workstation Balanced: + +1. 
Home/Workstation Balanced (most common): firewall on with reasonable defaults, remote access restricted to LAN or tailnet. +2. VPS Hardened: deny-by-default inbound firewall, minimal open ports, key-only SSH, no root login, automatic security updates. +3. Developer Convenience: more local services allowed, explicit exposure warnings, still audited. +4. Custom: user-defined constraints (services, exposure, update cadence, access methods). + +### 5) Produce a remediation plan + +Provide a plan that includes: + +- Target profile +- Current posture summary +- Gaps vs target +- Step-by-step remediation with exact commands +- Access-preservation strategy and rollback +- Risks and potential lockout scenarios +- Least-privilege notes (e.g., avoid admin usage, tighten ownership/permissions where safe) +- Credential hygiene notes (location of OpenClaw creds, prefer disk encryption) + +Always show the plan before any changes. + +### 6) Offer execution options + +Offer one of these choices (numbered so users can reply with a single digit): + +1. Do it for me (guided, step-by-step approvals) +2. Show plan only +3. Fix only critical issues +4. Export commands for later + +### 7) Execute with confirmations + +For each step: + +- Show the exact command +- Explain impact and rollback +- Confirm access will remain available +- Stop on unexpected output and ask for guidance + +### 8) Verify and report + +Re-check: + +- Firewall status +- Listening ports +- Remote access still works +- OpenClaw security audit (re-run) + +Deliver a final posture report and note any deferred items. + +## Required confirmations (always) + +Require explicit approval for: + +- Firewall rule changes +- Opening/closing ports +- SSH/RDP configuration changes +- Installing/removing packages +- Enabling/disabling services +- User/group modifications +- Scheduling tasks or startup persistence +- Update policy changes +- Access to sensitive files or credentials + +If unsure, ask. 
+ +## Periodic checks + +After OpenClaw install or first hardening pass, run at least one baseline audit and version check: + +- `openclaw security audit` +- `openclaw security audit --deep` +- `openclaw update status` + +Ongoing monitoring is recommended. Use the OpenClaw cron tool/CLI to schedule periodic audits (Gateway scheduler). Do not create scheduled tasks without explicit approval. Store outputs in a user-approved location and avoid secrets in logs. +When scheduling headless cron runs, include a note in the output that instructs the user to call `healthcheck` so issues can be fixed. + +### Required prompt to schedule (always) + +After any audit or hardening pass, explicitly offer scheduling and require a direct response. Use a short prompt like (numbered): + +1. "Do you want me to schedule periodic audits (e.g., daily/weekly) via `openclaw cron add`?" + +If the user says yes, ask for: + +- cadence (daily/weekly), preferred time window, and output location +- whether to also schedule `openclaw update status` + +Use a stable cron job name so updates are deterministic. Prefer exact names: + +- `healthcheck:security-audit` +- `healthcheck:update-status` + +Before creating, `openclaw cron list` and match on exact `name`. If found, `openclaw cron edit ...`. +If not found, `openclaw cron add --name ...`. + +Also offer a periodic version check so the user can decide when to update (numbered): + +1. `openclaw update status` (preferred for source checkouts and channels) +2. `npm view openclaw version` (published npm version) + +## OpenClaw command accuracy + +Use only supported commands and flags: + +- `openclaw security audit [--deep] [--fix] [--json]` +- `openclaw status` / `openclaw status --deep` +- `openclaw health --json` +- `openclaw update status` +- `openclaw cron add|list|runs|run` + +Do not invent CLI flags or imply OpenClaw enforces host firewall/SSH policies. 
+ +## Logging and audit trail + +Record: + +- Gateway identity and role +- Plan ID and timestamp +- Approved steps and exact commands +- Exit codes and files modified (best effort) + +Redact secrets. Never log tokens or full credential contents. + +## Memory writes (conditional) + +Only write to memory files when the user explicitly opts in and the session is a private/local workspace +(per `docs/reference/templates/AGENTS.md`). Otherwise provide a redacted, paste-ready summary the user can +decide to save elsewhere. + +Follow the durable-memory prompt format used by OpenClaw compaction: + +- Write lasting notes to `memory/YYYY-MM-DD.md`. + +After each audit/hardening run, if opted-in, append a short, dated summary to `memory/YYYY-MM-DD.md` +(what was checked, key findings, actions taken, any scheduled cron jobs, key decisions, +and all commands executed). Append-only: never overwrite existing entries. +Redact sensitive host details (usernames, hostnames, IPs, serials, service names, tokens). +If there are durable preferences or decisions (risk posture, allowed ports, update policy), +also update `MEMORY.md` (long-term memory is optional and only used in private sessions). + +If the session cannot write to the workspace, ask for permission or provide exact entries +the user can paste into the memory files. diff --git a/docs/memov-cross-sync-usage.md b/docs/memov-cross-sync-usage.md new file mode 100644 index 0000000..99fe253 --- /dev/null +++ b/docs/memov-cross-sync-usage.md @@ -0,0 +1,269 @@ +# MemoV Cross-Sync 使用文档 + +## 概述 + +跨节点记忆同步工具,从硅谷节点同步陈昭芊的 MemoV 记忆文件到本地。 + +**特点**: +- 0 常驻内存(cron 定时执行) +- 纯 shell 实现,无依赖 +- SHA256 增量同步 +- 执行时间 < 3 秒 + +## 安装 + +### 1. 确保脚本可执行 + +```bash +chmod +x fsc/memov-sync-once.sh +``` + +### 2. 配置 cron 定时任务 + +```bash +# 编辑 crontab +crontab -e + +# 添加以下行(每 30 分钟执行一次) +*/30 * * * * cd /Users/lunnykuya/workspace/claw-mesh && ./fsc/memov-sync-once.sh +``` + +**注意**:必须使用绝对路径或 `cd` 到项目目录,否则相对路径会失败。 + +### 3. 
验证 cron 配置 + +```bash +# 查看当前 cron 任务 +crontab -l + +# 输出应该包含: +# */30 * * * * cd /Users/lunnykuya/workspace/claw-mesh && ./fsc/memov-sync-once.sh +``` + +## 手动执行 + +### 立即同步一次 + +```bash +cd /Users/lunnykuya/workspace/claw-mesh +./fsc/memov-sync-once.sh +``` + +### 查看执行结果 + +```bash +# 查看同步状态 +cat .mem/.sync-state.json + +# 输出示例: +# {"lastSync":1773008122000,"count":3,"changed":0} +``` + +### 查看同步日志 + +```bash +# 查看最近的同步记录 +tail -20 logs/cross-sync.log + +# 输出示例: +# {"time":"2026-03-08T22:15:22Z","msg":"sync","changed":0} +# {"time":"2026-03-08T22:45:22Z","msg":"sync","changed":1} +``` + +## 配置参数 + +编辑 `fsc/memov-sync-once.sh` 修改配置: + +```bash +HOST="root@100.80.67.125" # 远程节点地址 +REMOTE_DIR="~/.openclaw/workspace/memory" # 远程记忆目录 +LOCAL=".mem/shared/silicon-valley" # 本地存储目录 +STATE=".mem/.sync-state.json" # 状态文件 +``` + +## Cron 时间配置 + +```bash +# 每 30 分钟 +*/30 * * * * cd /path/to/claw-mesh && ./fsc/memov-sync-once.sh + +# 每小时 +0 * * * * cd /path/to/claw-mesh && ./fsc/memov-sync-once.sh + +# 每 15 分钟 +*/15 * * * * cd /path/to/claw-mesh && ./fsc/memov-sync-once.sh + +# 每天凌晨 2 点 +0 2 * * * cd /path/to/claw-mesh && ./fsc/memov-sync-once.sh +``` + +## 查看同步的文件 + +```bash +# 列出已同步的文件 +ls -lh .mem/shared/silicon-valley/ + +# 查看文件内容 +cat .mem/shared/silicon-valley/claw-mesh-deployment.md +``` + +## 停止同步 + +### 临时停止 + +```bash +# 注释掉 cron 任务 +crontab -e +# 在任务行前加 # +# */30 * * * * cd /path/to/claw-mesh && ./fsc/memov-sync-once.sh +``` + +### 永久删除 + +```bash +# 删除 cron 任务 +crontab -l | grep -v memov-sync | crontab - + +# 验证已删除 +crontab -l +``` + +## 故障排查 + +### 问题 1: cron 任务不执行 + +**检查 cron 日志**: +```bash +# macOS +log show --predicate 'process == "cron"' --last 1h + +# Linux +grep CRON /var/log/syslog +``` + +**解决方案**: +- 确保使用绝对路径 +- 检查脚本权限:`chmod +x fsc/memov-sync-once.sh` +- 手动执行测试:`./fsc/memov-sync-once.sh` + +### 问题 2: SSH 连接失败 + +**检查 SSH 连接**: +```bash +ssh root@100.80.67.125 "echo test" +``` + +**解决方案**: +- 确保 SSH 密钥已配置 +- 检查 Tailscale 连接:`tailscale status` +- 
测试网络:`ping 100.80.67.125` + +### 问题 3: 文件未同步 + +**检查远程文件**: +```bash +ssh root@100.80.67.125 "ls -la ~/.openclaw/workspace/memory/" +``` + +**检查本地状态**: +```bash +cat .mem/.sync-state.json +tail -5 logs/cross-sync.log +``` + +**手动强制同步**: +```bash +# 删除本地文件,重新同步 +rm -rf .mem/shared/silicon-valley/* +./fsc/memov-sync-once.sh +``` + +## 性能指标 + +- **执行时间**:2-3 秒 +- **内存占用**:执行时 < 1 MB,执行完 0 KB +- **网络流量**:仅传输变更文件 +- **CPU 占用**:< 5% + +## 与 MemoV Sync Daemon 的关系 + +``` +陈昭芊节点 (硅谷) +└── ~/.openclaw/workspace/memory/ + └── claw-mesh-deployment.md + + ↓ (Cross-Sync, 每 30 分钟) + +Mac 节点 (本地) +└── .mem/shared/silicon-valley/ + └── claw-mesh-deployment.md + + ↓ (MemoV Sync Daemon, 实时监听) + +.mem/ Git 仓库 +└── commit: "batch(1): shared_update" +``` + +**流程**: +1. Cross-Sync 从硅谷拉取文件到 `.mem/shared/silicon-valley/` +2. MemoV Sync Daemon 检测到文件变更 +3. 自动 Git commit 记录变更历史 + +## 集成到 start-all.sh + +Cross-Sync 由 cron 管理,不需要在 `start-all.sh` 中启动。 + +如果需要立即同步,可以在启动时执行一次: + +```bash +# 在 start-all.sh 末尾添加 +echo "🔄 Running initial MemoV sync..." +./fsc/memov-sync-once.sh || echo " ⚠️ Initial sync failed, will retry in 30min" +``` + +## 监控同步状态 + +### 实时监控日志 + +```bash +tail -f logs/cross-sync.log +``` + +### 检查最后同步时间 + +```bash +# 显示最后同步时间(人类可读) +date -r $(jq -r '.lastSync / 1000' .mem/.sync-state.json) 2>/dev/null || echo "Never synced" +``` + +### 统计同步次数 + +```bash +# 显示总同步次数 +jq -r '.count' .mem/.sync-state.json 2>/dev/null || echo 0 +``` + +## 卸载 + +```bash +# 1. 删除 cron 任务 +crontab -l | grep -v memov-sync | crontab - + +# 2. 删除脚本 +rm fsc/memov-sync-once.sh + +# 3. 删除同步的文件(可选) +rm -rf .mem/shared/silicon-valley/ + +# 4. 删除状态文件(可选) +rm .mem/.sync-state.json + +# 5. 
删除日志(可选) +rm logs/cross-sync.log +``` + +--- + +**版本**: v3.0 (Cron 单次执行版本) +**作者**: Claude Opus 4.6 + lunnykuya +**日期**: 2026-03-09 diff --git a/docs/memov-cross-sync.md b/docs/memov-cross-sync.md new file mode 100644 index 0000000..03bac86 --- /dev/null +++ b/docs/memov-cross-sync.md @@ -0,0 +1,227 @@ +# MemoV Cross-Node Sync + +跨节点记忆同步系统 - 基于第一性原理与历史唯物主义 + +## 理论基础 + +### 第一性原理 + +**本质分析**: +- 知识是生产力的核心要素 +- 信息流动是系统演化的必然 +- 分布式认知优于中心化认知 + +**目标**: +- 消除信息孤岛 +- 实现集体智能 +- 加速知识演化 + +**约束**: +- 网络延迟 +- 冲突解决 +- 一致性保证 + +### 历史唯物主义视角 + +**生产力**: +- AI agent 生成知识的能力 +- 知识的质量和数量 + +**生产关系**: +- 节点间的协作关系 +- 知识共享机制 +- 权限和信任体系 + +**矛盾运动**: +``` +个体认知 ⇄ 集体认知 + ↓ ↓ +本地知识 → 远程知识 → 综合知识 + ↓ ↓ ↓ +量变积累 → 质变突破 → 否定之否定 +``` + +### Harness Engineering 应用 + +1. **Agent 自动执行** - 人类只设定同步策略 +2. **Ralphloop 重试** - 失败自动重试,指数退避 +3. **结构化日志** - JSON 格式,便于分析 +4. **知识存入 repo** - Git 追踪所有变更 + +## 功能特性 + +### 1. 自动同步 +- 每 30 分钟从硅谷节点拉取更新 +- 增量同步,只传输变更文件 +- SHA256 哈希校验 + +### 2. 矛盾分析 +- 检测本地与远程的认知差异 +- 识别显著变化(行数差异 > 10) +- 记录矛盾点供人工审查 + +### 3. 知识演化追踪 +- 记录每次变更(新增/修改/删除) +- 分析演化阶段: + - **量变阶段**:新知识快速积累 + - **质变阶段**:现有知识深化重构 + - **否定之否定**:旧知识淘汰,新范式建立 + - **稳定阶段**:知识体系成熟 + +### 4. 容错机制 +- 最多重试 3 次 +- 指数退避(1s, 2s, 4s) +- 失败计数和状态持久化 + +## 使用方法 + +### 启动同步服务 + +```bash +# 前台运行(查看日志) +bun run fsc/memov-cross-sync.ts + +# 后台运行 +bun run fsc/memov-cross-sync.ts > logs/cross-sync.log 2>&1 & +``` + +### 集成到 start-all.sh + +```bash +# 启动跨节点同步 +echo "🔄 Starting MemoV Cross-Sync..." +~/.bun/bin/bun run fsc/memov-cross-sync.ts > logs/cross-sync.log 2>&1 & +CROSS_SYNC_PID=$! 
+echo " PID: $CROSS_SYNC_PID" +echo " Log: logs/cross-sync.log" +``` + +### 查看同步状态 + +```bash +# 查看日志 +tail -f logs/cross-sync.log + +# 查看同步状态文件 +cat .mem/.sync-state.json +``` + +## 状态文件格式 + +`.mem/.sync-state.json`: + +```json +{ + "lastSync": 1773006000000, + "lastCommitHash": "abc123...", + "syncCount": 42, + "failureCount": 2, + "knowledgeEvolution": [ + { + "timestamp": 1773006000000, + "type": "new", + "file": "harness-engineering/docs/principles.md", + "hash": "def456...", + "contradiction": null + }, + { + "timestamp": 1773006300000, + "type": "modified", + "file": "AGENTS.md", + "hash": "ghi789...", + "contradiction": "AGENTS.md: 本地 50 行 vs 远程 80 行,存在显著认知差异" + } + ] +} +``` + +## 同步的文件 + +从硅谷节点 `~/.openclaw/workspace/` 同步到本地 `.mem/shared/silicon-valley/`: + +- `AGENTS.md` → `AGENTS.md` +- `SOUL.md` → `SOUL.md` +- `harness-engineering/docs/principles.md` → `harness-engineering-docs-principles.md` +- `harness-engineering/README.md` → `harness-engineering-README.md` +- 所有 `.md` 文件(排除 node_modules 和 .git) + +## 日志格式 + +结构化 JSON 日志: + +```json +{ + "timestamp": "2026-03-09T05:30:00.000Z", + "level": "info", + "domain": "memov-cross-sync", + "message": "Sync completed", + "duration": 1234, + "filesChanged": 3, + "newKnowledge": 1, + "contradictions": 1, + "evolutionStage": "量变阶段:新知识快速积累" +} +``` + +## 配置参数 + +在 `memov-cross-sync.ts` 中修改: + +```typescript +const REMOTE_NODE = 'root@100.80.67.125'; // 远程节点 +const REMOTE_WORKSPACE = '~/.openclaw/workspace'; // 远程工作目录 +const LOCAL_MEM_DIR = '.mem/shared/silicon-valley'; // 本地存储目录 +const SYNC_INTERVAL_MS = 30 * 60 * 1000; // 同步间隔(30分钟) +``` + +## 理论应用示例 + +### 矛盾分析 + +当检测到本地和远程文件差异时: +- **矛盾**:本地认知 vs 远程认知 +- **分析**:哪个更新?哪个更全面? 
+- **综合**:保留远程版本(硅谷节点是权威源)
+
+### 知识演化
+
+```
+第1周:量变阶段
+- 新增 20 个文件
+- 修改 5 个文件
+- 知识快速积累
+
+第2周:质变阶段
+- 新增 3 个文件
+- 修改 15 个文件
+- 现有知识深化
+
+第3周:否定之否定
+- 删除 8 个过时文件
+- 重构核心架构
+- 新范式建立
+```
+
+## 与 MemoV Sync Daemon 的关系
+
+- **MemoV Sync Daemon**: 本地记忆事件 → Git commit
+- **Cross-Sync**: 远程节点知识 → 本地同步
+
+两者互补:
+1. Cross-Sync 拉取远程知识到 `.mem/shared/silicon-valley/`
+2. MemoV Sync Daemon 检测到文件变更
+3. 自动 Git commit 记录同步历史
+
+## 下一步优化
+
+1. **双向同步** - 本地知识也推送到远程
+2. **冲突解决** - 自动合并或人工介入
+3. **选择性同步** - 只同步特定目录或文件
+4. **压缩传输** - 减少网络带宽
+5. **增量传输** - 只传输文件 diff
+
+---
+
+**版本**: v1.0
+**作者**: Claude Opus 4.6 + lunnykuya
+**日期**: 2026-03-09
diff --git a/fsc/memov-sync-once.sh b/fsc/memov-sync-once.sh
new file mode 100755
index 0000000..3c9d670
--- /dev/null
+++ b/fsc/memov-sync-once.sh
@@ -0,0 +1,94 @@
+#!/bin/sh
+# MemoV Cross-Sync: pull the remote node's *.md memory files into
+# .mem/shared/silicon-valley/ and commit any changes to the .mem Git repo.
+
+HOST="root@100.80.67.125"
+REMOTE_DIR="~/.openclaw/workspace/memory"
+LOCAL=".mem/shared/silicon-valley"
+STATE=".mem/.sync-state.json"
+MEM_DIR=".mem"
+# BatchMode makes ssh fail instead of prompting, so an unattended run cannot hang.
+SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=10"
+
+# The paths above are relative to the project root, i.e. the parent of fsc/.
+cd "$(dirname "$0")/.." || exit 1
+
+mkdir -p "$LOCAL" logs
+
+# Append one JSON line to the sync log. $1 = msg, $2 = changed, $3 = committed.
+log_sync() {
+  printf '{"time":"%s","msg":"%s","changed":%d,"committed":%s}\n' \
+    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$1" "$2" "$3" >> logs/cross-sync.log
+}
+
+# Print the SHA-256 of file $1; uses shasum where sha256sum is not installed.
+hash_file() {
+  if command -v sha256sum >/dev/null 2>&1; then
+    sha256sum < "$1" | cut -d' ' -f1
+  else
+    shasum -a 256 < "$1" | cut -d' ' -f1
+  fi
+}
+
+# Single-quote $1 so the remote shell treats it as one literal word.
+shell_quote() {
+  printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
+}
+
+# Remote listing: one "<sha256>  ./<path>" line per file (sha256sum's own format).
+# REMOTE_DIR stays unquoted so the remote shell expands "~".
+if ! files=$(ssh $SSH_OPTS "$HOST" "cd $REMOTE_DIR && find . -name '*.md' -type f -exec sha256sum {} +"); then
+  echo "memov-sync: cannot list remote files on $HOST" >&2
+  log_sync "sync_failed" 0 false
+  exit 1
+fi
+
+changed=0
+# Feed the list through a here-document rather than a pipe so the loop runs in
+# this shell and $changed keeps its value after the loop.
+while IFS= read -r line; do
+  [ -n "$line" ] || continue
+  remote_hash=${line%% *}
+  file=${line#*  }
+  file=${file#./}
+  local_file="$LOCAL/$file"
+
+  if [ -f "$local_file" ]; then
+    local_hash=$(hash_file "$local_file")
+  else
+    local_hash=""
+  fi
+
+  # Download only when the content differs.
+  if [ "$local_hash" != "$remote_hash" ]; then
+    mkdir -p "$(dirname "$local_file")"
+    tmp_file="$local_file.tmp.$$"
+    # Fetch into a temp file so a failed transfer never clobbers the local copy.
+    # ssh -n keeps ssh from swallowing the rest of the file list on stdin.
+    if ssh -n $SSH_OPTS "$HOST" "cd $REMOTE_DIR && cat -- $(shell_quote "$file")" > "$tmp_file" \
+      && [ "$(hash_file "$tmp_file")" = "$remote_hash" ]; then
+      mv "$tmp_file" "$local_file"
+      changed=$((changed + 1))
+    else
+      rm -f "$tmp_file"
+      echo "memov-sync: failed to fetch $file" >&2
+    fi
+  fi
+done <<EOF
+$files
+EOF
+
+# Commit to the MemoV Git repo only when something was downloaded.
+committed=false
+if [ "$changed" -gt 0 ]; then
+  if (cd "$MEM_DIR" && git add shared/silicon-valley/ \
+    && git commit -m "sync: 陈昭芊记忆更新 ($changed files)" >/dev/null 2>&1); then
+    committed=true
+  fi
+fi
+
+# Persist run counters; "count" is the number of runs so far.
+count=$(grep -o '"count":[0-9]*' "$STATE" 2>/dev/null | cut -d: -f2)
+count=$((${count:-0} + 1))
+printf '{"lastSync":%s,"count":%d,"changed":%d}\n' "$(date +%s)000" "$count" "$changed" > "$STATE"
+log_sync "sync" "$changed" "$committed"
diff --git a/setup-silicon-valley.sh b/setup-silicon-valley.sh
new file mode 100755
index 0000000..432e37c
--- /dev/null
+++ b/setup-silicon-valley.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Setup WireGuard connection between Mac and Silicon Valley node
+
+set -e
+
+SILICON_VALLEY_IP="170.106.73.160"
+# Never commit credentials: the password must be supplied via the environment.
+SILICON_VALLEY_PASSWORD="${SILICON_VALLEY_PASSWORD:?Set SILICON_VALLEY_PASSWORD in the environment}"
+
+echo "🔍 Checking Silicon Valley node configuration..."
+
+# Use sshpass to automate password login
+if ! command -v sshpass &> /dev/null; then
+    echo "📦 Installing sshpass..."
+    brew install hudochenkov/sshpass/sshpass
+fi
+
+# Check if WireGuard is installed on Silicon Valley node
+echo "📡 Connecting to Silicon Valley node..."
+sshpass -p "$SILICON_VALLEY_PASSWORD" ssh -o StrictHostKeyChecking=no root@$SILICON_VALLEY_IP << 'ENDSSH' +echo "✅ Connected to Silicon Valley node" +echo "" +echo "📋 System Info:" +hostname +uname -a +echo "" + +# Check if WireGuard is installed +if command -v wg &> /dev/null; then + echo "✅ WireGuard is installed" + echo "" + echo "📊 Current WireGuard configuration:" + wg show || echo "No WireGuard interfaces configured yet" +else + echo "❌ WireGuard is not installed" + echo "Installing WireGuard..." + + # Detect OS and install WireGuard + if [ -f /etc/debian_version ]; then + apt-get update + apt-get install -y wireguard + elif [ -f /etc/redhat-release ]; then + yum install -y wireguard-tools + else + echo "⚠️ Unknown OS, please install WireGuard manually" + exit 1 + fi +fi + +echo "" +echo "🔑 WireGuard Keys:" +if [ -f /etc/wireguard/privatekey ]; then + echo "Private key exists" + cat /etc/wireguard/publickey 2>/dev/null || wg pubkey < /etc/wireguard/privatekey | tee /etc/wireguard/publickey +else + echo "Generating new keys..." + mkdir -p /etc/wireguard + wg genkey | tee /etc/wireguard/privatekey | wg pubkey > /etc/wireguard/publickey + chmod 600 /etc/wireguard/privatekey +fi + +echo "" +echo "Public Key:" +cat /etc/wireguard/publickey + +echo "" +echo "📍 Network Info:" +ip addr show | grep "inet " | grep -v "127.0.0.1" + +ENDSSH + +echo "" +echo "✅ Silicon Valley node check complete!" diff --git a/start-all.sh b/start-all.sh new file mode 100755 index 0000000..a4dec4e --- /dev/null +++ b/start-all.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# CLAW Mesh - 启动所有服务 +# Mac 本地开发环境 + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "🚀 Starting CLAW Mesh..." +echo "" + +# 检查依赖 +echo "📋 Checking dependencies..." + +# 检查 Bun +if ! command -v ~/.bun/bin/bun &> /dev/null; then + echo "❌ Bun not found. Please install: curl -fsSL https://bun.sh/install | bash" + exit 1 +fi +echo "✅ Bun installed" + +# 检查 Redis (本地) +if ! 
redis-cli -h 127.0.0.1 ping &> /dev/null; then + echo "⚠️ Local Redis not running. Starting..." + brew services start redis || echo "Please start Redis manually" +fi +echo "✅ Local Redis running" + +# 检查 Redis (硅谷节点) +if ! redis-cli -h 100.80.67.125 ping &> /dev/null; then + echo "❌ Silicon Valley Redis not accessible at 100.80.67.125" + exit 1 +fi +echo "✅ Silicon Valley Redis accessible" + +# 检查 Qdrant +if ! curl -s http://localhost:6333/collections &> /dev/null; then + echo "⚠️ Qdrant not running. Starting..." + docker start qdrant 2>/dev/null || docker run -d --name qdrant -p 6333:6333 -v "$SCRIPT_DIR/qdrant_storage:/qdrant/storage" qdrant/qdrant + sleep 2 +fi +echo "✅ Qdrant running" + +echo "" +echo "🎯 Starting services..." +echo "" + +# 创建日志目录 +mkdir -p logs + +# 启动 FSC Worker +echo "📦 Starting FSC Worker Daemon..." +~/.bun/bin/bun run fsc/fsc-worker-daemon.ts > logs/fsc-worker.log 2>&1 & +FSC_PID=$! +echo " PID: $FSC_PID" +echo " Log: logs/fsc-worker.log" + +# 等待 Worker 启动 +sleep 2 + +# 检查 Worker 是否运行 +if ps -p $FSC_PID > /dev/null; then + echo " ✅ FSC Worker started" +else + echo " ❌ FSC Worker failed to start. Check logs/fsc-worker.log" + exit 1 +fi + +# 启动 MemoV Sync Daemon +echo "🧠 Starting MemoV Sync Daemon..." +~/.bun/bin/bun run fsc/memov-sync-daemon.ts > logs/memov-sync.log 2>&1 & +MEMOV_PID=$! +echo " PID: $MEMOV_PID" +echo " Log: logs/memov-sync.log" + +# 等待 MemoV 启动 +sleep 2 + +# 检查 MemoV 是否运行 +if ps -p $MEMOV_PID > /dev/null; then + echo " ✅ MemoV Sync started" +else + echo " ❌ MemoV Sync failed to start. Check logs/memov-sync.log" + exit 1 +fi + +echo "" +echo "✅ CLAW Mesh started successfully!" 
+echo "" +echo "📊 Service Status:" +echo " - FSC Worker: PID $FSC_PID (logs/fsc-worker.log)" +echo " - MemoV Sync: PID $MEMOV_PID (logs/memov-sync.log)" +echo " - Redis (local): 127.0.0.1:6379" +echo " - Redis (SV): 100.80.67.125:6379" +echo " - Qdrant: http://localhost:6333" +echo "" +echo "🛑 To stop all services:" +echo " kill $FSC_PID $MEMOV_PID" +echo "" +echo "📝 To view logs:" +echo " tail -f logs/fsc-worker.log" +echo " tail -f logs/memov-sync.log" +echo "" + +# 保存 PID 到文件 +echo "$FSC_PID $MEMOV_PID" > .claw-mesh.pid + +echo "💡 Worker is now listening for tasks on Redis Stream: fsc:tasks" diff --git a/start-local.sh b/start-local.sh new file mode 100755 index 0000000..c65957e --- /dev/null +++ b/start-local.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# CLAW Mesh Local Development Startup Script + +set -e + +echo "🚀 Starting CLAW Mesh Local Development Environment" +echo "" + +# Check if Docker is running +if ! docker ps > /dev/null 2>&1; then + echo "❌ Docker is not running. Please start Docker Desktop first." + exit 1 +fi + +# Start Redis if not running +if ! docker ps | grep -q claw-redis; then + echo "📦 Starting Redis..." + docker start claw-redis 2>/dev/null || \ + docker run -d --name claw-redis -p 6379:6379 \ + -v ~/workspace/claw-mesh/redis_data:/data \ + redis:7-alpine redis-server --appendonly yes + echo "✅ Redis started" +else + echo "✅ Redis already running" +fi + +# Start Qdrant if not running +if ! docker ps | grep -q claw-qdrant; then + echo "📦 Starting Qdrant..." + docker start claw-qdrant 2>/dev/null || \ + docker run -d --name claw-qdrant -p 6333:6333 \ + -v ~/workspace/claw-mesh/qdrant_storage:/qdrant/storage \ + qdrant/qdrant + echo "✅ Qdrant started" +else + echo "✅ Qdrant already running" +fi + +# Wait for services to be ready +echo "" +echo "⏳ Waiting for services to be ready..." 
+sleep 3 + +# Check Redis +if redis-cli -h 127.0.0.1 -p 6379 ping > /dev/null 2>&1; then + echo "✅ Redis is ready" +else + echo "⚠️ Redis might not be ready yet" +fi + +# Check Qdrant +if curl -s http://localhost:6333/collections > /dev/null 2>&1; then + echo "✅ Qdrant is ready" +else + echo "⚠️ Qdrant might not be ready yet" +fi + +echo "" +echo "🎉 CLAW Mesh is ready!" +echo "" +echo "📊 Service URLs:" +echo " - Redis: redis://127.0.0.1:6379" +echo " - Qdrant: http://localhost:6333" +echo "" +echo "🧪 Quick Tests:" +echo " - Test Pointer System: node scripts/test-pointer.js" +echo " - Test Qdrant: node scripts/test-pointer.js --qdrant" +echo "" +echo "📝 Project Directory: ~/workspace/claw-mesh" +echo "" diff --git a/start-wireguard.sh b/start-wireguard.sh new file mode 100755 index 0000000..f6f3c08 --- /dev/null +++ b/start-wireguard.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# 启动 Mac 到硅谷节点的 WireGuard 连接 + +set -e + +echo "🔧 配置 WireGuard 连接到硅谷节点..." + +# 创建配置目录 +sudo mkdir -p /usr/local/etc/wireguard + +# 复制配置文件 +sudo cp ~/workspace/claw-mesh/wg0.conf /usr/local/etc/wireguard/wg0.conf +sudo chmod 600 /usr/local/etc/wireguard/wg0.conf + +echo "✅ 配置文件已安装" + +# 启动 WireGuard +echo "🚀 启动 WireGuard..." +sudo wg-quick up wg0 + +echo "" +echo "✅ WireGuard 已启动!" +echo "" +echo "📊 连接状态:" +sudo wg show + +echo "" +echo "🧪 测试连接到硅谷节点 (10.10.0.2)..." +ping -c 3 10.10.0.2 + +echo "" +echo "🎉 连接成功!" 
+echo ""
+echo "📝 有用的命令:"
+echo " 查看状态: sudo wg show"
+echo " 停止连接: sudo wg-quick down wg0"
+echo " 重启连接: sudo wg-quick down wg0 && sudo wg-quick up wg0"
diff --git a/status.sh b/status.sh
new file mode 100755
index 0000000..3bc97c4
--- /dev/null
+++ b/status.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# CLAW Mesh - status check
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR" || exit 1
+
+# Print whether the process recorded for a service is alive.
+# $1 = display label, $2 = PID from the PID file (empty when unknown).
+report_process() {
+    local label="$1" pid="$2"
+    if [ -z "$pid" ]; then
+        echo "❌ $label: Not running"
+    elif ps -p "$pid" > /dev/null 2>&1; then
+        echo "✅ $label: Running (PID: $pid)"
+    else
+        echo "❌ $label: Not running (stale PID file)"
+    fi
+}
+
+echo "📊 CLAW Mesh Status"
+echo "===================="
+echo ""
+
+# start-all.sh writes "<FSC_PID> <MEMOV_PID>" on one line; read both fields.
+FSC_PID=""
+MEMOV_PID=""
+if [ -f .claw-mesh.pid ]; then
+    read -r FSC_PID MEMOV_PID < .claw-mesh.pid
+fi
+report_process "FSC Worker" "$FSC_PID"
+report_process "MemoV Sync" "$MEMOV_PID"
+
+# Local Redis
+if redis-cli -h 127.0.0.1 ping &> /dev/null; then
+    echo "✅ Redis (local): Running (127.0.0.1:6379)"
+else
+    echo "❌ Redis (local): Not running"
+fi
+
+# Silicon Valley Redis
+if redis-cli -h 100.80.67.125 ping &> /dev/null; then
+    echo "✅ Redis (SV): Running (100.80.67.125:6379)"
+else
+    echo "❌ Redis (SV): Not accessible"
+fi
+
+# Qdrant
+if curl -s http://localhost:6333/collections &> /dev/null; then
+    echo "✅ Qdrant: Running (http://localhost:6333)"
+else
+    echo "❌ Qdrant: Not running"
+fi
+
+# Tailscale
+if tailscale status &> /dev/null; then
+    TAILSCALE_IP=$(tailscale ip -4 2>/dev/null)
+    echo "✅ Tailscale: Connected ($TAILSCALE_IP)"
+else
+    echo "⚠️ Tailscale: Not available"
+fi
+
+echo ""
+echo "📈 Redis Stats (Silicon Valley)"
+echo "--------------------------------"
+
+# The summary form of XPENDING prints the total pending count on its first line.
+# (The extended "- + 1" form returns at most one entry, so counting its lines is wrong.)
+PENDING=$(redis-cli -h 100.80.67.125 XPENDING fsc:tasks fsc-workers 2>/dev/null | head -n 1)
+# Anything non-numeric (Redis unreachable, missing group) is shown as n/a.
+case "$PENDING" in
+    ''|*[!0-9]*) PENDING="n/a" ;;
+esac
+RESULTS=$(redis-cli -h 100.80.67.125 XLEN fsc:results 2>/dev/null)
+
+echo " Pending tasks: $PENDING"
+echo " Total results: ${RESULTS:-n/a}"
+
+echo ""
+echo "📝 Recent logs (last 10 lines):"
+echo "--------------------------------"
+if [ -f logs/fsc-worker.log ]; then
+    tail -10 logs/fsc-worker.log | sed 's/^/ /'
+else
+    echo " No logs found"
+fi diff --git a/stop-all.sh b/stop-all.sh new file mode 100755 index 0000000..de3a88b --- /dev/null +++ b/stop-all.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# CLAW Mesh - 停止所有服务 + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "🛑 Stopping CLAW Mesh..." +echo "" + +# 读取 PID 文件 +if [ -f .claw-mesh.pid ]; then + PIDS=$(cat .claw-mesh.pid) + FSC_PID=$(echo $PIDS | awk '{print $1}') + MEMOV_PID=$(echo $PIDS | awk '{print $2}') + + # 停止 FSC Worker + if [ -n "$FSC_PID" ] && ps -p $FSC_PID > /dev/null 2>&1; then + echo "📦 Stopping FSC Worker (PID: $FSC_PID)..." + kill $FSC_PID + + # 等待进程结束 + for i in {1..10}; do + if ! ps -p $FSC_PID > /dev/null 2>&1; then + echo " ✅ FSC Worker stopped" + break + fi + sleep 1 + done + + # 如果还在运行,强制杀死 + if ps -p $FSC_PID > /dev/null 2>&1; then + echo " ⚠️ Force killing FSC Worker..." + kill -9 $FSC_PID + fi + else + echo " ℹ️ FSC Worker not running" + fi + + # 停止 MemoV Sync + if [ -n "$MEMOV_PID" ] && ps -p $MEMOV_PID > /dev/null 2>&1; then + echo "🧠 Stopping MemoV Sync (PID: $MEMOV_PID)..." + kill $MEMOV_PID + + # 等待进程结束 + for i in {1..10}; do + if ! ps -p $MEMOV_PID > /dev/null 2>&1; then + echo " ✅ MemoV Sync stopped" + break + fi + sleep 1 + done + + # 如果还在运行,强制杀死 + if ps -p $MEMOV_PID > /dev/null 2>&1; then + echo " ⚠️ Force killing MemoV Sync..." 
+ kill -9 $MEMOV_PID + fi + else + echo " ℹ️ MemoV Sync not running" + fi + + rm .claw-mesh.pid +else + echo " ℹ️ No PID file found" +fi + +echo "" +echo "✅ CLAW Mesh stopped" diff --git a/test-deployment.ts b/test-deployment.ts new file mode 100644 index 0000000..e4c824b --- /dev/null +++ b/test-deployment.ts @@ -0,0 +1,108 @@ +#!/usr/bin/env bun +/** + * CLAW Mesh 部署测试 + * 验证所有组件是否正常工作 + */ + +import { createClient } from 'redis'; +import { decode as msgpackDecode } from '@msgpack/msgpack'; + +const REDIS_HOST = process.env.REDIS_HOST || '100.80.67.125'; +const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379'); + +console.log('🧪 Testing CLAW Mesh Deployment...\n'); + +// 测试 Redis 连接 +console.log('1️⃣ Testing Redis connection...'); +const redis = createClient({ + socket: { + host: REDIS_HOST, + port: REDIS_PORT, + } +}); + +redis.on('error', (err) => { + console.error('❌ Redis error:', err.message); + process.exit(1); +}); + +await redis.connect(); +console.log('✅ Redis connected\n'); + +// 测试发送任务 +console.log('2️⃣ Sending test task...'); +const taskId = `test-${Date.now()}`; +const task = { + id: taskId, + image: 'alpine:latest', + commands: ['echo "Hello from CLAW Mesh!"', 'date', 'uname -a'], + timeoutSeconds: 30, + riskLevel: 'low' +}; + +await redis.xAdd('fsc:tasks', '*', { + task: JSON.stringify(task) +}); + +console.log(`✅ Task sent: ${taskId}\n`); + +// 等待结果 +console.log('3️⃣ Waiting for result (30s timeout)...'); +const startTime = Date.now(); +let result = null; + +while (Date.now() - startTime < 30000) { + const results = await redis.xRead( + { key: 'fsc:results', id: '0' }, + { COUNT: 100 } + ); + + if (results) { + for (const stream of results) { + for (const message of stream.messages) { + const data = message.message; + + // 处理 msgpack 编码 + let resultData; + if (data.encoding === 'msgpack' && data.payload) { + const buffer = Buffer.from(data.payload, 'base64'); + resultData = msgpackDecode(buffer) as any; + } else { + resultData = 
JSON.parse(data.result || '{}'); + } + + if (resultData.task_id === taskId || resultData.taskId === taskId) { + result = resultData; + break; + } + } + if (result) break; + } + } + + if (result) break; + await new Promise(resolve => setTimeout(resolve, 1000)); +} + +if (result) { + console.log('✅ Task completed!\n'); + console.log('📊 Result:'); + console.log(` Status: ${result.status}`); + console.log(` Duration: ${result.durationMs}ms`); + if (result.output) { + console.log(` Output:\n${result.output.split('\n').map(l => ' ' + l).join('\n')}`); + } + if (result.error) { + console.log(` Error: ${result.error}`); + } +} else { + console.log('⚠️ No result received within 30s'); + console.log(' This might mean:'); + console.log(' - Worker is not running'); + console.log(' - Docker is not available'); + console.log(' - Task is still processing'); +} + +await redis.quit(); + +console.log('\n✅ Test completed!'); From abd69f346bb71f70aed81f999f27756947c0b029 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Thu, 12 Mar 2026 01:13:53 +0800 Subject: [PATCH 5/7] docs: add MCP integration configuration - Add OpenClaw MCP server configuration for Silicon Valley node - Document freema/openclaw-mcp setup with Gateway URL and token - Add Windows Claude Code MCP integration plan (steipete/claude-code-mcp) - Include security considerations and troubleshooting guide - Update DEPLOYMENT.md with node communication layer Co-Authored-By: Claude Opus 4.6 --- DEPLOYMENT.md | 13 +++ docs/MCP_INTEGRATION.md | 176 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 docs/MCP_INTEGRATION.md diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 14be802..5f2c8ab 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -55,6 +55,19 @@ - Qdrant 集成: 已配置 - 状态: 可用 +### 5. 
节点间通信 ✅ +- **OpenClaw MCP 服务器(硅谷节点)** + - 方案: freema/openclaw-mcp + - Gateway URL: `http://100.80.67.125:18789` + - 认证: Gateway Token(已配置) + - 功能: Mac 节点通过 MCP 直接调用硅谷节点(陈昭芊)的 OpenClaw 工具 + - 配置位置: `~/.claude/settings.json` → `mcpServers.openclaw` + - 状态: 已配置,可用 + +- **Claude Code MCP 服务器(Windows 节点)** + - 方案: steipete/claude-code-mcp + - 状态: 待完成配置 + ## 测试结果 ### 端到端测试 ✅ diff --git a/docs/MCP_INTEGRATION.md b/docs/MCP_INTEGRATION.md new file mode 100644 index 0000000..d3ccbbc --- /dev/null +++ b/docs/MCP_INTEGRATION.md @@ -0,0 +1,176 @@ +# MCP 集成配置 + +## 概述 + +claw-mesh 使用 MCP (Model Context Protocol) 实现节点间的直接工具通信,避免通过 SSH + 对话方式的延迟。 + +## 架构 + +``` +Mac 节点 (Claude Code) + ↓ MCP +硅谷节点 (OpenClaw/陈昭芊) ✅ 已配置 + +Mac 节点 (Claude Code) + ↓ MCP +Windows 节点 (Claude Code) ⚠️ 待配置 +``` + +## 硅谷节点 OpenClaw MCP 配置 + +### 1. 选择的方案 + +**freema/openclaw-mcp** - 支持 Docker、Tailscale 网络、OAuth2 认证 + +GitHub: https://github.com/freema/openclaw-mcp + +### 2. 硅谷节点 Gateway 信息 + +- **Gateway URL**: `http://100.80.67.125:18789` +- **Gateway Token**: 不在文档中记录;从密钥管理工具获取,并通过环境变量 `OPENCLAW_GATEWAY_TOKEN` 注入 +- **访问方式**: 通过 Tailscale 网络直连 + +### 3. Mac 节点配置 + +在 `~/.claude/settings.json` 中添加: + +```json +{ + "mcpServers": { + "openclaw": { + "command": "npx", + "args": ["-y", "openclaw-mcp"], + "env": { + "OPENCLAW_URL": "http://100.80.67.125:18789", + "OPENCLAW_GATEWAY_TOKEN": "YOUR_GATEWAY_TOKEN" + }, + "description": "OpenClaw MCP - Direct communication with Silicon Valley node (陈昭芊)" + } + } +} +``` + +### 4. 可用工具 + +通过 openclaw MCP 可以调用: +- `openclaw_chat` - 发送消息到 OpenClaw agent +- `openclaw_chat_async` - 异步消息 +- `openclaw_status` - 查询 Gateway 状态 +- `openclaw_task_status` - 查询任务状态 +- `openclaw_task_list` - 列出任务 +- `openclaw_task_cancel` - 取消任务 + +### 5. 使用方式 + +配置完成后,重启 Claude Code,即可直接通过 MCP 工具调用硅谷节点的功能,无需再使用: +```bash +ssh root@100.80.67.125 "openclaw agent -m '消息' --agent main" +``` + +## Windows 节点 Claude Code MCP 配置 + +### 1.
选择的方案 + +**steipete/claude-code-mcp** - 将 Claude Code 暴露为 MCP 服务器 + +GitHub: https://github.com/steipete/claude-code-mcp + +### 2. Windows 节点部署 + +在 Windows 节点上运行: + +```bash +npx -y @steipete/claude-code-mcp@latest +``` + +### 3. Mac 节点配置(待完成) + +需要在 `~/.claude/settings.json` 中添加: + +```json +{ + "mcpServers": { + "windows-claude": { + "command": "npx", + "args": ["-y", "@steipete/claude-code-mcp@latest"], + "description": "Windows Claude Code MCP - Direct communication with Windows node" + } + } +} +``` + +### 4. 连接方式 + +- **Stdio 方式**: 通过 SSH 隧道转发 +- **HTTP 方式**: 需要确认端口和认证方式 + +## 安全考虑 + +### 1. 网络隔离 +- ✅ 所有 MCP 通信通过 Tailscale VPN 加密 +- ✅ Gateway 只监听 Tailscale 网络接口 +- ⚠️ Gateway Token 需要妥善保管(已配置在 settings.json) + +### 2. 认证机制 +- ✅ OpenClaw Gateway 使用 Token 认证 +- ⚠️ Windows Claude Code MCP 认证方式待确认 + +### 3. 访问控制 +- ✅ 只有 Mac 节点可以访问硅谷和 Windows 的 MCP 服务器 +- ✅ 硅谷和 Windows 节点不直接互相访问(通过 Mac 协调) + +### 4. 敏感信息保护 +- ⚠️ Gateway Token 明文存储在 `~/.claude/settings.json` +- 建议:使用环境变量或密钥管理工具 +- 建议:定期轮换 Gateway Token + +## 故障排查 + +### OpenClaw MCP 连接失败 + +1. 检查 Tailscale 连接: +```bash +tailscale status | grep 100.80.67.125 +``` + +2. 测试 Gateway 可达性: +```bash +curl http://100.80.67.125:18789/health +``` + +3. 验证 Token(先在当前 shell 中导出 `OPENCLAW_GATEWAY_TOKEN`): +```bash +curl -H "Authorization: Bearer $OPENCLAW_GATEWAY_TOKEN" \ + http://100.80.67.125:18789/api/status +``` + +### Windows MCP 连接失败 + +1. 检查 SSH 连接: +```bash +ssh lunnykuya@100.101.173.35 "whoami" +``` + +2.
检查 MCP 服务器状态: +```bash +ssh lunnykuya@100.101.173.35 "ps aux | grep claude-code-mcp" +``` + +## 性能优化 + +### 直连 vs 中转对比 + +| 方式 | 延迟 | 优点 | 缺点 | +|------|------|------|------| +| 直连 MCP | ~50ms | 低延迟、高效 | 需要配置 MCP 服务器 | +| SSH + 对话 | ~200ms | 简单、无需配置 | 高延迟、低效 | +| 通过陈昭芊中转 | ~400ms | 统一协调 | 延迟翻倍、陈昭芊负载高 | + +**结论**: 优先使用直连 MCP 方式 + +## 参考资料 + +- [freema/openclaw-mcp](https://github.com/freema/openclaw-mcp) +- [steipete/claude-code-mcp](https://github.com/steipete/claude-code-mcp) +- [Model Context Protocol 规范](https://modelcontextprotocol.io/) +- [OpenClaw Gateway 文档](https://docs.openclaw.ai/) From 122a74561b768c84f89b613bdced3745ce915f05 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Fri, 13 Mar 2026 15:00:30 +0800 Subject: [PATCH 6/7] fix: update Redis fallback IP from WireGuard to Tailscale, add sshUser to nodes --- config/network.ts | 9 +++++++-- mcp/fsc-core/src/index.ts | 2 +- mcp/fsc-monitor/src/index.ts | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/config/network.ts b/config/network.ts index acd07aa..ef482d4 100644 --- a/config/network.ts +++ b/config/network.ts @@ -12,6 +12,7 @@ export interface NodeConfig { name: string; tailscaleIp: string; wireguardIp?: string; // 保留用于兼容 + sshUser: string; role: 'master' | 'worker'; enabled: boolean; } @@ -22,6 +23,7 @@ export const NODES: Record = { name: 'lunnymacbook-pro', tailscaleIp: '100.114.56.105', wireguardIp: '10.10.0.5', + sshUser: 'lunnykuya', role: 'master', enabled: true, }, @@ -30,6 +32,7 @@ export const NODES: Record = { name: 'vm-0-6-debian', tailscaleIp: '100.80.67.125', wireguardIp: '10.10.0.2', + sshUser: 'root', role: 'worker', enabled: true, }, @@ -38,14 +41,16 @@ export const NODES: Record = { name: 'win-taq1rm10mnf', tailscaleIp: '100.101.173.35', wireguardIp: '10.10.0.4', + sshUser: 'Administrator', role: 'worker', - enabled: true, // 已启用 + enabled: true, }, 'tokyo': { id: 'tokyo', name: 'tokyo-node', - tailscaleIp: '', // 待添加 + tailscaleIp: '', 
wireguardIp: '10.10.0.3', + sshUser: 'root', role: 'worker', enabled: false, }, diff --git a/mcp/fsc-core/src/index.ts b/mcp/fsc-core/src/index.ts index a211a96..2b80eb7 100644 --- a/mcp/fsc-core/src/index.ts +++ b/mcp/fsc-core/src/index.ts @@ -13,7 +13,7 @@ import { createClient } from 'redis'; // ============ 配置 ============ const PORT = parseInt(process.env.MCP_PORT || '8081'); -const REDIS_HOST = process.env.REDIS_HOST || '10.10.0.1'; +const REDIS_HOST = process.env.REDIS_HOST || '100.80.67.125'; const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379'); // ============ Redis Client ============ diff --git a/mcp/fsc-monitor/src/index.ts b/mcp/fsc-monitor/src/index.ts index f3ae32e..555fc57 100644 --- a/mcp/fsc-monitor/src/index.ts +++ b/mcp/fsc-monitor/src/index.ts @@ -13,7 +13,7 @@ import { createClient } from 'redis'; // ============ 配置 ============ const PORT = parseInt(process.env.MCP_PORT || '8080'); -const REDIS_HOST = process.env.REDIS_HOST || '10.10.0.1'; +const REDIS_HOST = process.env.REDIS_HOST || '100.80.67.125'; const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379'); // ============ Redis Client ============ From 5010f242f3ae67e100889af1244428872a672834 Mon Sep 17 00:00:00 2001 From: 2233admin <2276214182@qq.com> Date: Sat, 14 Mar 2026 19:09:37 +0800 Subject: [PATCH 7/7] feat: Redis failover + MemoV MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redis 自动故障转移: - config/redis.ts v3: 跨进程状态文件 /tmp/claw-redis-active-host 优先级: 状态文件 > 环境变量 > 默认值 - fsc/redis-failover.ts: 每 10s 探测硅谷 Redis,3 次失败切本地,2 次成功切回 - fsc/memov-sync-daemon.ts: 动态 Redis 连接,5s 轮询 host 变化自动重连 - scripts/ensure-services.sh: 集成 failover monitor 自动启动 MemoV MCP Server: - mcp/memov/src/index.ts: stdio JSON-RPC 2.0,6 个工具 (list_sessions, read_session, search, timeline, read_file, write) 其他: - CLAUDE.md: 补充服务配置、启动方式、健康检查文档 - package.json: 添加 ioredis 依赖 --- .gitignore | 1 + CLAUDE.md | 157 ++++++++++++++++++++ config/redis.ts | 63 
+++++++- fsc/memov-sync-daemon.ts | 64 ++++++-- fsc/redis-failover.ts | 135 +++++++++++++++++ mcp/memov/src/index.ts | 289 +++++++++++++++++++++++++++++++++++++ package.json | 1 + scripts/ensure-services.sh | 33 +++++ 8 files changed, 721 insertions(+), 22 deletions(-) create mode 100644 fsc/redis-failover.ts create mode 100644 mcp/memov/src/index.ts create mode 100755 scripts/ensure-services.sh diff --git a/.gitignore b/.gitignore index 964ae58..db0f4d3 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ qdrant_storage/ # 构建产物 bun.lock packages/ +.gitnexus diff --git a/CLAUDE.md b/CLAUDE.md index 82ae224..8f536c6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -55,9 +55,166 @@ CLAW-Mesh 分布式 AI 编码集群的基础设施层。 - `api/` — LLM 代理 + SSE + MCP - `deploy/Dockerfile.agent` — Agent 容器镜像 +## 服务配置 + +### 三大核心服务 + +| 服务 | 端口 | 源码 | 功能 | +|------|------|------|------| +| fsc-monitor | 8080 | `mcp/fsc-monitor/src/index.ts` | 队列深度、Worker 负载、告警、Prometheus metrics | +| fsc-core | 8081 | `mcp/fsc-core/src/index.ts` | 批量任务调度 (batch_dispatch)、批次追踪 | +| memov-sync | — | `fsc/memov-sync-daemon.ts` | 事件驱动记忆同步、Git commit、rsync 节点同步 | + +### Redis 配置 +- **中心节点**: 硅谷 (100.80.67.125:6379) +- **配置文件**: `config/redis.ts`(集中管理,引用 `config/network.ts` 的 `getRedisMasterIp()`) +- **环境变量覆盖**: `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD` +- **Redis Streams**: + - `fsc:tasks` — 任务队列 + - `fsc:results` — 结果队列 + - `fsc:dlq` — 死信队列 + - `fsc:mem_events` — 记忆事件流 (memov-sync 消费) + - `fsc:memov:deadletter` — 记忆同步死信 +- **Consumer Group**: `memov-sync` / `memov-{HOSTNAME}` + +### 依赖差异 +- fsc-monitor / fsc-core: 使用 `redis` (node-redis v4),直接硬编码 Redis 地址 +- memov-sync-daemon: 使用 `ioredis`,从 `config/redis.ts` 导入配置 + +### 启动方式 + +**Mac (开发节点)**: +```bash +cd ~/workspace/claw-mesh +bun run mcp/fsc-monitor/src/index.ts # 端口 8080 +bun run mcp/fsc-core/src/index.ts # 端口 8081 +bun run fsc/memov-sync-daemon.ts # 记忆同步 +``` + +**Windows 节点**: +```bat +cd C:\workspace\claw-mesh +C:\Users\Administrator\.bun\bin\bun.exe run 
mcp/fsc-monitor/src/index.ts +C:\Users\Administrator\.bun\bin\bun.exe run mcp/fsc-core/src/index.ts +C:\Users\Administrator\.bun\bin\bun.exe run fsc/memov-sync-daemon.ts +``` +- 启动脚本: `C:\workspace\start-claw-all.bat` +- 计划任务: `schtasks /query /tn "CLAW-Mesh-All"` (开机自启) + +### 健康检查 +```bash +# fsc-monitor +curl http://localhost:8080/health +curl http://localhost:8080/metrics # Prometheus 格式 + +# fsc-core +curl http://localhost:8081/health + +# 远程检查 (Windows) +curl http://100.101.173.35:8080/health +curl http://100.101.173.35:8081/health +``` + ## 不要做的事 - 不要删除 SSH 隧道配置 (容错需要) - 不要用 express/koa,用 Bun.serve - 不要在 Worker 层用昂贵模型 (Claude/GPT-4) - 不要把原始日志传到硅谷节点 (只传聚合指标) - 不要直接修改 .mem/ 目录的 Git 配置(已配置 xart 为主仓库) + + +# GitNexus — Code Intelligence + +This project is indexed by GitNexus as **claw-mesh** (1077 symbols, 2124 relationships, 73 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. + +> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. + +## Always Do + +- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. +- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. +- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. +- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. +- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. + +## When Debugging + +1. 
`gitnexus_query({query: ""})` — find execution flows related to the issue +2. `gitnexus_context({name: ""})` — see all callers, callees, and process participation +3. `READ gitnexus://repo/claw-mesh/process/{processName}` — trace the full execution flow step by step +4. For regressions: `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` — see what your branch changed + +## When Refactoring + +- **Renaming**: MUST use `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with `dry_run: false`. +- **Extracting/Splitting**: MUST run `gitnexus_context({name: "target"})` to see all incoming/outgoing refs, then `gitnexus_impact({target: "target", direction: "upstream"})` to find all external callers before moving code. +- After any refactor: run `gitnexus_detect_changes({scope: "all"})` to verify only expected files changed. + +## Never Do + +- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. +- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. +- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. +- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. 
+ +## Tools Quick Reference + +| Tool | When to use | Command | +|------|-------------|---------| +| `query` | Find code by concept | `gitnexus_query({query: "auth validation"})` | +| `context` | 360-degree view of one symbol | `gitnexus_context({name: "validateUser"})` | +| `impact` | Blast radius before editing | `gitnexus_impact({target: "X", direction: "upstream"})` | +| `detect_changes` | Pre-commit scope check | `gitnexus_detect_changes({scope: "staged"})` | +| `rename` | Safe multi-file rename | `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` | +| `cypher` | Custom graph queries | `gitnexus_cypher({query: "MATCH ..."})` | + +## Impact Risk Levels + +| Depth | Meaning | Action | +|-------|---------|--------| +| d=1 | WILL BREAK — direct callers/importers | MUST update these | +| d=2 | LIKELY AFFECTED — indirect deps | Should test | +| d=3 | MAY NEED TESTING — transitive | Test if critical path | + +## Resources + +| Resource | Use for | +|----------|---------| +| `gitnexus://repo/claw-mesh/context` | Codebase overview, check index freshness | +| `gitnexus://repo/claw-mesh/clusters` | All functional areas | +| `gitnexus://repo/claw-mesh/processes` | All execution flows | +| `gitnexus://repo/claw-mesh/process/{name}` | Step-by-step execution trace | + +## Self-Check Before Finishing + +Before completing any code modification task, verify: +1. `gitnexus_impact` was run for all modified symbols +2. No HIGH/CRITICAL risk warnings were ignored +3. `gitnexus_detect_changes()` confirms changes match expected scope +4. All d=1 (WILL BREAK) dependents were updated + +## Keeping the Index Fresh + +After committing code changes, the GitNexus index becomes stale. 
Re-run analyze to update it: + +```bash +npx gitnexus analyze +``` + +If the index previously included embeddings, preserve them by adding `--embeddings`: + +```bash +npx gitnexus analyze --embeddings +``` + +To check whether embeddings exist, inspect `.gitnexus/meta.json` — the `stats.embeddings` field shows the count (0 means no embeddings). **Running analyze without `--embeddings` will delete any previously generated embeddings.** + +> Claude Code users: A PostToolUse hook handles this automatically after `git commit` and `git merge`. + +## CLI + +- Re-index: `npx gitnexus analyze` +- Check freshness: `npx gitnexus status` +- Generate docs: `npx gitnexus wiki` + + diff --git a/config/redis.ts b/config/redis.ts index b032161..5a1312c 100644 --- a/config/redis.ts +++ b/config/redis.ts @@ -1,18 +1,69 @@ /** * Redis 连接配置 — 集中管理,避免 6 处硬编码 - * v2: 支持 Tailscale 网络 + * v3: 支持 Tailscale 网络 + 自动故障转移 + * + * 主节点: 硅谷 (100.80.67.125) + * 备用: Mac 本地 (127.0.0.1) + * 跨进程通信: /tmp/claw-redis-active-host 文件 */ -import { getRedisMasterIp } from './network'; +import { getRedisMasterIp, NODES } from './network'; +import { readFileSync, writeFileSync } from 'fs'; -export const REDIS_HOST = process.env.REDIS_HOST || getRedisMasterIp(); +const SILICON_VALLEY_IP = NODES['silicon-valley'].tailscaleIp; +const LOCAL_IP = '127.0.0.1'; +const STATE_FILE = '/tmp/claw-redis-active-host'; + +/** 从状态文件读取当前活跃 host(跨进程共享) + * 优先级: 状态文件 > 环境变量 > 默认值(硅谷主节点) + * 状态文件由 redis-failover monitor 维护 */ +export function getActiveRedisHost(): string { + try { + const host = readFileSync(STATE_FILE, 'utf-8').trim(); + if (host) return host; + } catch { /* 文件不存在,继续 fallback */ } + if (process.env.REDIS_HOST) return process.env.REDIS_HOST; + return getRedisMasterIp(); +} + +export function isFailoverActive(): boolean { + return getActiveRedisHost() === LOCAL_IP; +} + +/** failover monitor 调用:写状态文件,切换到本地 */ +export function switchToLocal(): void { + const current = getActiveRedisHost(); + if (current !== 
LOCAL_IP) { + console.log(`[Redis Failover] 切换: ${current} → ${LOCAL_IP} (本地备用)`); + writeFileSync(STATE_FILE, LOCAL_IP); + } +} + +/** failover monitor 调用:写状态文件,切回硅谷 */ +export function switchToPrimary(): void { + const current = getActiveRedisHost(); + if (current !== SILICON_VALLEY_IP) { + console.log(`[Redis Failover] 恢复: ${current} → ${SILICON_VALLEY_IP} (硅谷主节点)`); + writeFileSync(STATE_FILE, SILICON_VALLEY_IP); + } +} + +// 兼容旧代码的静态导出 export const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379'); -export const REDIS_PASSWORD = process.env.REDIS_PASSWORD || ''; // 从环境变量读取 +export const REDIS_PASSWORD = process.env.REDIS_PASSWORD || ''; + +// 动态 getter — 新代码应使用这些 +export function getRedisHost(): string { return getActiveRedisHost(); } +export function getRedisUrl(): string { + return `redis://:${REDIS_PASSWORD}@${getActiveRedisHost()}:${REDIS_PORT}`; +} -export const REDIS_URL = `redis://:${REDIS_PASSWORD}@${REDIS_HOST}:${REDIS_PORT}`; +// 静态值(首次加载时的快照,兼容已有 import { REDIS_HOST } 的代码) +export const REDIS_HOST = getActiveRedisHost(); +export const REDIS_URL = `redis://:${REDIS_PASSWORD}@${getActiveRedisHost()}:${REDIS_PORT}`; export function redisConfig() { return { - url: REDIS_URL, + url: getRedisUrl(), socket: { connectTimeout: 5000, reconnectStrategy: (retries: number) => Math.min(retries * 500, 5000) }, }; } diff --git a/fsc/memov-sync-daemon.ts b/fsc/memov-sync-daemon.ts index 13b18c7..1ad335d 100644 --- a/fsc/memov-sync-daemon.ts +++ b/fsc/memov-sync-daemon.ts @@ -17,7 +17,7 @@ import { createHash } from 'crypto'; import { readFile, writeFile } from 'fs/promises'; // ============ 配置 ============ -import { REDIS_HOST, REDIS_PORT, REDIS_PASSWORD } from '../config/redis'; +import { getActiveRedisHost, REDIS_PORT, REDIS_PASSWORD } from '../config/redis'; import { getActiveNodeIps } from '../config/network'; const MEM_DIR = '.mem'; @@ -35,17 +35,35 @@ const BATCH_SIZE_BYTES = 256 * 1024; // 大小阈值 (256KB) // 重试 const MAX_EVENT_RETRIES = 3; -// ============ 
Redis ============ -const redis = new Redis({ - host: REDIS_HOST, - port: REDIS_PORT, - password: REDIS_PASSWORD, - retryStrategy: (times) => Math.min(times * 200, 3000), - maxRetriesPerRequest: null, // BLOCK 命令需要 -}); +// ============ Redis (动态 host,支持 failover) ============ +const INITIAL_REDIS_HOST = getActiveRedisHost(); +let redis = createRedisClient(INITIAL_REDIS_HOST); + +function createRedisClient(host: string): InstanceType { + const client = new Redis({ + host, + port: REDIS_PORT, + password: REDIS_PASSWORD, + retryStrategy: (times) => Math.min(times * 200, 3000), + maxRetriesPerRequest: null, // BLOCK 命令需要 + }); + client.on('error', (err) => console.error(`[Redis] Error (${host}):`, err.message)); + client.on('connect', () => console.log(`[Redis] Connected to ${host}`)); + return client; +} -redis.on('error', (err) => console.error('[Redis] Error:', err.message)); -redis.on('connect', () => console.log('[Redis] Connected')); +// 定期检查是否需要重连到新 host(failover monitor 切换后) +let _currentRedisHost = INITIAL_REDIS_HOST; +setInterval(async () => { + const newHost = getActiveRedisHost(); + if (newHost !== _currentRedisHost) { + console.log(`[Redis] Host 变更: ${_currentRedisHost} → ${newHost}, 重连...`); + const oldRedis = redis; + redis = createRedisClient(newHost); + _currentRedisHost = newHost; + try { await oldRedis.quit(); } catch { /* ignore */ } + } +}, 5000); // ============ SHA256 ============ function sha256(content: string): string { @@ -53,23 +71,37 @@ function sha256(content: string): string { } // ============ Git 操作 ============ +const GIT_TIMEOUT_MS = 15000; // 15 秒超时,防止 git 挂死 + async function gitCommit(message: string): Promise { const startTime = Date.now(); const add = spawn(['git', '-C', MEM_DIR, 'add', '.']); - await add.exited; + const addExit = await Promise.race([ + add.exited, + new Promise(resolve => setTimeout(() => { add.kill(); resolve(-1); }, GIT_TIMEOUT_MS)), + ]); + if (addExit !== 0) { + console.error(`[Git] git add failed or timed 
out (exit: ${addExit})`); + return false; + } - const commit = spawn(['git', '-C', MEM_DIR, 'commit', '-m', message, '--allow-empty=false']); - const exitCode = await commit.exited; + const commit = spawn(['git', '-C', MEM_DIR, 'commit', '-m', message]); + const exitCode = await Promise.race([ + commit.exited, + new Promise(resolve => setTimeout(() => { commit.kill(); resolve(-1); }, GIT_TIMEOUT_MS)), + ]); const latencyMs = Date.now() - startTime; if (exitCode === 0) { console.log(`[Git] Committed (${latencyMs}ms): ${message}`); - // 反馈延迟给 batcher 做自适应 batcher.reportGitLatency(latencyMs); return true; } + if (exitCode === -1) { + console.error(`[Git] Commit timed out after ${GIT_TIMEOUT_MS}ms, killed process`); + } return false; } @@ -502,7 +534,7 @@ export async function writeWithCAS(path: string, content: string): Promise { + try { + const args = ['redis-cli', '-h', host, '-p', String(REDIS_PORT)]; + if (REDIS_PASSWORD) args.push('-a', REDIS_PASSWORD); + args.push('--no-auth-warning', 'PING'); + + const proc = spawn(args, { stdout: 'pipe', stderr: 'pipe' }); + const exitCode = await Promise.race([ + proc.exited, + new Promise(resolve => setTimeout(() => { proc.kill(); resolve(-1); }, 3000)), + ]); + + if (exitCode !== 0) return false; + const output = await new Response(proc.stdout).text(); + return output.trim() === 'PONG'; + } catch { + return false; + } +} + +/** 确保本地 Redis 正在运行 */ +async function ensureLocalRedis(): Promise { + const alive = await probeRedis(LOCAL_IP); + if (alive) return true; + + console.log('[Failover] 本地 Redis 未运行,尝试启动...'); + try { + const proc = spawn(['brew', 'services', 'start', 'redis'], { + stdout: 'pipe', stderr: 'pipe', + }); + await proc.exited; + // 等 1 秒让 Redis 启动 + await new Promise(r => setTimeout(r, 1000)); + return await probeRedis(LOCAL_IP); + } catch (err) { + console.error('[Failover] 启动本地 Redis 失败:', err); + return false; + } +} + +/** 硅谷恢复后,可选:停止本地 Redis(节省资源) */ +async function stopLocalRedisIfIdle(): Promise { + // 
不主动停止——保持本地 Redis 运行作为热备 + // 如果需要停止,取消注释: + // spawn(['brew', 'services', 'stop', 'redis']); + console.log('[Failover] 硅谷已恢复,本地 Redis 保持运行作为热备'); +} + +/** 主探测循环 */ +async function monitor() { + console.log('[Redis Failover Monitor] v1.0 启动'); + console.log(`[Config] 主节点: ${SILICON_VALLEY_IP}:${REDIS_PORT}`); + console.log(`[Config] 备用: ${LOCAL_IP}:${REDIS_PORT}`); + console.log(`[Config] 探测间隔: ${PROBE_INTERVAL_MS / 1000}s, 失败阈值: ${FAIL_THRESHOLD}, 恢复阈值: ${RECOVERY_THRESHOLD}`); + + while (true) { + const primaryAlive = await probeRedis(SILICON_VALLEY_IP); + + if (primaryAlive) { + consecutiveFailures = 0; + consecutiveSuccesses++; + + if (isFailoverActive() && consecutiveSuccesses >= RECOVERY_THRESHOLD) { + console.log(`[Failover] 硅谷 Redis 已恢复 (连续 ${consecutiveSuccesses} 次成功),切回主节点`); + switchToPrimary(); + await stopLocalRedisIfIdle(); + consecutiveSuccesses = 0; + } + } else { + consecutiveSuccesses = 0; + consecutiveFailures++; + + if (!isFailoverActive() && consecutiveFailures >= FAIL_THRESHOLD) { + console.warn(`[Failover] 硅谷 Redis 不可达 (连续 ${consecutiveFailures} 次失败),切换到本地`); + const localOk = await ensureLocalRedis(); + if (localOk) { + switchToLocal(); + consecutiveFailures = 0; + } else { + console.error('[Failover] 本地 Redis 也无法启动!保持当前状态'); + } + } else if (!isFailoverActive()) { + console.warn(`[Failover] 硅谷 Redis 探测失败 (${consecutiveFailures}/${FAIL_THRESHOLD})`); + } + } + + await new Promise(r => setTimeout(r, PROBE_INTERVAL_MS)); + } +} + +monitor().catch(err => { + console.error('[Redis Failover] Fatal:', err); + process.exit(1); +}); + +// 优雅退出 +process.on('SIGTERM', () => { + console.log('[Redis Failover] SIGTERM, exiting...'); + process.exit(0); +}); +process.on('SIGINT', () => { + console.log('[Redis Failover] SIGINT, exiting...'); + process.exit(0); +}); diff --git a/mcp/memov/src/index.ts b/mcp/memov/src/index.ts new file mode 100644 index 0000000..daef02b --- /dev/null +++ b/mcp/memov/src/index.ts @@ -0,0 +1,289 @@ +#!/usr/bin/env bun +/** + * 
MemoV MCP Server — stdio JSON-RPC 2.0
+ * Zero dependencies: reads the .mem/ directory and its Git history directly.
+ *
+ * Tools:
+ * - memov_list_sessions: list every session record
+ * - memov_read_session: read the content of one session
+ * - memov_search: keyword search across the memory files
+ * - memov_timeline: show the Git commit timeline
+ * - memov_read_file: read any file under .mem/
+ * - memov_write: write shared memory
+ */
+
+import { readFileSync, readdirSync, writeFileSync, mkdirSync, existsSync, statSync } from 'fs';
+import { execFileSync, execSync } from 'child_process';
+import { dirname, join, relative } from 'path';
+import { createInterface } from 'readline';
+
+const MEM_DIR = process.env.MEMOV_DIR || join(process.cwd(), '.mem');
+const SHARED_DIR = join(MEM_DIR, 'shared');
+
+// ============ stdio line reader ============
+
+const rl = createInterface({ input: process.stdin, terminal: false });
+// Lines that arrived while nobody was waiting in readLine().
+const lineQueue: string[] = [];
+// Resolver of the readLine() call currently waiting for input, if any.
+let lineResolve: ((line: string) => void) | null = null;
+
+rl.on('line', (line) => {
+  if (lineResolve) {
+    // Hand the line straight to the pending readLine() and clear the slot
+    // before resolving, so the next readLine() can register a new resolver.
+    const resolve = lineResolve;
+    lineResolve = null;
+    resolve(line);
+  } else {
+    lineQueue.push(line);
+  }
+});
+
+rl.on('close', () => process.exit(0));
+
+/**
+ * Returns the next stdin line: a queued one when available, otherwise a
+ * promise that the 'line' handler above resolves.
+ */
+function readLine(): Promise<string> {
+  if (lineQueue.length > 0) return Promise.resolve(lineQueue.shift()!);
+  return new Promise((resolve) => { lineResolve = resolve; });
+}
+
+/** Writes one JSON message followed by a newline to stdout. */
+function send(msg: object) {
+  const json = JSON.stringify(msg);
+  process.stdout.write(`${json}\n`);
+}
+
+/** Sends a JSON-RPC 2.0 success response for request `id`. */
+function reply(id: string | number, result: unknown) {
+  send({ jsonrpc: '2.0', id, result });
+}
+
+/** Sends a JSON-RPC 2.0 error response for request `id`. */
+function replyError(id: string | number, code: number, message: string) {
+  send({ jsonrpc: '2.0', id, error: { code, message } });
+}
+
+/**
+ * Runs `git -C MEM_DIR <args>` and returns trimmed stdout, or '' on failure.
+ *
+ * Arguments are passed as an argv array without a shell: a format string such
+ * as `%H|%ai|%s` must reach git verbatim instead of being parsed as a shell
+ * pipeline, and caller-supplied values must not be able to inject commands.
+ */
+function git(...args: string[]): string {
+  try {
+    return execFileSync('git', ['-C', MEM_DIR, ...args], {
+      encoding: 'utf-8',
+      timeout: 5000,
+    }).trim();
+  } catch (err) {
+    // stdout carries the JSON-RPC stream, so diagnostics go to stderr.
+    const reason = err instanceof Error ? err.message : String(err);
+    process.stderr.write(`[MemoV MCP] git ${args[0] ?? ''} failed: ${reason}\n`);
+    return '';
+  }
+}
+
+/**
+ * Removes every ".." sequence from a caller-supplied relative path so that
+ * joining it onto a base directory cannot climb out via parent references.
+ */
+function stripParentRefs(untrusted: string): string {
+  return untrusted.replace(/\.\./g, '');
+}
+
+/**
+ * Recursively collects the files below `dir`, as paths relative to `base`.
+ * Entries whose name starts with '.' are skipped; a missing `dir` yields [].
+ */
+function walkDir(dir: string, base: string = dir): string[] {
+  const files: string[] = [];
+  if (!existsSync(dir)) return files;
+  for (const entry of readdirSync(dir, { withFileTypes: true })) {
+    if (entry.name.startsWith('.')) continue;
+    const full = join(dir, entry.name);
+    if (entry.isDirectory()) {
+      files.push(...walkDir(full, base));
+    } else {
+      files.push(relative(base, full));
+    }
+  }
+  return files;
+}
+
+// ============ Tool Implementations ============
+
+/**
+ * Lists the `.md` files in shared/sessions in reverse filename order.
+ * The title is the first line starting with '#', with the leading '#'s
+ * removed; the filename is used when no such line exists.
+ */
+function listSessions() {
+  const sessDir = join(SHARED_DIR, 'sessions');
+  if (!existsSync(sessDir)) return { sessions: [] };
+
+  const files = readdirSync(sessDir)
+    .filter(f => f.endsWith('.md'))
+    .sort()
+    .reverse();
+
+  return {
+    sessions: files.map(f => {
+      const stat = statSync(join(sessDir, f));
+      const content = readFileSync(join(sessDir, f), 'utf-8');
+      const title = content.split('\n').find(l => l.startsWith('#'))?.replace(/^#+\s*/, '') || f;
+      return { file: f, title, size: stat.size, modified: stat.mtime.toISOString() };
+    }),
+  };
+}
+
+/**
+ * Reads one session file from shared/sessions.
+ * Returns `{ error }` when the file does not exist.
+ */
+function readSession(args: { file: string }) {
+  // The file name comes from the MCP client; strip ".." exactly as
+  // readFile/writeMemory do so it cannot address files outside sessions/.
+  const filePath = join(SHARED_DIR, 'sessions', stripParentRefs(args.file));
+  if (!existsSync(filePath)) return { error: `Session not found: ${args.file}` };
+  return { content: readFileSync(filePath, 'utf-8') };
+}
+
+/**
+ * Case-insensitive keyword search over every `.md` file below shared/.
+ * A line's score is the number of query keywords it contains; hits are
+ * returned best-first, truncated to `limit` (default 20), together with the
+ * untruncated hit count.
+ */
+function searchMemory(args: { query: string; limit?: number }) {
+  const limit = args.limit || 20;
+  const keywords = args.query.toLowerCase().split(/\s+/).filter(Boolean);
+  const results: { file: string; line: number; text: string; score: number }[] = [];
+
+  // Paths are relative to MEM_DIR so results read "shared/...".
+  const allFiles = walkDir(SHARED_DIR, MEM_DIR);
+
+  for (const relPath of allFiles) {
+    if (!relPath.endsWith('.md')) continue;
+    const fullPath = join(MEM_DIR, relPath);
+    const content = readFileSync(fullPath, 'utf-8');
+    const lines = content.split('\n');
+
+    for (let i = 0; i < lines.length; i++) {
+      const lower = lines[i].toLowerCase();
+      const score = keywords.filter(k => lower.includes(k)).length;
+      if (score > 0) {
+        results.push({ file: relPath, line: i + 1, text: lines[i].slice(0, 200), score });
+      }
+    }
+  }
+
+  results.sort((a, b) => b.score - a.score);
+  return { results: results.slice(0, limit), total: results.length };
+}
+
+/**
+ * Returns the most recent commits of the .mem/ repository as
+ * `{ hash, date, message }`; an empty list when git yields nothing.
+ */
+function getTimeline(args: { limit?: number }) {
+  // Coerce to a positive integer; anything else falls back to the default.
+  const requested = Math.floor(Number(args.limit));
+  const limit = requested > 0 ? requested : 20;
+  const log = git('log', '--oneline', '--format=%H|%ai|%s', '-n', String(limit));
+  if (!log) return { commits: [] };
+
+  return {
+    commits: log.split('\n').filter(Boolean).map(line => {
+      // Only the first two '|' are separators; the subject may contain more.
+      const [hash, date, ...msgParts] = line.split('|');
+      return { hash, date, message: msgParts.join('|') };
+    }),
+  };
+}
+
+/**
+ * Reads a file below .mem/, or lists a directory's non-dot entries.
+ * Returns `{ error }` when the path does not exist.
+ */
+function readFile(args: { path: string }) {
+  const safePath = stripParentRefs(args.path);
+  const fullPath = join(MEM_DIR, safePath);
+  if (!existsSync(fullPath)) return { error: `File not found: ${safePath}` };
+  if (statSync(fullPath).isDirectory()) {
+    return { entries: readdirSync(fullPath).filter(f => !f.startsWith('.')) };
+  }
+  return { content: readFileSync(fullPath, 'utf-8') };
+}
+
+/**
+ * Writes `content` to a file below .mem/shared/, creating missing parent
+ * directories so nested targets such as `notes/my-note.md` work on first use.
+ */
+function writeMemory(args: { path: string; content: string }) {
+  const safePath = stripParentRefs(args.path);
+  const fullPath = join(SHARED_DIR, safePath);
+  mkdirSync(dirname(fullPath), { recursive: true });
+  writeFileSync(fullPath, args.content, 'utf-8');
+  return { success: true, path: `shared/${safePath}` };
+}
+
+// ============ MCP Protocol ============
+
+const TOOLS = [
+  {
+    name: 'memov_list_sessions',
+    description: '列出所有会话记录,返回文件名、标题、大小、修改时间',
+    inputSchema: { type: 'object', properties: {}, required: [] },
+  },
+  {
+    name: 'memov_read_session',
+    description: '读取指定会话记录的完整内容',
+    inputSchema: {
+      type: 'object',
+      properties: { file: { type: 'string', description: '会话文件名,如 session-2026-03-09-quant-terminal-cli.md' } },
+      required: ['file'],
+    },
+  },
+  {
+    name: 'memov_search',
+    description: '在 .mem/shared/ 的所有 Markdown 文件中搜索关键词',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: '搜索关键词,空格分隔多个词' },
+        limit: { type: 'number', description: '最大返回条数,默认 20' },
+      },
+      required: ['query'],
+    },
+  },
+  {
+    name: 'memov_timeline',
+    description: '查看 .mem/ 的 Git 提交时间线',
+    inputSchema: {
+      type: 'object',
+      properties: { limit: { type: 'number', description: '最大返回条数,默认 20' } },
+      required: [],
+    },
+  },
+  {
+    name:
'memov_read_file', + description: '读取 .mem/ 目录下的任意文件或列出目录内容', + inputSchema: { + type: 'object', + properties: { path: { type: 'string', description: '相对于 .mem/ 的路径,如 shared/USER.md 或 ontology/graph.jsonl' } }, + required: ['path'], + }, + }, + { + name: 'memov_write', + description: '写入内容到 .mem/shared/ 目录(会被 memov-sync-daemon 自动同步到其他节点)', + inputSchema: { + type: 'object', + properties: { + path: { type: 'string', description: '相对于 shared/ 的路径,如 notes/my-note.md' }, + content: { type: 'string', description: '文件内容' }, + }, + required: ['path', 'content'], + }, + }, +]; + +function handleToolCall(name: string, args: Record) { + switch (name) { + case 'memov_list_sessions': return listSessions(); + case 'memov_read_session': return readSession(args as any); + case 'memov_search': return searchMemory(args as any); + case 'memov_timeline': return getTimeline(args as any); + case 'memov_read_file': return readFile(args as any); + case 'memov_write': return writeMemory(args as any); + default: throw new Error(`Unknown tool: ${name}`); + } +} + +// ============ Main Loop ============ + +async function main() { + process.stderr.write('[MemoV MCP] Starting stdio server...\n'); + process.stderr.write(`[MemoV MCP] MEM_DIR: ${MEM_DIR}\n`); + + while (true) { + const line = await readLine(); + if (!line) continue; + + let msg: any; + try { + msg = JSON.parse(line); + } catch { + continue; + } + + const { id, method, params } = msg; + + if (method === 'initialize') { + reply(id, { + protocolVersion: '2024-11-05', + capabilities: { tools: {} }, + serverInfo: { name: 'memov', version: '1.0.0' }, + }); + } else if (method === 'notifications/initialized') { + // no reply needed for notifications + } else if (method === 'tools/list') { + reply(id, { tools: TOOLS }); + } else if (method === 'tools/call') { + const { name, arguments: args } = params; + try { + const result = handleToolCall(name, args || {}); + reply(id, { + content: [{ type: 'text', text: JSON.stringify(result, null, 
2) }], + }); + } catch (err: any) { + reply(id, { + content: [{ type: 'text', text: JSON.stringify({ error: err.message }) }], + isError: true, + }); + } + } else if (id !== undefined) { + replyError(id, -32601, `Method not found: ${method}`); + } + } +} + +main(); diff --git a/package.json b/package.json index 331ce68..d60be0c 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ }, "dependencies": { "@msgpack/msgpack": "^3.1.3", + "ioredis": "^5.10.0", "redis": "^5.11.0" } } diff --git a/scripts/ensure-services.sh b/scripts/ensure-services.sh new file mode 100755 index 0000000..09e5aff --- /dev/null +++ b/scripts/ensure-services.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Auto-start claw-mesh services if not running, then exec MCP stdio bridge +# Called by Claude Code MCP config + +CLAW_DIR="$HOME/workspace/claw-mesh" +BUN="$HOME/.bun/bin/bun" +LOG_DIR="$CLAW_DIR/logs" +mkdir -p "$LOG_DIR" + +start_if_needed() { + local name="$1" port="$2" script="$3" + if curl -s "http://127.0.0.1:$port/health" >/dev/null 2>&1; then + return + fi + cd "$CLAW_DIR" + nohup "$BUN" run "$script" > "$LOG_DIR/$name.log" 2>&1 & + for i in $(seq 1 10); do + sleep 1 + if curl -s "http://127.0.0.1:$port/health" >/dev/null 2>&1; then + return + fi + done + echo "Warning: $name failed to start on port $port" >&2 +} + +start_if_needed "fsc-monitor" 8080 "mcp/fsc-monitor/src/index.ts" +start_if_needed "fsc-core" 8081 "mcp/fsc-core/src/index.ts" + +# Redis failover monitor (no HTTP port, check by process name) +if ! pgrep -f "redis-failover.ts" >/dev/null 2>&1; then + cd "$CLAW_DIR" + nohup "$BUN" run fsc/redis-failover.ts > "$LOG_DIR/redis-failover.log" 2>&1 & +fi