diff --git a/DEV-LOG.md b/DEV-LOG.md index 18700fca6..df5581a19 100644 --- a/DEV-LOG.md +++ b/DEV-LOG.md @@ -1,5 +1,33 @@ # DEV-LOG +## Enable Claude in Chrome MCP (2026-04-03) + +恢复 Chrome 浏览器控制功能。`src/` 下所有 claudeInChrome 相关源码已与官方一致(0 行差异),问题出在 `@ant/claude-for-chrome-mcp` 包是 6 行 stub(返回空工具列表和 null server)。 + +**替换文件:** + +| 文件 | 变更 | +|------|------| +| `packages/@ant/claude-for-chrome-mcp/src/index.ts` | 6 行 stub → 15 行完整导出 | + +**新增文件:** + +| 文件 | 行数 | 说明 | +|------|------|------| +| `packages/@ant/claude-for-chrome-mcp/src/types.ts` | 134 | 类型定义 | +| `packages/@ant/claude-for-chrome-mcp/src/browserTools.ts` | 546 | 17 个浏览器工具定义 | +| `packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts` | 96 | MCP Server | +| `packages/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts` | 493 | Unix Socket 客户端 | +| `packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts` | 327 | 多 Profile 连接池 | +| `packages/@ant/claude-for-chrome-mcp/src/bridgeClient.ts` | 1126 | Bridge WebSocket 客户端 | +| `packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts` | 301 | 工具调用路由 | + +**不需要 feature flag,不需要改 dev.ts/build.ts,不改 src/ 下任何文件。** + +**运行时依赖:** Chrome 浏览器 + Claude in Chrome 扩展(https://claude.ai/chrome) + +--- + ## Enable Remote Control / BRIDGE_MODE (2026-04-03) **PR**: [claude-code-best/claude-code#60](https://github.com/claude-code-best/claude-code/pull/60) diff --git a/docs/features/claude-in-chrome-mcp.md b/docs/features/claude-in-chrome-mcp.md new file mode 100644 index 000000000..3b668ff55 --- /dev/null +++ b/docs/features/claude-in-chrome-mcp.md @@ -0,0 +1,243 @@ +# Claude in Chrome MCP — 恢复计划 + +更新时间:2026-04-03 +参考项目:`E:\源码\claude-code-source-main\claude-code-source-main` + +## 1. 功能概述 + +Claude in Chrome 让 Claude Code CLI 通过 MCP 协议控制用户的 Chrome 浏览器:导航网页、填写表单、截图、录制 GIF、读取 DOM、执行 JS、监控网络请求和控制台日志。 + +通信方式有两种: +- **本地 Socket**:Chrome 扩展通过 Native Messaging Host 与 CLI 建立 Unix socket 连接 +- **Bridge WebSocket**:通过 Anthropic 的 bridge 服务中转,支持远程浏览器 + +## 2. 
完整加载链路 + +``` +CLI 启动 + │ + ▼ +src/main.tsx:1003 + .option('--chrome', 'Enable Claude in Chrome integration') + │ + ▼ +src/main.tsx:1522-1527 + setChromeFlagOverride(chromeOpts.chrome) + │ + ▼ +src/utils/claudeInChrome/setup.ts + shouldEnableClaudeInChrome() + ├── --chrome flag → true + ├── --no-chrome flag → false + ├── 非交互模式 → false + ├── 环境变量 CLAUDE_CODE_DISABLE_CHROME → false + ├── 配置 claudeInChromeDefaultEnabled → true/false + └── Chrome 扩展已安装 + GrowthBook tengu_chrome_auto_enable → auto + │ + ▼ +src/utils/claudeInChrome/setup.ts + setupClaudeInChrome() + ├── 生成 MCP server 配置 + └── 返回 mcpConfig + allowedTools + │ + ▼ +src/utils/claudeInChrome/mcpServer.ts + import { createClaudeForChromeMcpServer } from '@ant/claude-for-chrome-mcp' + │ + ▼ +packages/@ant/claude-for-chrome-mcp/src/index.ts ← 当前是 STUB + export function createClaudeForChromeMcpServer() { return null } + export const BROWSER_TOOLS = [] +``` + +## 3. 阻塞点清单 + +| # | 阻塞点 | 位置 | 状态 | +|---|--------|------|------| +| ① | `@ant/claude-for-chrome-mcp` 是 stub | `packages/@ant/claude-for-chrome-mcp/src/index.ts` | **6 行空壳,返回 null** | +| ② | 缺少完整实现(8 个文件共 3038 行,其中 7 个为新增) | `packages/@ant/claude-for-chrome-mcp/src/` | 只有 1 个 stub 文件 | + +**不需要任何 feature flag** — `/chrome` 命令无条件注册在 `src/commands.ts:264`。 + +**不需要改 `src/` 下任何文件** — 以下文件全部与参考项目 0 行差异: +- `src/utils/claudeInChrome/setup.ts` +- `src/utils/claudeInChrome/mcpServer.ts` +- `src/utils/claudeInChrome/common.ts` +- `src/utils/claudeInChrome/chromeNativeHost.ts` +- `src/utils/claudeInChrome/prompt.ts` +- `src/utils/claudeInChrome/setupPortable.ts` +- `src/utils/claudeInChrome/toolRendering.tsx` +- `src/commands/chrome/index.ts` +- `src/commands/chrome/chrome.tsx`(仅 sourcemap 差异) +- `src/skills/bundled/claudeInChrome.ts` + +## 4. 
参考项目完整实现清单 + +参考项目路径:`deps/@ant/claude-for-chrome-mcp/src/` + +| 文件 | 行数 | 职责 | +|------|------|------| +| `index.ts` | 15 | 导出入口:`createBridgeClient`、`BROWSER_TOOLS`、`createChromeSocketClient`、`createClaudeForChromeMcpServer`、`localPlatformLabel` + 类型导出 | +| `types.ts` | 134 | 类型定义:`Logger`、`PermissionMode`、`BridgeConfig`、`ChromeExtensionInfo`、`ClaudeForChromeContext`、`SocketClient`、`BridgePermissionRequest/Response`、`PermissionOverrides` | +| `browserTools.ts` | 546 | 17 个浏览器工具定义(MCP tool schema) | +| `mcpServer.ts` | 96 | MCP Server 创建:注册 `ListTools`/`CallTool` handler,选择 socket/bridge 传输 | +| `mcpSocketClient.ts` | 493 | Unix Socket 客户端:连接 Chrome Native Messaging Host,JSON-RPC 通信 | +| `mcpSocketPool.ts` | 327 | Socket 连接池:多 Chrome profile 支持,按 tabId 路由 | +| `bridgeClient.ts` | 1126 | Bridge WebSocket 客户端:连接 Anthropic bridge 服务,扩展发现、设备配对、权限管理 | +| `toolCalls.ts` | 301 | 工具调用路由:连接状态处理、结果转换、权限模式切换、浏览器切换 | + +### 17 个浏览器工具 + +| 工具名 | 功能 | +|--------|------| +| `javascript_tool` | 在页面上下文执行 JavaScript | +| `read_page` | 获取页面可访问性树(DOM) | +| `find` | 自然语言搜索页面元素 | +| `form_input` | 填写表单字段 | +| `computer` | 鼠标键盘操作 + 截图(13 种 action) | +| `navigate` | URL 导航 / 前进后退 | +| `resize_window` | 调整浏览器窗口尺寸 | +| `gif_creator` | GIF 录制和导出 | +| `upload_image` | 图片上传到文件输入框或拖拽区域 | +| `get_page_text` | 提取页面纯文本 | +| `tabs_context_mcp` | 获取当前标签组信息 | +| `tabs_create_mcp` | 创建新标签页 | +| `update_plan` | 向用户提交操作计划供审批 | +| `read_console_messages` | 读取浏览器控制台日志 | +| `read_network_requests` | 读取网络请求 | +| `shortcuts_list` | 列出可用快捷方式 | +| `shortcuts_execute` | 执行快捷方式 | +| `switch_browser` | 切换到其他 Chrome 浏览器(仅 bridge 模式) | + +### 外部依赖 + +| 依赖 | 用途 | 我们项目是否已有 | +|------|------|----------------| +| `ws` | WebSocket 客户端(bridge 模式) | ✅ 有 | +| `@modelcontextprotocol/sdk` | MCP Server + 类型 | ✅ 有 | +| `fs`/`net`/`os`/`path` | Node.js 内置 | ✅ | + +## 5. 
修复步骤 + +### 步骤 1:复制完整实现到 stub 包目录 + +```bash +# 从参考项目复制 8 个文件(7 个新增 + 覆盖现有的 1 个 stub) +cp "E:/源码/claude-code-source-main/claude-code-source-main/deps/@ant/claude-for-chrome-mcp/src/"*.ts \ + "E:/源码/Claude-code-bast/packages/@ant/claude-for-chrome-mcp/src/" +``` + +复制后 `packages/@ant/claude-for-chrome-mcp/src/` 应包含 8 个文件: + +``` +packages/@ant/claude-for-chrome-mcp/src/ +├── index.ts ← 覆盖 stub(15 行,导出入口) +├── types.ts ← 新增(134 行) +├── browserTools.ts ← 新增(546 行) +├── mcpServer.ts ← 新增(96 行) +├── mcpSocketClient.ts ← 新增(493 行) +├── mcpSocketPool.ts ← 新增(327 行) +├── bridgeClient.ts ← 新增(1126 行) +└── toolCalls.ts ← 新增(301 行) +``` + +### 步骤 2:验证构建 + +```bash +bun run build +``` + +不需要改 `scripts/dev.ts` 或 `build.ts`(无 feature flag)。 + +### 步骤 3:功能验证 + +```bash +# 启动(手动启用 chrome) +bun run dev -- --chrome + +# 在 REPL 中: +# 1. /chrome 命令应显示 Chrome 设置菜单 +# 2. 如果 Chrome 扩展已安装 → 状态显示 "Enabled" +# 3. 如果未安装 → 提示安装扩展链接 +``` + +## 6. 验证测试项 + +### 6.1 构建验证 + +| 测试项 | 预期结果 | 验证命令 | +|--------|---------|---------| +| build 成功 | 无报错 | `bun run build` | +| BROWSER_TOOLS 非空 | 产物中包含 17 个工具定义 | `grep "javascript_tool" dist/*.js` | +| createClaudeForChromeMcpServer 非 null | 产物中包含 MCP Server 创建逻辑 | `grep "ListToolsRequestSchema" dist/*.js` | +| Bridge WebSocket 逻辑在产物中 | 包含 bridge 连接代码 | `grep "bridge.claudeusercontent.com" dist/*.js` | + +### 6.2 命令注册验证 + +| 测试项 | 预期结果 | +|--------|---------| +| `/chrome` 命令可见 | REPL 中输入 `/chrome` 显示设置菜单 | +| `--chrome` 参数可用 | `bun run dev -- --chrome` 不报错 | +| `--no-chrome` 参数可用 | `bun run dev -- --no-chrome` 不报错 | + +### 6.3 MCP Server 验证(需要 Chrome 扩展) + +| 测试项 | 预期结果 | +|--------|---------| +| Chrome 扩展检测 | 已安装扩展时 `/chrome` 显示 "Extension: Installed" | +| Socket 连接 | 扩展连接后 MCP tools 可用 | +| BROWSER_TOOLS 注册 | `tabs_context_mcp` 等 17 个工具在 MCP 工具列表中可见 | + +### 6.4 工具功能验证(需要 Chrome 扩展 + 连接) + +| 测试项 | 预期结果 | +|--------|---------| +| `tabs_context_mcp` | 返回当前标签组信息 | +| `navigate` | 能导航到指定 URL | +| `computer` + `screenshot` | 返回页面截图 | +| `read_page` | 返回 DOM 可访问性树 | 
+| `javascript_tool` | 执行 JS 并返回结果 | + +### 6.5 不影响现有功能 + +| 测试项 | 预期结果 | +|--------|---------| +| 不带 `--chrome` 启动 | 正常运行,无 chrome 相关报错 | +| `/voice` 命令 | 不受影响 | +| `/schedule` 命令 | 不受影响 | +| `bun test` | 现有测试全部通过 | + +## 7. 改动总结 + +| 操作 | 文件 | 说明 | +|------|------|------| +| 覆盖 stub | `packages/@ant/claude-for-chrome-mcp/src/index.ts` | 6 行 stub → 15 行完整导出 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/types.ts` | 134 行类型定义 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/browserTools.ts` | 546 行,17 个工具定义 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts` | 96 行 MCP Server | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts` | 493 行 Socket 客户端 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts` | 327 行连接池 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/bridgeClient.ts` | 1126 行 Bridge 客户端 | +| 新增 | `packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts` | 301 行工具调用路由 | + +**不改动**:`src/` 下所有文件(已与参考项目一致)、`scripts/dev.ts`、`build.ts`。 + +## 8. 运行时依赖 + +| 依赖 | 必需? | 说明 | +|------|--------|------| +| Chrome 浏览器 | 是 | 需安装 Chrome | +| Claude in Chrome 扩展 | 是 | 从 https://claude.ai/chrome 安装 | +| claude.ai OAuth 登录 | Bridge 模式需要 | 本地 Socket 模式不需要 | +| Native Messaging Host | 本地 Socket 需要 | 扩展安装时自动注册 | + +## 9. 
与 /voice、/schedule 恢复方式对比 + +| 项 | `/schedule` | `/voice` | Claude in Chrome | +|---|---|---|---| +| 编译开关 | `AGENT_TRIGGERS_REMOTE` | `VOICE_MODE` | **无需** | +| 改 dev.ts/build.ts | ✅ | ✅ | **不需要** | +| 缺失的 vendor 二进制 | 无 | `.node` 文件 | 无 | +| 需要替换的 stub | 无 | `audio-capture-napi` | `@ant/claude-for-chrome-mcp`(7 个文件) | +| 改动 src/ 源码 | 无 | 无 | 无 | +| 平台限制 | 无 | 需原生 `.node` | 需 Chrome 浏览器 | diff --git a/packages/@ant/claude-for-chrome-mcp/src/bridgeClient.ts b/packages/@ant/claude-for-chrome-mcp/src/bridgeClient.ts new file mode 100644 index 000000000..846284778 --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/bridgeClient.ts @@ -0,0 +1,1126 @@ +/** + * WebSocket bridge client for the Chrome extension MCP server. + * Communicates with the Chrome extension via the office bridge server's /chrome path. + */ + +import WebSocket from "ws"; + +import { SocketConnectionError } from "./mcpSocketClient.js"; +import { + localPlatformLabel, + type BridgePermissionRequest, + type ChromeExtensionInfo, + type ClaudeForChromeContext, + type PermissionMode, + type PermissionOverrides, + type SocketClient, +} from "./types.js"; + +/** Timeout for list_extensions response from the bridge. */ +const DISCOVERY_TIMEOUT_MS = 5000; + +/** How long to wait for a peer_connected event when 0 extensions are found. 
*/ +const PEER_WAIT_TIMEOUT_MS = 10_000; + +interface PendingToolCall { + resolve: (value: unknown) => void; + reject: (reason: Error) => void; + timer: NodeJS.Timeout; + results: unknown[]; + isTabsContext: boolean; + onPermissionRequest?: (request: BridgePermissionRequest) => Promise; + startTime: number; + toolName: string; +} + +export class BridgeClient implements SocketClient { + private ws: WebSocket | null = null; + private connected = false; + private authenticated = false; + private connecting = false; + private reconnectTimer: NodeJS.Timeout | null = null; + private reconnectAttempts = 0; + private pendingCalls = new Map(); + private notificationHandler: + | ((notification: { + method: string; + params?: Record; + }) => void) + | null = null; + private context: ClaudeForChromeContext; + private permissionMode: PermissionMode = "ask"; + private allowedDomains: string[] | undefined; + private tabsContextCollectionTimeoutMs = 2000; + private toolCallTimeoutMs = 120_000; + private connectionStartTime: number | null = null; + private connectionEstablishedTime: number | null = null; + /** The device_id of the selected Chrome extension for targeted routing. */ + private selectedDeviceId: string | undefined; + /** True after first discovery attempt completes (success or timeout). */ + private discoveryComplete = false; + /** Shared promise so concurrent callTool invocations join the same discovery. */ + private discoveryPromise: Promise | null = null; + /** Pending discovery response from bridge. */ + private pendingDiscovery: { + resolve: (extensions: ChromeExtensionInfo[]) => void; + timeout: NodeJS.Timeout; + } | null = null; + /** The device_id we had selected before a peer_disconnected — for auto-reselect. */ + private previousSelectedDeviceId: string | undefined; + /** Callbacks waiting for the next peer_connected event. Receives `true` on peer arrival, `false` on abort. 
*/ + private peerConnectedWaiters: Array<(arrived: boolean) => void> = []; + /** The request_id of the current pending pairing broadcast. */ + private pendingPairingRequestId: string | undefined; + /** True while a pairing broadcast is in flight and no response yet. */ + private pairingInProgress = false; + /** The deviceId from a previous persisted pairing. */ + private persistedDeviceId: string | undefined; + /** Resolve callback for a blocking switchBrowser() call. */ + private pendingSwitchResolve: + | ((result: { deviceId: string; name: string } | null) => void) + | null = null; + + constructor(context: ClaudeForChromeContext) { + this.context = context; + if (context.initialPermissionMode) { + this.permissionMode = context.initialPermissionMode; + } + } + + public async ensureConnected(): Promise { + const { logger, serverName } = this.context; + logger.info( + `[${serverName}] ensureConnected called, connected=${this.connected}, authenticated=${this.authenticated}, wsState=${this.ws?.readyState}`, + ); + + if ( + this.connected && + this.authenticated && + this.ws?.readyState === WebSocket.OPEN + ) { + logger.info(`[${serverName}] Already connected and authenticated`); + return true; + } + + if (!this.connecting) { + logger.info(`[${serverName}] Not connecting, starting connection...`); + await this.connect(); + } else { + logger.info(`[${serverName}] Already connecting, waiting...`); + } + + // Wait for authentication with timeout + return new Promise((resolve) => { + const timeout = setTimeout(() => { + logger.info( + `[${serverName}] Connection timeout, connected=${this.connected}, authenticated=${this.authenticated}`, + ); + resolve(false); + }, 10_000); + const check = () => { + if (this.connected && this.authenticated) { + logger.info(`[${serverName}] Connection successful`); + clearTimeout(timeout); + resolve(true); + } else if (!this.connecting) { + logger.info(`[${serverName}] No longer connecting, giving up`); + clearTimeout(timeout); + 
resolve(false); + } else { + setTimeout(check, 200); + } + }; + check(); + }); + } + + public async callTool( + name: string, + args: Record, + permissionOverrides?: PermissionOverrides, + ): Promise { + const { logger, serverName, trackEvent } = this.context; + + if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { + throw new SocketConnectionError(`[${serverName}] Bridge not connected`); + } + + // Lazy discovery: run on first tool call if no extension selected yet. + // Use a shared promise so concurrent callers join the same discovery. + if (!this.selectedDeviceId && !this.discoveryComplete) { + this.discoveryPromise ??= this.discoverAndSelectExtension().finally( + () => { + this.discoveryPromise = null; + }, + ); + await this.discoveryPromise; + } + + // TODO: Once all extensions support pairing, throw here for multi-extension + // cases where pairingInProgress is true. For now, let the bridge handle + // routing — it auto-routes to a single extension or returns an error for + // multiple extensions without a target_device_id. + + const toolUseId = crypto.randomUUID(); + const isTabsContext = name === "tabs_context_mcp"; + const startTime = Date.now(); + const timeoutMs = isTabsContext + ? this.tabsContextCollectionTimeoutMs + : this.toolCallTimeoutMs; + + // Track tool call start + trackEvent?.("chrome_bridge_tool_call_started", { + tool_name: name, + tool_use_id: toolUseId, + }); + + // Per-call overrides (from session context) take priority over + // instance values (from set_permission_mode on the singleton). + const effectivePermissionMode = + permissionOverrides?.permissionMode ?? this.permissionMode; + const effectiveAllowedDomains = + permissionOverrides?.allowedDomains ?? 
this.allowedDomains; + + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + const pending = this.pendingCalls.get(toolUseId); + if (pending) { + this.pendingCalls.delete(toolUseId); + const durationMs = Date.now() - pending.startTime; + + if (isTabsContext && pending.results.length > 0) { + // For tabs_context, resolve with collected results even on timeout + trackEvent?.("chrome_bridge_tool_call_completed", { + tool_name: name, + tool_use_id: toolUseId, + duration_ms: durationMs, + }); + resolve(this.mergeTabsResults(pending.results)); + } else { + logger.warn( + `[${serverName}] Tool call timeout: ${name} (${toolUseId.slice(0, 8)}) after ${durationMs}ms, pending calls: ${this.pendingCalls.size}`, + ); + trackEvent?.("chrome_bridge_tool_call_timeout", { + tool_name: name, + tool_use_id: toolUseId, + duration_ms: durationMs, + timeout_ms: timeoutMs, + }); + reject( + new SocketConnectionError( + `[${serverName}] Tool call timed out: ${name}`, + ), + ); + } + } + }, timeoutMs); + + this.pendingCalls.set(toolUseId, { + resolve, + reject, + timer, + results: [], + isTabsContext, + onPermissionRequest: permissionOverrides?.onPermissionRequest, + startTime, + toolName: name, + }); + + const message: Record = { + type: "tool_call", + tool_use_id: toolUseId, + client_type: this.context.clientTypeId, + tool: name, + args, + }; + + // Target the selected extension for routing + if (this.selectedDeviceId) { + message.target_device_id = this.selectedDeviceId; + } + + // Only include permission fields when a value exists. + // Priority: per-call override (from session context) > instance value (from set_permission_mode). 
+ if (effectivePermissionMode) { + message.permission_mode = effectivePermissionMode; + } + if (effectiveAllowedDomains?.length) { + message.allowed_domains = effectiveAllowedDomains; + } + if (permissionOverrides?.onPermissionRequest) { + message.handle_permission_prompts = true; + } + + logger.debug( + `[${serverName}] Sending tool_call: ${name} (${toolUseId.slice(0, 8)})`, + ); + this.ws!.send(JSON.stringify(message)); + }); + } + + public isConnected(): boolean { + return ( + this.connected && + this.authenticated && + this.ws?.readyState === WebSocket.OPEN + ); + } + + public disconnect(): void { + this.cleanup(); + } + + public setNotificationHandler( + handler: (notification: { + method: string; + params?: Record; + }) => void, + ): void { + this.notificationHandler = handler; + } + + public async setPermissionMode( + mode: PermissionMode, + allowedDomains?: string[], + ): Promise { + this.permissionMode = mode; + this.allowedDomains = allowedDomains; + } + + // =========================================================================== + // Extension discovery and selection + // =========================================================================== + + /** + * Discover connected extensions and auto-select one, or broadcast a pairing request. + * Called lazily on the first tool call. 
+ */ + private async discoverAndSelectExtension(): Promise { + const { logger, serverName } = this.context; + + this.persistedDeviceId ??= this.context.getPersistedDeviceId?.(); + + let extensions = await this.queryBridgeExtensions(); + + if (extensions.length === 0) { + logger.info( + `[${serverName}] No extensions connected, waiting up to ${PEER_WAIT_TIMEOUT_MS}ms for peer_connected`, + ); + const peerArrived = await this.waitForPeerConnected(PEER_WAIT_TIMEOUT_MS); + if (peerArrived) { + extensions = await this.queryBridgeExtensions(); + } + } + + this.discoveryComplete = true; + + if (extensions.length === 0) { + // Still nothing — callTool will throw a clear error + logger.info(`[${serverName}] No extensions found after waiting`); + return; + } + + // Single extension: auto-select silently + if (extensions.length === 1) { + const ext = extensions[0]!; + if (!this.isLocalExtension(ext)) { + this.context.onRemoteExtensionWarning?.(ext); + } + this.selectExtension(ext.deviceId); + return; + } + + // Multiple extensions: check for persisted selection + if (this.persistedDeviceId) { + const persisted = extensions.find( + (e) => e.deviceId === this.persistedDeviceId, + ); + if (persisted) { + logger.info( + `[${serverName}] Auto-connecting to persisted extension: ${persisted.name || persisted.deviceId.slice(0, 8)}`, + ); + this.selectExtension(persisted.deviceId); + return; + } + } + + // Multiple extensions, no valid persisted selection: broadcast and fail fast + this.broadcastPairingRequest(); + this.pairingInProgress = true; + } + + /** + * Query the bridge for connected extensions. Returns empty array on timeout. + * Deduplicates by deviceId, keeping the most recent connection — the bridge + * may report stale duplicates (e.g. after a service worker restart). 
+ */ + private async queryBridgeExtensions(): Promise { + const raw: ChromeExtensionInfo[] = await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.pendingDiscovery = null; + resolve([]); + }, DISCOVERY_TIMEOUT_MS); + + this.pendingDiscovery = { resolve, timeout }; + this.ws?.send(JSON.stringify({ type: "list_extensions" })); + }); + + const byDeviceId = new Map(); + for (const ext of raw) { + const existing = byDeviceId.get(ext.deviceId); + if (!existing || ext.connectedAt > existing.connectedAt) { + byDeviceId.set(ext.deviceId, ext); + } + } + return [...byDeviceId.values()]; + } + + /** + * Select an extension by device ID for per-message targeted routing. + */ + private selectExtension(deviceId: string): void { + const { logger, serverName } = this.context; + this.selectedDeviceId = deviceId; + this.previousSelectedDeviceId = undefined; + logger.info( + `[${serverName}] Selected Chrome extension: ${deviceId.slice(0, 8)}...`, + ); + } + + /** + * Check if an extension might be on the same machine as this MCP client + * by comparing OS platform. Extensions can't provide a real hostname from + * the service worker sandbox, so platform is a weak heuristic. The profile + * email is the primary differentiator shown in the selection dialog. + */ + private isLocalExtension(ext: ChromeExtensionInfo): boolean { + if (!ext.osPlatform) return false; + return ext.osPlatform === localPlatformLabel(); + } + + /** + * Returns a promise that resolves to `true` when a peer_connected event + * fires, or `false` if the timeout elapses first. 
+ */ + private waitForPeerConnected(timeoutMs: number): Promise { + return new Promise((resolve) => { + const timer = setTimeout(() => { + this.peerConnectedWaiters = this.peerConnectedWaiters.filter( + (w) => w !== onPeer, + ); + resolve(false); + }, timeoutMs); + + const onPeer = (arrived: boolean) => { + clearTimeout(timer); + resolve(arrived); + }; + + this.peerConnectedWaiters.push(onPeer); + }); + } + + /** + * Broadcast a pairing request to all connected extensions. + * Non-blocking — the pairing_response handler will select the extension. + */ + private broadcastPairingRequest(): void { + const requestId = crypto.randomUUID(); + this.pendingPairingRequestId = requestId; + this.ws?.send( + JSON.stringify({ + type: "pairing_request", + request_id: requestId, + client_type: this.context.clientTypeId, + }), + ); + } + + /** + * Switch to a different browser. Broadcasts a pairing request and blocks + * until a response arrives or timeout (120s). Returns the paired extension + * info, or null on timeout. + */ + public async switchBrowser(): Promise< + | { + deviceId: string; + name: string; + } + | "no_other_browsers" + | null + > { + const extensions = await this.queryBridgeExtensions(); + const currentDeviceId = + this.selectedDeviceId ?? 
this.previousSelectedDeviceId; + if ( + extensions.length === 0 || + (extensions.length === 1 && + (!currentDeviceId || extensions[0]!.deviceId === currentDeviceId)) + ) { + return "no_other_browsers"; + } + + this.previousSelectedDeviceId = this.selectedDeviceId; + this.selectedDeviceId = undefined; + this.discoveryComplete = false; + this.pairingInProgress = false; + + const requestId = crypto.randomUUID(); + this.pendingPairingRequestId = requestId; + if (this.ws?.readyState !== WebSocket.OPEN) { + return null; + } + this.ws.send( + JSON.stringify({ + type: "pairing_request", + request_id: requestId, + client_type: this.context.clientTypeId, + }), + ); + + // Resolve any previous pending switch so the caller doesn't hang forever + if (this.pendingSwitchResolve) { + this.pendingSwitchResolve(null); + } + + // Block for switch_browser since user is actively engaged + return new Promise((resolve) => { + const timer = setTimeout(() => { + if (this.pendingPairingRequestId === requestId) { + this.pendingPairingRequestId = undefined; + } + this.pendingSwitchResolve = null; + resolve(null); + }, 120_000); + + this.pendingSwitchResolve = (result) => { + clearTimeout(timer); + this.pendingSwitchResolve = null; + resolve(result); + }; + }); + } + + private async connect(): Promise { + const { logger, serverName, bridgeConfig, trackEvent } = this.context; + + if (!bridgeConfig) { + logger.error(`[${serverName}] No bridge config provided`); + return; + } + + if (this.connecting) { + return; + } + + this.connecting = true; + this.authenticated = false; + this.connectionStartTime = Date.now(); + this.closeSocket(); + + // Get user ID for the connection path + let userId: string; + let token: string | undefined; + + if (bridgeConfig.devUserId) { + userId = bridgeConfig.devUserId; + logger.debug(`[${serverName}] Using dev user ID for bridge connection`); + } else { + logger.debug(`[${serverName}] Fetching user ID for bridge connection`); + const fetchedUserId = await 
bridgeConfig.getUserId(); + if (!fetchedUserId) { + const durationMs = Date.now() - this.connectionStartTime; + logger.error( + `[${serverName}] No user ID available after ${durationMs}ms`, + ); + trackEvent?.("chrome_bridge_connection_failed", { + duration_ms: durationMs, + error_type: "no_user_id", + reconnect_attempt: this.reconnectAttempts, + }); + this.connecting = false; + this.context.onAuthenticationError?.(); + return; + } + userId = fetchedUserId; + + logger.debug( + `[${serverName}] Fetching OAuth token for bridge connection`, + ); + token = await bridgeConfig.getOAuthToken(); + if (!token) { + const durationMs = Date.now() - this.connectionStartTime; + logger.error( + `[${serverName}] No OAuth token available after ${durationMs}ms`, + ); + trackEvent?.("chrome_bridge_connection_failed", { + duration_ms: durationMs, + error_type: "no_oauth_token", + reconnect_attempt: this.reconnectAttempts, + }); + this.connecting = false; + this.context.onAuthenticationError?.(); + return; + } + } + + // Connect to user-specific endpoint: /chrome/ + const wsUrl = `${bridgeConfig.url}/chrome/${userId}`; + logger.info(`[${serverName}] Connecting to bridge: ${wsUrl}`); + + // Track connection started + trackEvent?.("chrome_bridge_connection_started", { + bridge_url: wsUrl, + }); + + try { + this.ws = new WebSocket(wsUrl); + } catch (error) { + const durationMs = Date.now() - this.connectionStartTime; + logger.error( + `[${serverName}] Failed to create WebSocket after ${durationMs}ms:`, + error, + ); + trackEvent?.("chrome_bridge_connection_failed", { + duration_ms: durationMs, + error_type: "websocket_error", + reconnect_attempt: this.reconnectAttempts, + }); + this.connecting = false; + this.scheduleReconnect(); + return; + } + + this.ws.on("open", () => { + logger.info( + `[${serverName}] WebSocket connected, sending connect message`, + ); + + // First message must be connect (same format as office path) + const connectMessage: Record = { + type: "connect", + 
client_type: this.context.clientTypeId, + }; + + if (bridgeConfig.devUserId) { + connectMessage.dev_user_id = bridgeConfig.devUserId; + } else { + connectMessage.oauth_token = token; + } + + this.ws?.send(JSON.stringify(connectMessage)); + }); + + this.ws.on("message", (data: WebSocket.Data) => { + try { + const message = JSON.parse(data.toString()) as Record; + logger.debug( + `[${serverName}] Bridge received: ${JSON.stringify(message)}`, + ); + this.handleMessage(message); + } catch (error) { + logger.error(`[${serverName}] Failed to parse bridge message:`, error); + } + }); + + this.ws.on("close", (code: number) => { + const durationSinceConnect = this.connectionEstablishedTime + ? Date.now() - this.connectionEstablishedTime + : 0; + logger.info( + `[${serverName}] Bridge connection closed (code: ${code}, duration: ${durationSinceConnect}ms)`, + ); + trackEvent?.("chrome_bridge_disconnected", { + close_code: code, + duration_since_connect_ms: durationSinceConnect, + reconnect_attempt: this.reconnectAttempts + 1, + }); + this.connected = false; + this.authenticated = false; + this.connecting = false; + this.connectionEstablishedTime = null; + this.scheduleReconnect(); + }); + + this.ws.on("error", (error: Error) => { + const durationMs = this.connectionStartTime + ? Date.now() - this.connectionStartTime + : 0; + logger.error( + `[${serverName}] Bridge WebSocket error after ${durationMs}ms: ${error.message}`, + ); + trackEvent?.("chrome_bridge_connection_failed", { + duration_ms: durationMs, + error_type: "websocket_error", + reconnect_attempt: this.reconnectAttempts, + }); + this.connected = false; + this.authenticated = false; + this.connecting = false; + }); + } + + private handleMessage(message: Record): void { + const { logger, serverName, trackEvent } = this.context; + + switch (message.type) { + case "paired": { + const durationMs = this.connectionStartTime + ? 
Date.now() - this.connectionStartTime + : 0; + logger.info( + `[${serverName}] Paired with Chrome extension (duration: ${durationMs}ms)`, + ); + this.connected = true; + this.authenticated = true; + this.connecting = false; + this.reconnectAttempts = 0; + this.connectionEstablishedTime = Date.now(); + trackEvent?.("chrome_bridge_connection_succeeded", { + duration_ms: durationMs, + status: "paired", + }); + break; + } + + case "waiting": { + const durationMs = this.connectionStartTime + ? Date.now() - this.connectionStartTime + : 0; + logger.info( + `[${serverName}] Waiting for Chrome extension to connect (duration: ${durationMs}ms)`, + ); + this.connected = true; + this.authenticated = true; + this.connecting = false; + this.reconnectAttempts = 0; + this.connectionEstablishedTime = Date.now(); + trackEvent?.("chrome_bridge_connection_succeeded", { + duration_ms: durationMs, + status: "waiting", + }); + break; + } + + case "peer_connected": + logger.info(`[${serverName}] Chrome extension connected to bridge`); + trackEvent?.("chrome_bridge_peer_connected", null); + // If no extension selected, mark discovery as needed (next tool call will discover) + if (!this.selectedDeviceId) { + this.discoveryComplete = false; + } + // Auto-reselect if the previously selected extension reconnected (e.g., service worker restart) + if ( + this.previousSelectedDeviceId && + message.deviceId === this.previousSelectedDeviceId && + !this.pendingSwitchResolve + ) { + logger.info( + `[${serverName}] Previously selected extension reconnected, auto-reselecting`, + ); + this.selectExtension(this.previousSelectedDeviceId); + this.previousSelectedDeviceId = undefined; + } + if (this.peerConnectedWaiters.length > 0) { + const waiters = this.peerConnectedWaiters; + this.peerConnectedWaiters = []; + for (const waiter of waiters) { + waiter(true); + } + } + break; + + case "peer_disconnected": + logger.info( + `[${serverName}] Chrome extension disconnected from bridge`, + ); + 
trackEvent?.("chrome_bridge_peer_disconnected", null); + // If the selected extension disconnected, clear selection for re-discovery + if (message.deviceId && message.deviceId === this.selectedDeviceId) { + logger.info( + `[${serverName}] Selected extension disconnected, clearing selection`, + ); + this.previousSelectedDeviceId = this.selectedDeviceId; + this.selectedDeviceId = undefined; + this.discoveryComplete = false; + } + break; + + case "extensions_list": + // Response to list_extensions — resolve pending discovery + if (this.pendingDiscovery) { + clearTimeout(this.pendingDiscovery.timeout); + this.pendingDiscovery.resolve( + (message.extensions as ChromeExtensionInfo[]) ?? [], + ); + this.pendingDiscovery = null; + } + break; + + case "pairing_response": { + const requestId = message.request_id as string; + const responseDeviceId = message.device_id as string; + const responseName = message.name as string; + if ( + this.pendingPairingRequestId === requestId && + responseDeviceId && + responseName + ) { + this.pendingPairingRequestId = undefined; + this.pairingInProgress = false; + this.selectExtension(responseDeviceId); + this.context.onExtensionPaired?.(responseDeviceId, responseName); + logger.info( + `[${serverName}] Paired with "${responseName}" (${responseDeviceId.slice(0, 8)})`, + ); + if (this.pendingSwitchResolve) { + this.pendingSwitchResolve({ + deviceId: responseDeviceId, + name: responseName, + }); + this.pendingSwitchResolve = null; + } + } + break; + } + + case "ping": + this.ws?.send(JSON.stringify({ type: "pong" })); + break; + + case "pong": + // Response to our keepalive, nothing to do + break; + + case "tool_result": + this.handleToolResult(message); + break; + + case "permission_request": + void this.handlePermissionRequest(message); + break; + + case "notification": + if (this.notificationHandler) { + this.notificationHandler({ + method: message.method as string, + params: message.params as Record | undefined, + }); + } + break; + + 
/**
 * Handle a `permission_request` message from the bridge.
 *
 * Looks up the pending tool call identified by `tool_use_id`, asks its
 * `onPermissionRequest` callback for a decision, and sends the decision back
 * via `sendPermissionResponse`. If the callback throws (or sending the
 * decision throws), a denial is sent instead.
 *
 * Requests whose `tool_use_id` is not one of our pending calls are ignored
 * rather than denied (see the inline comment for why).
 *
 * @param message - Raw, already-parsed bridge message. Fields are untrusted
 *   wire data, so the ids are type-checked at runtime before use.
 */
private async handlePermissionRequest(
  message: Record<string, unknown>,
): Promise<void> {
  const { logger, serverName } = this.context;
  const toolUseId = message.tool_use_id;
  const requestId = message.request_id;

  // Validate instead of casting: a non-string id would otherwise throw in
  // `.slice()` below and surface as an unhandled rejection, because the
  // caller invokes this method with `void`.
  if (
    typeof toolUseId !== "string" ||
    !toolUseId ||
    typeof requestId !== "string" ||
    !requestId
  ) {
    logger.warn(
      `[${serverName}] permission_request missing tool_use_id or request_id`,
    );
    return;
  }

  const pending = this.pendingCalls.get(toolUseId);
  if (!pending?.onPermissionRequest) {
    // Don't auto-deny — the bridge broadcasts permission_request to all
    // connected MCP clients, and only the client that made the tool call
    // has the pending entry. Auto-denying here would race with the correct
    // client's handler when multiple Desktop instances are connected.
    logger.debug(
      `[${serverName}] Ignoring permission_request for unknown tool_use_id ${toolUseId.slice(0, 8)} (not our call)`,
    );
    return;
  }

  const request: BridgePermissionRequest = {
    toolUseId,
    requestId,
    toolType: (message.tool_type as string) ?? "unknown",
    url: (message.url as string) ?? "",
    actionData: message.action_data as Record<string, unknown> | undefined,
  };

  try {
    const allowed = await pending.onPermissionRequest(request);
    this.sendPermissionResponse(requestId, allowed);
  } catch (error) {
    // Fail closed: any error while deciding is reported as a denial.
    logger.error(`[${serverName}] Error handling permission request:`, error);
    this.sendPermissionResponse(requestId, false);
  }
}

/**
 * Send a `permission_response` for `requestId` over the bridge socket.
 *
 * When an extension is currently selected, the response is addressed to it
 * through `target_device_id`. If the socket is not open the response cannot
 * be delivered; this is logged rather than dropped silently.
 *
 * @param requestId - The `request_id` of the permission request being answered.
 * @param allowed - Whether the action was approved.
 */
private sendPermissionResponse(requestId: string, allowed: boolean): void {
  if (this.ws?.readyState !== WebSocket.OPEN) {
    const { logger, serverName } = this.context;
    logger.warn(
      `[${serverName}] Cannot send permission_response: bridge socket is not open`,
    );
    return;
  }

  const message: Record<string, unknown> = {
    type: "permission_response",
    request_id: requestId,
    allowed,
  };
  if (this.selectedDeviceId) {
    message.target_device_id = this.selectedDeviceId;
  }
  this.ws.send(JSON.stringify(message));
}

/**
 * Handle a `tool_result` message from the bridge.
 *
 * Matches the result to its pending call by `tool_use_id`. For a
 * tabs-context call made while no extension is selected, the normalized
 * result is only appended to `pending.results`; the call's timer is
 * responsible for resolving it later. For every other call the first result
 * wins: the timer is cleared, the pending entry is removed, telemetry is
 * emitted, and the call is resolved with the normalized payload (errors are
 * resolved, not rejected).
 *
 * @param message - Raw, already-parsed bridge message.
 */
private handleToolResult(message: Record<string, unknown>): void {
  const { logger, serverName, trackEvent } = this.context;
  const toolUseId = message.tool_use_id;
  // Runtime type check: a non-string id can never match a pending call and
  // would throw in `.slice()` in the log line below.
  if (typeof toolUseId !== "string" || !toolUseId) {
    logger.warn(`[${serverName}] Received tool_result without tool_use_id`);
    return;
  }

  const pending = this.pendingCalls.get(toolUseId);
  if (!pending) {
    logger.debug(
      `[${serverName}] Received tool_result for unknown call: ${toolUseId.slice(0, 8)}`,
    );
    return;
  }

  const durationMs = Date.now() - pending.startTime;

  // Normalize bridge response format to match socket client format.
  // Bridge sends: { type, tool_use_id, content: [...], is_error?: boolean }
  // Socket sends: { result: { content: [...] } } or { error: { content: [...] } }
  const normalized = this.normalizeBridgeResponse(message);
  const isError = Boolean(message.is_error) || "error" in normalized;

  if (pending.isTabsContext && !this.selectedDeviceId) {
    // No extension selected: collect results from all extensions (pre-selection / backward compat)
    pending.results.push(normalized);
    // Don't resolve yet — let the timer handle collection
  } else {
    // For other tools, resolve on first result
    clearTimeout(pending.timer);
    this.pendingCalls.delete(toolUseId);

    if (isError) {
      // Extract the first text item of the error content for telemetry,
      // truncated to 200 characters.
      const errorContent = (normalized as { error?: { content?: unknown[] } })
        .error?.content;
      let errorMessage = "Unknown error";
      if (Array.isArray(errorContent)) {
        const textItem = errorContent.find(
          (item) =>
            typeof item === "object" && item !== null && "text" in item,
        ) as { text?: string } | undefined;
        if (textItem?.text) {
          errorMessage = textItem.text.slice(0, 200);
        }
      }

      logger.warn(
        `[${serverName}] Tool call error: ${pending.toolName} (${toolUseId.slice(0, 8)}) after ${durationMs}ms`,
      );
      trackEvent?.("chrome_bridge_tool_call_error", {
        tool_name: pending.toolName,
        tool_use_id: toolUseId,
        duration_ms: durationMs,
        error_message: errorMessage,
      });
    } else {
      logger.debug(
        `[${serverName}] Tool call completed: ${pending.toolName} (${toolUseId.slice(0, 8)}) in ${durationMs}ms`,
      );
      trackEvent?.("chrome_bridge_tool_call_completed", {
        tool_name: pending.toolName,
        tool_use_id: toolUseId,
        duration_ms: durationMs,
      });
    }

    pending.resolve(normalized);
  }
}
/**
 * Merge the tabs-context results collected from several extensions into a
 * single socket-format result.
 *
 * Each result's `result.content` text items are parsed as JSON; a top-level
 * array, or an object with an `availableTabs` array, contributes its entries
 * to the merged list. Items that are not valid JSON are skipped.
 *
 * @param results - Normalized results, one per responding extension.
 * @returns A `{ result: { content } }` object holding the merged tab list
 *   (as JSON and as a human-readable summary) when at least one tab was
 *   found; otherwise the first entry of `results` unchanged (`undefined`
 *   when `results` is empty).
 */
private mergeTabsResults(results: unknown[]): unknown {
  const mergedTabs: unknown[] = [];

  for (const result of results) {
    const msg = result as Record<string, unknown>;
    const resultData = msg.result as
      | { content?: Array<{ type: string; text?: string }> }
      | undefined;
    const content = resultData?.content;

    if (!content || !Array.isArray(content)) continue;

    for (const item of content) {
      if (item.type === "text" && item.text) {
        try {
          const parsed = JSON.parse(item.text);
          if (Array.isArray(parsed)) {
            mergedTabs.push(...parsed);
          } else if (
            parsed?.availableTabs &&
            Array.isArray(parsed.availableTabs)
          ) {
            mergedTabs.push(...parsed.availableTabs);
          }
        } catch {
          // Not JSON, skip
        }
      }
    }
  }

  if (mergedTabs.length > 0) {
    // NOTE(review): tab entries are assumed to carry tabId/title/url; they
    // come from extension JSON and are not validated here.
    const tabListText = mergedTabs
      .map((t) => {
        const tab = t as { tabId: number; title: string; url: string };
        return `  \u2022 tabId ${tab.tabId}: "${tab.title}" (${tab.url})`;
      })
      .join("\n");

    return {
      result: {
        content: [
          {
            type: "text",
            text: JSON.stringify({ availableTabs: mergedTabs }),
          },
          {
            type: "text",
            text: `\n\nTab Context:\n- Available tabs:\n${tabListText}`,
          },
        ],
      },
    };
  }

  // Return first result as fallback
  return results[0];
}

/**
 * Schedule one reconnection attempt with exponential backoff.
 *
 * No-op when a reconnect timer is already pending. The delay starts at 2s,
 * grows by 1.5x per attempt, and is capped at 30s. After more than 100
 * attempts the client gives up: it logs, emits
 * `chrome_bridge_reconnect_exhausted`, resets the attempt counter, and does
 * not schedule anything.
 */
private scheduleReconnect(): void {
  const { logger, serverName, trackEvent } = this.context;

  const maxAttempts = 100;
  const baseDelayMs = 2000;
  const backoffFactor = 1.5;
  const maxDelayMs = 30_000;

  if (this.reconnectTimer) return;

  this.reconnectAttempts++;

  if (this.reconnectAttempts > maxAttempts) {
    logger.warn(
      `[${serverName}] Giving up bridge reconnection after ${maxAttempts} attempts`,
    );
    trackEvent?.("chrome_bridge_reconnect_exhausted", {
      total_attempts: maxAttempts,
    });
    this.reconnectAttempts = 0;
    return;
  }

  const delay = Math.min(
    baseDelayMs * Math.pow(backoffFactor, this.reconnectAttempts - 1),
    maxDelayMs,
  );

  // Log the first 10 attempts, then only every 10th, to keep logs readable.
  if (this.reconnectAttempts <= 10 || this.reconnectAttempts % 10 === 0) {
    logger.info(
      `[${serverName}] Bridge reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts})`,
    );
  }

  this.reconnectTimer = setTimeout(() => {
    this.reconnectTimer = null;
    void this.connect();
  }, delay);
}

/**
 * Close the current WebSocket (if any) and reset connection-scoped state.
 *
 * Clears the connected/authenticated flags, the extension selection and
 * pairing state, and unblocks anything waiting on this connection: a pending
 * browser switch resolves with `null`, a pending discovery resolves with an
 * empty list, and peer-connected waiters are called with `false`.
 *
 * Does not touch the reconnect timer or pending tool calls.
 */
private closeSocket(): void {
  if (this.ws) {
    this.ws.removeAllListeners();
    // NOTE(review): assumes `this.ws` is a `ws` WebSocket (EventEmitter).
    // Closing such a socket while its handshake is still in flight emits an
    // "error" event asynchronously; with every listener removed, that event
    // would be thrown as an uncaught exception. Keep a no-op handler so the
    // discarded socket can fail quietly.
    this.ws.on("error", () => {});
    this.ws.close();
    this.ws = null;
  }
  this.connected = false;
  this.authenticated = false;
  // Clear extension selection state so reconnections start fresh
  this.selectedDeviceId = undefined;
  this.discoveryComplete = false;
  this.pendingPairingRequestId = undefined;
  this.pairingInProgress = false;
  if (this.pendingSwitchResolve) {
    this.pendingSwitchResolve(null);
    this.pendingSwitchResolve = null;
  }
  if (this.pendingDiscovery) {
    clearTimeout(this.pendingDiscovery.timeout);
    this.pendingDiscovery.resolve([]);
    this.pendingDiscovery = null;
  }
  // Unblock any in-progress waitForPeerConnected so it doesn't hang until its timeout
  if (this.peerConnectedWaiters.length > 0) {
    // Swap the list out first so a waiter that re-registers is not re-run.
    const waiters = this.peerConnectedWaiters;
    this.peerConnectedWaiters = [];
    for (const waiter of waiters) {
      waiter(false);
    }
  }
}

/**
 * Full teardown: cancel any scheduled reconnect, reject every pending tool
 * call with a `SocketConnectionError`, close the socket, and reset the
 * reconnect attempt counter.
 */
private cleanup(): void {
  if (this.reconnectTimer) {
    clearTimeout(this.reconnectTimer);
    this.reconnectTimer = null;
  }

  // Reject all pending calls
  for (const [id, pending] of this.pendingCalls) {
    clearTimeout(pending.timer);
    pending.reject(new SocketConnectionError("Bridge client disconnected"));
    this.pendingCalls.delete(id);
  }

  this.closeSocket();
  this.reconnectAttempts = 0;
}
JavaScript code in the context of the current page. The code runs in the page's context and can interact with the DOM, window object, and page variables. Returns the result of the last expression or any thrown errors. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + action: { + type: "string", + description: "Must be set to 'javascript_exec'", + }, + text: { + type: "string", + description: + "The JavaScript code to execute. The code will be evaluated in the page context. The result of the last expression will be returned automatically. Do NOT use 'return' statements - just write the expression you want to evaluate (e.g., 'window.myData.value' not 'return window.myData.value'). You can access and modify the DOM, call page functions, and interact with page variables.", + }, + tabId: { + type: "number", + description: + "Tab ID to execute the code in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["action", "text", "tabId"], + }, + }, + { + name: "read_page", + description: + "Get an accessibility tree representation of elements on the page. By default returns all elements including non-visible ones. Output is limited to 50000 characters by default. If the output exceeds this limit, you will receive an error asking you to specify a smaller depth or focus on a specific element using ref_id. Optionally filter for only interactive elements. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + filter: { + type: "string", + enum: ["interactive", "all"], + description: + 'Filter elements: "interactive" for buttons/links/inputs only, "all" for all elements including non-visible ones (default: all elements)', + }, + tabId: { + type: "number", + description: + "Tab ID to read from. Must be a tab in the current group. 
Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + depth: { + type: "number", + description: + "Maximum depth of the tree to traverse (default: 15). Use a smaller depth if output is too large.", + }, + ref_id: { + type: "string", + description: + "Reference ID of a parent element to read. Will return the specified element and all its children. Use this to focus on a specific part of the page when output is too large.", + }, + max_chars: { + type: "number", + description: + "Maximum characters for output (default: 50000). Set to a higher value if your client can handle large outputs.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "find", + description: + 'Find elements on the page using natural language. Can search for elements by their purpose (e.g., "search bar", "login button") or by text content (e.g., "organic mango product"). Returns up to 20 matching elements with references that can be used with other tools. If more than 20 matches exist, you\'ll be notified to use a more specific query. If you don\'t have a valid tab ID, use tabs_context_mcp first to get available tabs.', + inputSchema: { + type: "object", + properties: { + query: { + type: "string", + description: + 'Natural language description of what to find (e.g., "search bar", "add to cart button", "product title containing organic")', + }, + tabId: { + type: "number", + description: + "Tab ID to search in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["query", "tabId"], + }, + }, + { + name: "form_input", + description: + "Set values in form elements using element reference ID from the read_page tool. 
If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + ref: { + type: "string", + description: + 'Element reference ID from the read_page tool (e.g., "ref_1", "ref_2")', + }, + value: { + type: ["string", "boolean", "number"], + description: + "The value to set. For checkboxes use boolean, for selects use option value or text, for other inputs use appropriate string/number", + }, + tabId: { + type: "number", + description: + "Tab ID to set form value in. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["ref", "value", "tabId"], + }, + }, + { + name: "computer", + description: `Use a mouse and keyboard to interact with a web browser, and take screenshots. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.\n* Whenever you intend to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.\n* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your click location so that the tip of the cursor visually falls on the element that you want to click.\n* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. 
Don't click boxes on their edges unless asked.`, + inputSchema: { + type: "object", + properties: { + action: { + type: "string", + enum: [ + "left_click", + "right_click", + "type", + "screenshot", + "wait", + "scroll", + "key", + "left_click_drag", + "double_click", + "triple_click", + "zoom", + "scroll_to", + "hover", + ], + description: + "The action to perform:\n* `left_click`: Click the left mouse button at the specified coordinates.\n* `right_click`: Click the right mouse button at the specified coordinates to open context menus.\n* `double_click`: Double-click the left mouse button at the specified coordinates.\n* `triple_click`: Triple-click the left mouse button at the specified coordinates.\n* `type`: Type a string of text.\n* `screenshot`: Take a screenshot of the screen.\n* `wait`: Wait for a specified number of seconds.\n* `scroll`: Scroll up, down, left, or right at the specified coordinates.\n* `key`: Press a specific keyboard key.\n* `left_click_drag`: Drag from start_coordinate to coordinate.\n* `zoom`: Take a screenshot of a specific region for closer inspection.\n* `scroll_to`: Scroll an element into view using its element reference ID from read_page or find tools.\n* `hover`: Move the mouse cursor to the specified coordinates or element without clicking. Useful for revealing tooltips, dropdown menus, or triggering hover states.", + }, + coordinate: { + type: "array", + items: { type: "number" }, + minItems: 2, + maxItems: 2, + description: + "(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates. Required for `left_click`, `right_click`, `double_click`, `triple_click`, and `scroll`. For `left_click_drag`, this is the end position.", + }, + text: { + type: "string", + description: + 'The text to type (for `type` action) or the key(s) to press (for `key` action). For `key` action: Provide space-separated keys (e.g., "Backspace Backspace Delete"). 
Supports keyboard shortcuts using the platform\'s modifier key (use "cmd" on Mac, "ctrl" on Windows/Linux, e.g., "cmd+a" or "ctrl+a" for select all).', + }, + duration: { + type: "number", + minimum: 0, + maximum: 30, + description: + "The number of seconds to wait. Required for `wait`. Maximum 30 seconds.", + }, + scroll_direction: { + type: "string", + enum: ["up", "down", "left", "right"], + description: "The direction to scroll. Required for `scroll`.", + }, + scroll_amount: { + type: "number", + minimum: 1, + maximum: 10, + description: + "The number of scroll wheel ticks. Optional for `scroll`, defaults to 3.", + }, + start_coordinate: { + type: "array", + items: { type: "number" }, + minItems: 2, + maxItems: 2, + description: + "(x, y): The starting coordinates for `left_click_drag`.", + }, + region: { + type: "array", + items: { type: "number" }, + minItems: 4, + maxItems: 4, + description: + "(x0, y0, x1, y1): The rectangular region to capture for `zoom`. Coordinates define a rectangle from top-left (x0, y0) to bottom-right (x1, y1) in pixels from the viewport origin. Required for `zoom` action. Useful for inspecting small UI elements like icons, buttons, or text.", + }, + repeat: { + type: "number", + minimum: 1, + maximum: 100, + description: + "Number of times to repeat the key sequence. Only applicable for `key` action. Must be a positive integer between 1 and 100. Default is 1. Useful for navigation tasks like pressing arrow keys multiple times.", + }, + ref: { + type: "string", + description: + 'Element reference ID from read_page or find tools (e.g., "ref_1", "ref_2"). Required for `scroll_to` action. Can be used as alternative to `coordinate` for click actions.', + }, + modifiers: { + type: "string", + description: + 'Modifier keys for click actions. Supports: "ctrl", "shift", "alt", "cmd" (or "meta"), "win" (or "windows"). Can be combined with "+" (e.g., "ctrl+shift", "cmd+alt"). 
Optional.', + }, + tabId: { + type: "number", + description: + "Tab ID to execute the action on. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["action", "tabId"], + }, + }, + { + name: "navigate", + description: + "Navigate to a URL, or go forward/back in browser history. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + url: { + type: "string", + description: + 'The URL to navigate to. Can be provided with or without protocol (defaults to https://). Use "forward" to go forward in history or "back" to go back in history.', + }, + tabId: { + type: "number", + description: + "Tab ID to navigate. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["url", "tabId"], + }, + }, + { + name: "resize_window", + description: + "Resize the current browser window to specified dimensions. Useful for testing responsive designs or setting up specific screen sizes. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + width: { + type: "number", + description: "Target window width in pixels", + }, + height: { + type: "number", + description: "Target window height in pixels", + }, + tabId: { + type: "number", + description: + "Tab ID to get the window for. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["width", "height", "tabId"], + }, + }, + { + name: "gif_creator", + description: + "Manage GIF recording and export for browser automation sessions. Control when to start/stop recording browser actions (clicks, scrolls, navigation), then export as an animated GIF with visual overlays (click indicators, action labels, progress bar, watermark). All operations are scoped to the tab's group. 
When starting recording, take a screenshot immediately after to capture the initial state as the first frame. When stopping recording, take a screenshot immediately before to capture the final state as the last frame. For export, either provide 'coordinate' to drag/drop upload to a page element, or set 'download: true' to download the GIF.", + inputSchema: { + type: "object", + properties: { + action: { + type: "string", + enum: ["start_recording", "stop_recording", "export", "clear"], + description: + "Action to perform: 'start_recording' (begin capturing), 'stop_recording' (stop capturing but keep frames), 'export' (generate and export GIF), 'clear' (discard frames)", + }, + tabId: { + type: "number", + description: + "Tab ID to identify which tab group this operation applies to", + }, + download: { + type: "boolean", + description: + "Always set this to true for the 'export' action only. This causes the gif to be downloaded in the browser.", + }, + filename: { + type: "string", + description: + "Optional filename for exported GIF (default: 'recording-[timestamp].gif'). For 'export' action only.", + }, + options: { + type: "object", + description: + "Optional GIF enhancement options for 'export' action. Properties: showClickIndicators (bool), showDragPaths (bool), showActionLabels (bool), showProgressBar (bool), showWatermark (bool), quality (number 1-30). 
All default to true except quality (default: 10).", + properties: { + showClickIndicators: { + type: "boolean", + description: + "Show orange circles at click locations (default: true)", + }, + showDragPaths: { + type: "boolean", + description: "Show red arrows for drag actions (default: true)", + }, + showActionLabels: { + type: "boolean", + description: + "Show black labels describing actions (default: true)", + }, + showProgressBar: { + type: "boolean", + description: "Show orange progress bar at bottom (default: true)", + }, + showWatermark: { + type: "boolean", + description: "Show Claude logo watermark (default: true)", + }, + quality: { + type: "number", + description: + "GIF compression quality, 1-30 (lower = better quality, slower encoding). Default: 10", + }, + }, + }, + }, + required: ["action", "tabId"], + }, + }, + { + name: "upload_image", + description: + "Upload a previously captured screenshot or user-uploaded image to a file input or drag & drop target. Supports two approaches: (1) ref - for targeting specific elements, especially hidden file inputs, (2) coordinate - for drag & drop to visible locations like Google Docs. Provide either ref or coordinate, not both.", + inputSchema: { + type: "object", + properties: { + imageId: { + type: "string", + description: + "ID of a previously captured screenshot (from the computer tool's screenshot action) or a user-uploaded image", + }, + ref: { + type: "string", + description: + 'Element reference ID from read_page or find tools (e.g., "ref_1", "ref_2"). Use this for file inputs (especially hidden ones) or specific elements. Provide either ref or coordinate, not both.', + }, + coordinate: { + type: "array", + items: { + type: "number", + }, + description: + "Viewport coordinates [x, y] for drag & drop to a visible location. Use this for drag & drop targets like Google Docs. 
Provide either ref or coordinate, not both.", + }, + tabId: { + type: "number", + description: + "Tab ID where the target element is located. This is where the image will be uploaded to.", + }, + filename: { + type: "string", + description: + 'Optional filename for the uploaded file (default: "image.png")', + }, + }, + required: ["imageId", "tabId"], + }, + }, + { + name: "get_page_text", + description: + "Extract raw text content from the page, prioritizing article content. Ideal for reading articles, blog posts, or other text-heavy pages. Returns plain text without HTML formatting. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + tabId: { + type: "number", + description: + "Tab ID to extract text from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "tabs_context_mcp", + title: "Tabs Context", + description: + "Get context information about the current MCP tab group. Returns all tab IDs inside the group if it exists. CRITICAL: You must get the context at least once before using other browser automation tools so you know what tabs exist. Each new conversation should create its own new tab (using tabs_create_mcp) rather than reusing existing tabs, unless the user explicitly asks to use an existing tab.", + inputSchema: { + type: "object", + properties: { + createIfEmpty: { + type: "boolean", + description: + "Creates a new MCP tab group if none exists, creates a new Window with a new tab group containing an empty tab (which can be used for this conversation). If a MCP tab group already exists, this parameter has no effect.", + }, + }, + required: [], + }, + }, + { + name: "tabs_create_mcp", + title: "Tabs Create", + description: + "Creates a new empty tab in the MCP tab group. 
CRITICAL: You must get the context using tabs_context_mcp at least once before using other browser automation tools so you know what tabs exist.", + inputSchema: { + type: "object", + properties: {}, + required: [], + }, + }, + { + name: "update_plan", + description: + "Present a plan to the user for approval before taking actions. The user will see the domains you intend to visit and your approach. Once approved, you can proceed with actions on the approved domains without additional permission prompts.", + inputSchema: { + type: "object" as const, + properties: { + domains: { + type: "array" as const, + items: { type: "string" as const }, + description: + "List of domains you will visit (e.g., ['github.com', 'stackoverflow.com']). These domains will be approved for the session when the user accepts the plan.", + }, + approach: { + type: "array" as const, + items: { type: "string" as const }, + description: + "High-level description of what you will do. Focus on outcomes and key actions, not implementation details. Be concise - aim for 3-7 items.", + }, + }, + required: ["domains", "approach"], + }, + }, + { + name: "read_console_messages", + description: + "Read browser console messages (console.log, console.error, console.warn, etc.) from a specific tab. Useful for debugging JavaScript errors, viewing application logs, or understanding what's happening in the browser console. Returns console messages from the current domain only. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs. IMPORTANT: Always provide a pattern to filter messages - without a pattern, you may get too many irrelevant messages.", + inputSchema: { + type: "object", + properties: { + tabId: { + type: "number", + description: + "Tab ID to read console messages from. Must be a tab in the current group. 
Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + onlyErrors: { + type: "boolean", + description: + "If true, only return error and exception messages. Default is false (return all message types).", + }, + clear: { + type: "boolean", + description: + "If true, clear the console messages after reading to avoid duplicates on subsequent calls. Default is false.", + }, + pattern: { + type: "string", + description: + "Regex pattern to filter console messages. Only messages matching this pattern will be returned (e.g., 'error|warning' to find errors and warnings, 'MyApp' to filter app-specific logs). You should always provide a pattern to avoid getting too many irrelevant messages.", + }, + limit: { + type: "number", + description: + "Maximum number of messages to return. Defaults to 100. Increase only if you need more results.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "read_network_requests", + description: + "Read HTTP network requests (XHR, Fetch, documents, images, etc.) from a specific tab. Useful for debugging API calls, monitoring network activity, or understanding what requests a page is making. Returns all network requests made by the current page, including cross-origin requests. Requests are automatically cleared when the page navigates to a different domain. If you don't have a valid tab ID, use tabs_context_mcp first to get available tabs.", + inputSchema: { + type: "object", + properties: { + tabId: { + type: "number", + description: + "Tab ID to read network requests from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + urlPattern: { + type: "string", + description: + "Optional URL pattern to filter requests. 
Only requests whose URL contains this string will be returned (e.g., '/api/' to filter API calls, 'example.com' to filter by domain).", + }, + clear: { + type: "boolean", + description: + "If true, clear the network requests after reading to avoid duplicates on subsequent calls. Default is false.", + }, + limit: { + type: "number", + description: + "Maximum number of requests to return. Defaults to 100. Increase only if you need more results.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "shortcuts_list", + description: + "List all available shortcuts and workflows (shortcuts and workflows are interchangeable). Returns shortcuts with their commands, descriptions, and whether they are workflows. Use shortcuts_execute to run a shortcut or workflow.", + inputSchema: { + type: "object", + properties: { + tabId: { + type: "number", + description: + "Tab ID to list shortcuts from. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "shortcuts_execute", + description: + "Execute a shortcut or workflow by running it in a new sidepanel window using the current tab (shortcuts and workflows are interchangeable). Use shortcuts_list first to see available shortcuts. This starts the execution and returns immediately - it does not wait for completion.", + inputSchema: { + type: "object", + properties: { + tabId: { + type: "number", + description: + "Tab ID to execute the shortcut on. Must be a tab in the current group. Use tabs_context_mcp first if you don't have a valid tab ID.", + }, + shortcutId: { + type: "string", + description: "The ID of the shortcut to execute", + }, + command: { + type: "string", + description: + "The command name of the shortcut to execute (e.g., 'debug', 'summarize'). 
Do not include the leading slash.", + }, + }, + required: ["tabId"], + }, + }, + { + name: "switch_browser", + description: + "Switch which Chrome browser is used for browser automation. Call this when the user wants to connect to a different Chrome browser. Broadcasts a connection request to all Chrome browsers with the extension installed — the user clicks 'Connect' in the desired browser.", + inputSchema: { + type: "object", + properties: {}, + required: [], + }, + }, +]; diff --git a/packages/@ant/claude-for-chrome-mcp/src/index.ts b/packages/@ant/claude-for-chrome-mcp/src/index.ts index d7d76b189..68b694069 100644 --- a/packages/@ant/claude-for-chrome-mcp/src/index.ts +++ b/packages/@ant/claude-for-chrome-mcp/src/index.ts @@ -1,11 +1,15 @@ -export const BROWSER_TOOLS: any[] = [] - -export class ClaudeForChromeContext {} - -export class Logger {} - -export type PermissionMode = any - -export function createClaudeForChromeMcpServer(..._args: any[]): any { - return null -} +export { BridgeClient, createBridgeClient } from "./bridgeClient.js"; +export { BROWSER_TOOLS } from "./browserTools.js"; +export { + createChromeSocketClient, + createClaudeForChromeMcpServer, +} from "./mcpServer.js"; +export { localPlatformLabel } from "./types.js"; +export type { + BridgeConfig, + ChromeExtensionInfo, + ClaudeForChromeContext, + Logger, + PermissionMode, + SocketClient, +} from "./types.js"; diff --git a/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts b/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts new file mode 100644 index 000000000..40b1812a5 --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/mcpServer.ts @@ -0,0 +1,96 @@ +import { Server } from "@modelcontextprotocol/sdk/server/index.js"; +import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import { + CallToolRequestSchema, + ListToolsRequestSchema, +} from "@modelcontextprotocol/sdk/types.js"; + +import { createBridgeClient } from "./bridgeClient.js"; +import { BROWSER_TOOLS 
} from "./browserTools.js"; +import { createMcpSocketClient } from "./mcpSocketClient.js"; +import { createMcpSocketPool } from "./mcpSocketPool.js"; +import { handleToolCall } from "./toolCalls.js"; +import type { ClaudeForChromeContext, SocketClient } from "./types.js"; + +/** + * Create the socket/bridge client for the Chrome extension MCP server. + * Exported so Desktop can share a single instance between the registered + * MCP server and the InternalMcpServerManager (CCD sessions). + */ +export function createChromeSocketClient( + context: ClaudeForChromeContext, +): SocketClient { + return context.bridgeConfig + ? createBridgeClient(context) + : context.getSocketPaths + ? createMcpSocketPool(context) + : createMcpSocketClient(context); +} + +export function createClaudeForChromeMcpServer( + context: ClaudeForChromeContext, + existingSocketClient?: SocketClient, +): Server { + const { serverName, logger } = context; + + // Choose transport: bridge (WebSocket) > socket pool (multi-profile) > single socket. + const socketClient = + existingSocketClient ?? createChromeSocketClient(context); + + const server = new Server( + { + name: serverName, + version: "1.0.0", + }, + { + capabilities: { + tools: {}, + logging: {}, + }, + }, + ); + + server.setRequestHandler(ListToolsRequestSchema, async () => { + if (context.isDisabled?.()) { + return { tools: [] }; + } + return { + tools: context.bridgeConfig + ? 
BROWSER_TOOLS + : BROWSER_TOOLS.filter((t) => t.name !== "switch_browser"), + }; + }); + + server.setRequestHandler( + CallToolRequestSchema, + async (request): Promise => { + logger.info(`[${serverName}] Executing tool: ${request.params.name}`); + + return handleToolCall( + context, + socketClient, + request.params.name, + request.params.arguments || {}, + ); + }, + ); + + socketClient.setNotificationHandler((notification) => { + logger.info( + `[${serverName}] Forwarding MCP notification: ${notification.method}`, + ); + server + .notification({ + method: notification.method, + params: notification.params, + }) + .catch((error) => { + // Server may not be connected yet (e.g., during startup or after disconnect) + logger.info( + `[${serverName}] Failed to forward MCP notification: ${error.message}`, + ); + }); + }); + + return server; +} diff --git a/packages/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts new file mode 100644 index 000000000..1443d7f5c --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts @@ -0,0 +1,493 @@ +import { promises as fsPromises } from "fs"; +import { createConnection } from "net"; +import type { Socket } from "net"; +import { platform } from "os"; +import { dirname } from "path"; + +import type { + ClaudeForChromeContext, + PermissionMode, + PermissionOverrides, +} from "./types.js"; + +export class SocketConnectionError extends Error { + constructor(message: string) { + super(message); + this.name = "SocketConnectionError"; + } +} + +interface ToolRequest { + method: string; // "execute_tool" + params?: { + client_id?: string; // "desktop" | "claude-code" + tool?: string; + args?: Record; + }; +} + +interface ToolResponse { + result?: unknown; + error?: string; +} + +interface Notification { + method: string; + params?: Record; +} + +type SocketMessage = ToolResponse | Notification; + +function isToolResponse(message: SocketMessage): message is 
ToolResponse { + return "result" in message || "error" in message; +} + +function isNotification(message: SocketMessage): message is Notification { + return "method" in message && typeof message.method === "string"; +} + +class McpSocketClient { + private socket: Socket | null = null; + private connected = false; + private connecting = false; + private responseCallback: ((response: ToolResponse) => void) | null = null; + private notificationHandler: ((notification: Notification) => void) | null = + null; + private responseBuffer = Buffer.alloc(0); + private reconnectAttempts = 0; + private maxReconnectAttempts = 10; + private reconnectDelay = 1000; + private reconnectTimer: NodeJS.Timeout | null = null; + private context: ClaudeForChromeContext; + // When true, disables automatic reconnection. Used by McpSocketPool which + // manages reconnection externally by rescanning available sockets. + public disableAutoReconnect = false; + + constructor(context: ClaudeForChromeContext) { + this.context = context; + } + + private async connect(): Promise { + const { serverName, logger } = this.context; + + if (this.connecting) { + logger.info( + `[${serverName}] Already connecting, skipping duplicate attempt`, + ); + return; + } + + this.closeSocket(); + this.connecting = true; + + const socketPath = + this.context.getSocketPath?.() ?? this.context.socketPath; + logger.info(`[${serverName}] Attempting to connect to: ${socketPath}`); + + try { + await this.validateSocketSecurity(socketPath); + } catch (error) { + this.connecting = false; + logger.info(`[${serverName}] Security validation failed:`, error); + // Don't retry on security failures (wrong perms/owner) - those won't + // self-resolve. Only the error handler retries on transient errors. 
+ return; + } + + this.socket = createConnection(socketPath); + + // Timeout the initial connection attempt - if socket file exists but native + // host is dead, the connect can hang indefinitely + const connectTimeout = setTimeout(() => { + if (!this.connected) { + logger.info( + `[${serverName}] Connection attempt timed out after 5000ms`, + ); + this.closeSocket(); + this.scheduleReconnect(); + } + }, 5000); + + this.socket.on("connect", () => { + clearTimeout(connectTimeout); + this.connected = true; + this.connecting = false; + this.reconnectAttempts = 0; + logger.info(`[${serverName}] Successfully connected to bridge server`); + }); + + this.socket.on("data", (data: Buffer) => { + this.responseBuffer = Buffer.concat([this.responseBuffer, data]); + + while (this.responseBuffer.length >= 4) { + const length = this.responseBuffer.readUInt32LE(0); + + if (this.responseBuffer.length < 4 + length) { + break; + } + + const messageBytes = this.responseBuffer.slice(4, 4 + length); + this.responseBuffer = this.responseBuffer.slice(4 + length); + + try { + const message = JSON.parse( + messageBytes.toString("utf-8"), + ) as SocketMessage; + + if (isNotification(message)) { + logger.info( + `[${serverName}] Received notification: ${message.method}`, + ); + if (this.notificationHandler) { + this.notificationHandler(message); + } + } else if (isToolResponse(message)) { + logger.info(`[${serverName}] Received tool response: ${message}`); + this.handleResponse(message); + } else { + logger.info(`[${serverName}] Received unknown message: ${message}`); + } + } catch (error) { + logger.info(`[${serverName}] Failed to parse message:`, error); + } + } + }); + + this.socket.on("error", (error: Error & { code?: string }) => { + clearTimeout(connectTimeout); + logger.info(`[${serverName}] Socket error (code: ${error.code}):`, error); + this.connected = false; + this.connecting = false; + + if ( + error.code && + [ + "ECONNREFUSED", // Native host not listening (stale socket) + 
"ECONNRESET", // Connection reset by peer + "EPIPE", // Broken pipe (native host died mid-write) + "ENOENT", // Socket file was deleted + "EOPNOTSUPP", // Socket file exists but is not a valid socket + "ECONNABORTED", // Connection aborted + ].includes(error.code) + ) { + this.scheduleReconnect(); + } + }); + + this.socket.on("close", () => { + clearTimeout(connectTimeout); + this.connected = false; + this.connecting = false; + this.scheduleReconnect(); + }); + } + + private scheduleReconnect(): void { + const { serverName, logger } = this.context; + + if (this.disableAutoReconnect) { + return; + } + + if (this.reconnectTimer) { + logger.info(`[${serverName}] Reconnect already scheduled, skipping`); + return; + } + + this.reconnectAttempts++; + + // Give up after extended polling (~50 min). A new ensureConnected() call + // from a tool request will restart the cycle if needed. + const maxTotalAttempts = 100; + if (this.reconnectAttempts > maxTotalAttempts) { + logger.info( + `[${serverName}] Giving up after ${maxTotalAttempts} attempts. Will retry on next tool call.`, + ); + this.reconnectAttempts = 0; + return; + } + + // Use aggressive backoff for first 10 attempts, then slow poll every 30s. 
+ const delay = Math.min( + this.reconnectDelay * Math.pow(1.5, this.reconnectAttempts - 1), + 30000, + ); + + if (this.reconnectAttempts <= this.maxReconnectAttempts) { + logger.info( + `[${serverName}] Reconnecting in ${Math.round(delay)}ms (attempt ${ + this.reconnectAttempts + })`, + ); + } else if (this.reconnectAttempts % 10 === 0) { + // Log every 10th slow-poll attempt to avoid log spam + logger.info( + `[${serverName}] Still polling for native host (attempt ${this.reconnectAttempts})`, + ); + } + + this.reconnectTimer = setTimeout(() => { + this.reconnectTimer = null; + void this.connect(); + }, delay); + } + + private handleResponse(response: ToolResponse): void { + if (this.responseCallback) { + const callback = this.responseCallback; + this.responseCallback = null; + callback(response); + } + } + + public setNotificationHandler( + handler: (notification: Notification) => void, + ): void { + this.notificationHandler = handler; + } + + public async ensureConnected(): Promise { + const { serverName } = this.context; + + if (this.connected && this.socket) { + return true; + } + + if (!this.socket && !this.connecting) { + await this.connect(); + } + + // Wait for connection with timeout + return new Promise((resolve, reject) => { + let checkTimeoutId: NodeJS.Timeout | null = null; + + const timeout = setTimeout(() => { + if (checkTimeoutId) { + clearTimeout(checkTimeoutId); + } + reject( + new SocketConnectionError( + `[${serverName}] Connection attempt timed out after 5000ms`, + ), + ); + }, 5000); + + const checkConnection = () => { + if (this.connected) { + clearTimeout(timeout); + resolve(true); + } else { + checkTimeoutId = setTimeout(checkConnection, 500); + } + }; + checkConnection(); + }); + } + + private async sendRequest( + request: ToolRequest, + timeoutMs = 30000, + ): Promise { + const { serverName } = this.context; + + if (!this.socket) { + throw new SocketConnectionError( + `[${serverName}] Cannot send request: not connected`, + ); + } + + 
const socket = this.socket; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.responseCallback = null; + reject( + new SocketConnectionError( + `[${serverName}] Tool request timed out after ${timeoutMs}ms`, + ), + ); + }, timeoutMs); + + this.responseCallback = (response) => { + clearTimeout(timeout); + resolve(response); + }; + + const requestJson = JSON.stringify(request); + const requestBytes = Buffer.from(requestJson, "utf-8"); + + const lengthPrefix = Buffer.allocUnsafe(4); + lengthPrefix.writeUInt32LE(requestBytes.length, 0); + + const message = Buffer.concat([lengthPrefix, requestBytes]); + socket.write(message); + }); + } + + public async callTool( + name: string, + args: Record, + _permissionOverrides?: PermissionOverrides, + ): Promise { + const request: ToolRequest = { + method: "execute_tool", + params: { + client_id: this.context.clientTypeId, + tool: name, + args, + }, + }; + + return this.sendRequestWithRetry(request); + } + + /** + * Send a request with automatic retry on connection errors. + * + * On connection error or timeout, the native host may be a zombie (connected + * to dead Chrome). Force reconnect to pick up a fresh native host process + * and retry once. 
+ */ + private async sendRequestWithRetry(request: ToolRequest): Promise { + const { serverName, logger } = this.context; + + try { + return await this.sendRequest(request); + } catch (error) { + if (!(error instanceof SocketConnectionError)) { + throw error; + } + + logger.info( + `[${serverName}] Connection error, forcing reconnect and retrying: ${error.message}`, + ); + + this.closeSocket(); + await this.ensureConnected(); + + return await this.sendRequest(request); + } + } + + public async setPermissionMode( + _mode: PermissionMode, + _allowedDomains?: string[], + ): Promise { + // No-op: permission mode is only supported over the bridge (WebSocket) transport + } + + public isConnected(): boolean { + return this.connected; + } + + private closeSocket(): void { + if (this.socket) { + this.socket.removeAllListeners(); + this.socket.end(); + this.socket.destroy(); + this.socket = null; + } + this.connected = false; + this.connecting = false; + } + + private cleanup(): void { + if (this.reconnectTimer) { + clearTimeout(this.reconnectTimer); + this.reconnectTimer = null; + } + + this.closeSocket(); + this.reconnectAttempts = 0; + this.responseBuffer = Buffer.alloc(0); + this.responseCallback = null; + } + + public disconnect(): void { + this.cleanup(); + } + + private async validateSocketSecurity(socketPath: string): Promise { + const { serverName, logger } = this.context; + if (platform() === "win32") { + return; + } + try { + // Validate the parent directory permissions if it's the socket directory + // (not /tmp itself, which has mode 1777 for legacy single-socket paths) + const dirPath = dirname(socketPath); + const dirBasename = dirPath.split("/").pop() || ""; + const isSocketDir = dirBasename.startsWith("claude-mcp-browser-bridge-"); + if (isSocketDir) { + try { + const dirStats = await fsPromises.stat(dirPath); + if (dirStats.isDirectory()) { + const dirMode = dirStats.mode & 0o777; + if (dirMode !== 0o700) { + throw new Error( + `[${serverName}] Insecure 
socket directory permissions: ${dirMode.toString( + 8, + )} (expected 0700). Directory may have been tampered with.`, + ); + } + const currentUid = process.getuid?.(); + if (currentUid !== undefined && dirStats.uid !== currentUid) { + throw new Error( + `Socket directory not owned by current user (uid: ${currentUid}, dir uid: ${dirStats.uid}). ` + + `Potential security risk.`, + ); + } + } + } catch (dirError) { + if ((dirError as NodeJS.ErrnoException).code !== "ENOENT") { + throw dirError; + } + // Directory doesn't exist yet - native host will create it + } + } + + const stats = await fsPromises.stat(socketPath); + + if (!stats.isSocket()) { + throw new Error( + `[${serverName}] Path exists but it's not a socket: ${socketPath}`, + ); + } + + const mode = stats.mode & 0o777; + if (mode !== 0o600) { + throw new Error( + `[${serverName}] Insecure socket permissions: ${mode.toString( + 8, + )} (expected 0600). Socket may have been tampered with.`, + ); + } + + const currentUid = process.getuid?.(); + if (currentUid !== undefined && stats.uid !== currentUid) { + throw new Error( + `Socket not owned by current user (uid: ${currentUid}, socket uid: ${stats.uid}). 
` + + `Potential security risk.`, + ); + } + + logger.info(`[${serverName}] Socket security validation passed`); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") { + logger.info( + `[${serverName}] Socket not found, will be created by server`, + ); + return; + } + throw error; + } + } +} + +export function createMcpSocketClient( + context: ClaudeForChromeContext, +): McpSocketClient { + return new McpSocketClient(context); +} + +export type { McpSocketClient }; diff --git a/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts new file mode 100644 index 000000000..9329fb6fa --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts @@ -0,0 +1,327 @@ +import { + createMcpSocketClient, + SocketConnectionError, +} from "./mcpSocketClient.js"; +import type { McpSocketClient } from "./mcpSocketClient.js"; +import type { + ClaudeForChromeContext, + PermissionMode, + PermissionOverrides, +} from "./types.js"; + +/** + * Manages connections to multiple Chrome native host sockets (one per Chrome profile). + * Routes tool calls to the correct socket based on tab ID. + * + * For `tabs_context_mcp`: queries all connected sockets and merges results. + * For other tools: routes based on the `tabId` argument using a routing table + * built from tabs_context_mcp responses. 
+ */ +export class McpSocketPool { + private clients: Map = new Map(); + private tabRoutes: Map = new Map(); + private context: ClaudeForChromeContext; + private notificationHandler: + | ((notification: { method: string; params?: Record }) => void) + | null = null; + + constructor(context: ClaudeForChromeContext) { + this.context = context; + } + + public setNotificationHandler( + handler: (notification: { + method: string; + params?: Record; + }) => void, + ): void { + this.notificationHandler = handler; + for (const client of this.clients.values()) { + client.setNotificationHandler(handler); + } + } + + /** + * Discover available sockets and ensure at least one is connected. + */ + public async ensureConnected(): Promise { + const { logger, serverName } = this.context; + + this.refreshClients(); + + // Try to connect any disconnected clients + const connectPromises: Promise[] = []; + for (const client of this.clients.values()) { + if (!client.isConnected()) { + connectPromises.push( + client.ensureConnected().catch(() => false), + ); + } + } + + if (connectPromises.length > 0) { + await Promise.all(connectPromises); + } + + const connectedCount = this.getConnectedClients().length; + if (connectedCount === 0) { + logger.info(`[${serverName}] No connected sockets in pool`); + return false; + } + + logger.info(`[${serverName}] Socket pool: ${connectedCount} connected`); + return true; + } + + /** + * Call a tool, routing to the correct socket based on tab ID. + * For tabs_context_mcp, queries all sockets and merges results. 
+ */ + public async callTool( + name: string, + args: Record, + _permissionOverrides?: PermissionOverrides, + ): Promise { + if (name === "tabs_context_mcp") { + return this.callTabsContext(args); + } + + // Route by tabId if present + const tabId = args.tabId as number | undefined; + if (tabId !== undefined) { + const socketPath = this.tabRoutes.get(tabId); + if (socketPath) { + const client = this.clients.get(socketPath); + if (client?.isConnected()) { + return client.callTool(name, args); + } + } + // Tab route not found or client disconnected — fall through to any connected + } + + // Fallback: use first connected client + const connected = this.getConnectedClients(); + if (connected.length === 0) { + throw new SocketConnectionError( + `[${this.context.serverName}] No connected sockets available`, + ); + } + return connected[0]!.callTool(name, args); + } + + public async setPermissionMode( + mode: PermissionMode, + allowedDomains?: string[], + ): Promise { + const connected = this.getConnectedClients(); + await Promise.all( + connected.map((client) => client.setPermissionMode(mode, allowedDomains)), + ); + } + + public isConnected(): boolean { + return this.getConnectedClients().length > 0; + } + + public disconnect(): void { + for (const client of this.clients.values()) { + client.disconnect(); + } + this.clients.clear(); + this.tabRoutes.clear(); + } + + private getConnectedClients(): McpSocketClient[] { + return [...this.clients.values()].filter((c) => c.isConnected()); + } + + /** + * Query all connected sockets for tabs and merge results. + * Updates the tab routing table. 
+ */ + private async callTabsContext( + args: Record, + ): Promise { + const { logger, serverName } = this.context; + const connected = this.getConnectedClients(); + + if (connected.length === 0) { + throw new SocketConnectionError( + `[${serverName}] No connected sockets available`, + ); + } + + // If only one client, skip merging overhead + if (connected.length === 1) { + const result = await connected[0]!.callTool("tabs_context_mcp", args); + this.updateTabRoutes(result, this.getSocketPathForClient(connected[0]!)); + return result; + } + + // Query all connected clients in parallel + const results = await Promise.allSettled( + connected.map(async (client) => { + const result = await client.callTool("tabs_context_mcp", args); + const socketPath = this.getSocketPathForClient(client); + return { result, socketPath }; + }), + ); + + // Merge tab results + const mergedTabs: unknown[] = []; + this.tabRoutes.clear(); + + for (const settledResult of results) { + if (settledResult.status !== "fulfilled") { + logger.info( + `[${serverName}] tabs_context_mcp failed on one socket: ${settledResult.reason}`, + ); + continue; + } + + const { result, socketPath } = settledResult.value; + this.updateTabRoutes(result, socketPath); + + const tabs = this.extractTabs(result); + if (tabs) { + mergedTabs.push(...tabs); + } + } + + // Return merged result in the same format as the extension response + if (mergedTabs.length > 0) { + const tabListText = mergedTabs + .map((t) => { + const tab = t as { tabId: number; title: string; url: string }; + return ` • tabId ${tab.tabId}: "${tab.title}" (${tab.url})`; + }) + .join("\n"); + + return { + result: { + content: [ + { + type: "text", + text: JSON.stringify({ availableTabs: mergedTabs }), + }, + { + type: "text", + text: `\n\nTab Context:\n- Available tabs:\n${tabListText}`, + }, + ], + }, + }; + } + + // Fallback: return first successful result as-is + for (const settledResult of results) { + if (settledResult.status === "fulfilled") { + 
return settledResult.value.result; + } + } + + throw new SocketConnectionError( + `[${serverName}] All sockets failed for tabs_context_mcp`, + ); + } + + /** + * Extract tab objects from a tool response to update routing table. + */ + private updateTabRoutes(result: unknown, socketPath: string): void { + const tabs = this.extractTabs(result); + if (!tabs) return; + + for (const tab of tabs) { + if (typeof tab === "object" && tab !== null && "tabId" in tab) { + const tabId = (tab as { tabId: number }).tabId; + this.tabRoutes.set(tabId, socketPath); + } + } + } + + private extractTabs(result: unknown): unknown[] | null { + if (!result || typeof result !== "object") return null; + + // Response format: { result: { content: [{ type: "text", text: "{\"availableTabs\":[...],\"tabGroupId\":...}" }] } } + const asResponse = result as { + result?: { content?: Array<{ type: string; text?: string }> }; + }; + const content = asResponse.result?.content; + if (!content || !Array.isArray(content)) return null; + + for (const item of content) { + if (item.type === "text" && item.text) { + try { + const parsed = JSON.parse(item.text); + if (Array.isArray(parsed)) return parsed; + // Handle { availableTabs: [...] } format + if (parsed && Array.isArray(parsed.availableTabs)) { + return parsed.availableTabs; + } + } catch { + // Not JSON, skip + } + } + } + return null; + } + + private getSocketPathForClient(client: McpSocketClient): string { + for (const [path, c] of this.clients.entries()) { + if (c === client) return path; + } + return ""; + } + + /** + * Scan for available sockets and create/remove clients as needed. 
+ */ + private refreshClients(): void { + const socketPaths = this.getAvailableSocketPaths(); + const { logger, serverName } = this.context; + + // Add new clients for newly discovered sockets + for (const path of socketPaths) { + if (!this.clients.has(path)) { + logger.info(`[${serverName}] Adding socket to pool: ${path}`); + const clientContext: ClaudeForChromeContext = { + ...this.context, + socketPath: path, + getSocketPath: undefined, + getSocketPaths: undefined, + }; + const client = createMcpSocketClient(clientContext); + client.disableAutoReconnect = true; + if (this.notificationHandler) { + client.setNotificationHandler(this.notificationHandler); + } + this.clients.set(path, client); + } + } + + // Remove clients for sockets that no longer exist + for (const [path, client] of this.clients.entries()) { + if (!socketPaths.includes(path)) { + logger.info(`[${serverName}] Removing stale socket from pool: ${path}`); + client.disconnect(); + this.clients.delete(path); + for (const [tabId, socketPath] of this.tabRoutes.entries()) { + if (socketPath === path) { + this.tabRoutes.delete(tabId); + } + } + } + } + } + + private getAvailableSocketPaths(): string[] { + return this.context.getSocketPaths?.() ?? 
[]; + } +} + +export function createMcpSocketPool( + context: ClaudeForChromeContext, +): McpSocketPool { + return new McpSocketPool(context); +} diff --git a/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts b/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts new file mode 100644 index 000000000..65bd8d178 --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/toolCalls.ts @@ -0,0 +1,301 @@ +import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; + +import { SocketConnectionError } from "./mcpSocketClient.js"; +import type { + ClaudeForChromeContext, + PermissionMode, + PermissionOverrides, + SocketClient, +} from "./types.js"; + +export const handleToolCall = async ( + context: ClaudeForChromeContext, + socketClient: SocketClient, + name: string, + args: Record, + permissionOverrides?: PermissionOverrides, +): Promise => { + // Handle permission mode changes locally (not forwarded to extension) + if (name === "set_permission_mode") { + return handleSetPermissionMode(socketClient, args); + } + + // Handle switch_browser outside the normal tool call flow (manages its own connection) + if (name === "switch_browser") { + return handleSwitchBrowser(context, socketClient); + } + + try { + const isConnected = await socketClient.ensureConnected(); + + context.logger.silly( + `[${context.serverName}] Server is connected: ${isConnected}. Received tool call: ${name} with args: ${JSON.stringify(args)}.`, + ); + + if (isConnected) { + return await handleToolCallConnected( + context, + socketClient, + name, + args, + permissionOverrides, + ); + } + + return handleToolCallDisconnected(context); + } catch (error) { + context.logger.info(`[${context.serverName}] Error calling tool:`, error); + + if (error instanceof SocketConnectionError) { + return handleToolCallDisconnected(context); + } + + return { + content: [ + { + type: "text", + text: `Error calling tool, please try again. : ${error instanceof Error ? 
error.message : String(error)}`, + }, + ], + isError: true, + }; + } +}; + +async function handleToolCallConnected( + context: ClaudeForChromeContext, + socketClient: SocketClient, + name: string, + args: Record, + permissionOverrides?: PermissionOverrides, +): Promise { + const response = await socketClient.callTool(name, args, permissionOverrides); + + context.logger.silly( + `[${context.serverName}] Received result from socket bridge: ${JSON.stringify(response)}`, + ); + + if (response === null || response === undefined) { + return { + content: [{ type: "text", text: "Tool execution completed" }], + }; + } + + // Response will have either result or error field + const { result, error } = response as { + result?: { content: unknown[] | string }; + error?: { content: unknown[] | string }; + }; + + // Determine which field has the content and whether it's an error + const contentData = error || result; + const isError = !!error; + + if (!contentData) { + return { + content: [{ type: "text", text: "Tool execution completed" }], + }; + } + + if (isError && isAuthenticationError(contentData.content)) { + context.onAuthenticationError(); + } + + const { content } = contentData; + + if (content && Array.isArray(content)) { + if (isError) { + return { + content: content.map((item: unknown) => { + if (typeof item === "object" && item !== null && "type" in item) { + return item; + } + + return { type: "text", text: String(item) }; + }), + isError: true, + } as CallToolResult; + } + + const convertedContent = content.map((item: unknown) => { + if ( + typeof item === "object" && + item !== null && + "type" in item && + "source" in item + ) { + const typedItem = item; + if ( + typedItem.type === "image" && + typeof typedItem.source === "object" && + typedItem.source !== null && + "data" in typedItem.source + ) { + return { + type: "image", + data: typedItem.source.data, + mimeType: + "media_type" in typedItem.source + ? 
typedItem.source.media_type || "image/png" + : "image/png", + }; + } + } + + if (typeof item === "object" && item !== null && "type" in item) { + return item; + } + + return { type: "text", text: String(item) }; + }); + + return { + content: convertedContent, + isError, + } as CallToolResult; + } + + // Handle string content + if (typeof content === "string") { + return { + content: [{ type: "text", text: content }], + isError, + } as CallToolResult; + } + + // Fallback for unexpected result format + context.logger.warn( + `[${context.serverName}] Unexpected result format from socket bridge`, + response, + ); + + return { + content: [{ type: "text", text: JSON.stringify(response) }], + isError, + }; +} + +function handleToolCallDisconnected( + context: ClaudeForChromeContext, +): CallToolResult { + const text = context.onToolCallDisconnected(); + return { + content: [{ type: "text", text }], + }; +} + +/** + * Handle set_permission_mode tool call locally. + * This is security-sensitive as it controls whether permission prompts are shown. + */ +async function handleSetPermissionMode( + socketClient: SocketClient, + args: Record, +): Promise { + // Validate permission mode at runtime + const validModes = [ + "ask", + "skip_all_permission_checks", + "follow_a_plan", + ] as const; + const mode = args.mode as string | undefined; + const permissionMode: PermissionMode = + mode && validModes.includes(mode as PermissionMode) + ? (mode as PermissionMode) + : "ask"; + + if (socketClient.setPermissionMode) { + await socketClient.setPermissionMode( + permissionMode, + args.allowed_domains as string[] | undefined, + ); + } + + return { + content: [ + { type: "text", text: `Permission mode set to: ${permissionMode}` }, + ], + }; +} + +/** + * Handle switch_browser tool call. Broadcasts a pairing request and blocks + * until a browser responds or timeout. 
+ */ +async function handleSwitchBrowser( + context: ClaudeForChromeContext, + socketClient: SocketClient, +): Promise { + if (!context.bridgeConfig) { + return { + content: [ + { + type: "text", + text: "Browser switching is only available with bridge connections.", + }, + ], + isError: true, + }; + } + + const isConnected = await socketClient.ensureConnected(); + if (!isConnected) { + return handleToolCallDisconnected(context); + } + + const result = (await socketClient.switchBrowser?.()) ?? null; + + if (result === "no_other_browsers") { + return { + content: [ + { + type: "text", + text: "No other browsers available to switch to. Open Chrome with the Claude extension in another browser to switch.", + }, + ], + isError: true, + }; + } + + if (result) { + return { + content: [ + { type: "text", text: `Connected to browser "${result.name}".` }, + ], + }; + } + + return { + content: [ + { + type: "text", + text: "No browser responded within the timeout. Make sure Chrome is open with the Claude extension installed, then try again.", + }, + ], + isError: true, + }; +} + +/** + * Check if the error content indicates an authentication issue + */ +function isAuthenticationError(content: unknown[] | string): boolean { + const errorText = Array.isArray(content) + ? 
content + .map((item) => { /* Flatten each array item to text: a string is used as-is, an object with a string text property contributes that property, and anything else contributes an empty string. */ + if (typeof item === "string") return item; + if ( + typeof item === "object" && + item !== null && + "text" in item && + typeof item.text === "string" + ) { + return item.text; + } + return ""; + }) + .join(" ") + : String(content); + + /* The match is a case-insensitive substring test for the marker re-authenticated in the flattened text. */ return errorText.toLowerCase().includes("re-authenticated"); +} diff --git a/packages/@ant/claude-for-chrome-mcp/src/types.ts b/packages/@ant/claude-for-chrome-mcp/src/types.ts new file mode 100644 index 000000000..0334882b9 --- /dev/null +++ b/packages/@ant/claude-for-chrome-mcp/src/types.ts @@ -0,0 +1,134 @@ +export interface Logger { + info: (message: string, ...args: unknown[]) => void; + error: (message: string, ...args: unknown[]) => void; + warn: (message: string, ...args: unknown[]) => void; + debug: (message: string, ...args: unknown[]) => void; + silly: (message: string, ...args: unknown[]) => void; +} + +export type PermissionMode = + | "ask" + | "skip_all_permission_checks" + | "follow_a_plan"; + +export interface BridgeConfig { + /** Bridge WebSocket base URL (e.g., wss://bridge.claudeusercontent.com) */ + url: string; + /** Returns the user's account UUID for the connection path */ + getUserId: () => Promise; + /** Returns a valid OAuth token for bridge authentication */ + getOAuthToken: () => Promise; + /** Optional dev user ID for local development (bypasses OAuth) */ + devUserId?: string; +} + +/** Metadata about a connected Chrome extension instance. */ +export interface ChromeExtensionInfo { + deviceId: string; + osPlatform?: string; + connectedAt: number; + name?: string; +} + +export interface ClaudeForChromeContext { + serverName: string; + logger: Logger; + socketPath: string; + // Optional dynamic resolver for socket path. When provided, called on each + // connection attempt to handle runtime conditions (e.g., TMPDIR mismatch). + getSocketPath?: () => string; + // Optional resolver returning all available socket paths (for multi-profile support). 
+ // When provided, a socket pool connects to all sockets and routes by tab ID. + getSocketPaths?: () => string[]; + clientTypeId: string; // "desktop" | "claude-code" + /* NOTE(review): returns a string; presumably the text reported when a tool call finds the client disconnected - confirm in handleToolCallDisconnected. */ onToolCallDisconnected: () => string; + /* NOTE(review): presumably invoked when a tool result is classified as an authentication error - confirm at the call site. */ onAuthenticationError: () => void; + isDisabled?: () => boolean; + /** Bridge WebSocket configuration. When provided, uses bridge instead of socket. */ + bridgeConfig?: BridgeConfig; + /** If set, permission mode is sent to the extension immediately on bridge connection. */ + initialPermissionMode?: PermissionMode; + /** Optional callback to track telemetry events for bridge connections */ + trackEvent?: ( + eventName: K, + metadata: Record | null, + ) => void; + /** Called when user pairs with an extension via the browser pairing flow. */ + onExtensionPaired?: (deviceId: string, name: string) => void; + /** Returns the previously paired deviceId, if any. */ + getPersistedDeviceId?: () => string | undefined; + /** Called when a remote extension is auto-selected (only option available). */ + onRemoteExtensionWarning?: (ext: ChromeExtensionInfo) => void; +} + +/** + * Map Node's process.platform to the platform string reported by Chrome extensions + * via navigator.userAgentData.platform. + * + * @returns macOS when process.platform is darwin, Windows when it is win32, + * and Linux for every other platform value (Linux is the fallback, not a checked case). + */ +export function localPlatformLabel(): string { + return process.platform === "darwin" + ? "macOS" + : process.platform === "win32" + ? "Windows" + : "Linux"; +} + +/** Permission request forwarded from the extension to the desktop for user approval. */ +export interface BridgePermissionRequest { + /** Links to the pending tool_call */ + toolUseId: string; + /** Unique ID for this permission request */ + requestId: string; + /** Tool type, e.g. "navigate", "click", "execute_javascript" */ + toolType: string; + /** The URL/domain context */ + url: string; + /** Additional action data (click coordinates, text, etc.) */ + actionData?: Record; +} + +/** Desktop response to a bridge permission request. 
*/ +export interface BridgePermissionResponse { + requestId: string; + allowed: boolean; +} + +/** Per-call permission overrides, allowing each session to use its own permission state. */ +export interface PermissionOverrides { + permissionMode: PermissionMode; + allowedDomains?: string[]; + /** Callback invoked when the extension requests user permission via the bridge. */ + onPermissionRequest?: (request: BridgePermissionRequest) => Promise; +} + +/** Shared interface for McpSocketClient and McpSocketPool */ +export interface SocketClient { + ensureConnected(): Promise; + callTool( + name: string, + args: Record, + permissionOverrides?: PermissionOverrides, + ): Promise; + isConnected(): boolean; + disconnect(): void; + setNotificationHandler( + handler: (notification: { + method: string; + params?: Record; + }) => void, + ): void; + /** Set permission mode for the current session. Only effective on BridgeClient. */ + setPermissionMode?( + mode: PermissionMode, + allowedDomains?: string[], + ): Promise; + /** Switch to a different browser. Only available on BridgeClient. */ + switchBrowser?(): Promise< + | { + deviceId: string; + name: string; + } + | "no_other_browsers" + | null + >; +}