diff --git a/config.example.json b/config.example.json index f2add227..4f8059f5 100644 --- a/config.example.json +++ b/config.example.json @@ -250,6 +250,19 @@ "defaultContextWindow": 128000, "defaultMaxTokens": 8192 }, + "browser": { + "cloudProvider": "none", + "browserUse": { + "baseUrl": "https://api.browser-use.com/api/v3", + "defaultModel": "claude-sonnet-4.6", + "defaultProxyCountry": "us", + "enableRecording": false, + "maxCostPerTaskUsd": 1, + "maxSessionTimeoutMinutes": 30, + "preferAgentMode": true, + "deterministicRerun": true + } + }, "auxiliaryModels": { "vision": { "provider": "auto", diff --git a/container/package-lock.json b/container/package-lock.json index ca860d6b..ff34632a 100644 --- a/container/package-lock.json +++ b/container/package-lock.json @@ -11,6 +11,7 @@ "@modelcontextprotocol/sdk": "1.27.1", "@mozilla/readability": "0.6.0", "agent-browser": "0.15.1", + "browser-use-sdk": "^3.4.3", "jszip": "3.10.1", "linkedom": "0.18.12", "pdf-lib": "1.17.1", @@ -998,6 +999,25 @@ "balanced-match": "^1.0.0" } }, + "node_modules/browser-use-sdk": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/browser-use-sdk/-/browser-use-sdk-3.4.3.tgz", + "integrity": "sha512-4z4sVHCfboa4zq3a+wcdbhG9yhQH5drRaOuWeHwpZnBGwDdpGmyNnHDvbufFPtTJves8fvgbV10oGjRRdeU1yg==", + "license": "MIT", + "dependencies": { + "dotenv": "^17.2.4", + "zod": "^4.3.6" + } + }, + "node_modules/browser-use-sdk/node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/buffer": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz", @@ -1567,6 +1587,18 @@ "url": "https://github.com/fb55/domutils?sponsor=1" } }, + "node_modules/dotenv": { + "version": "17.4.2", + "resolved": 
"https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", + "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", diff --git a/container/package.json b/container/package.json index 3cd3d2f8..2dead042 100644 --- a/container/package.json +++ b/container/package.json @@ -20,6 +20,7 @@ "@modelcontextprotocol/sdk": "1.27.1", "@mozilla/readability": "0.6.0", "agent-browser": "0.15.1", + "browser-use-sdk": "^3.4.3", "jszip": "3.10.1", "linkedom": "0.18.12", "pdf-lib": "1.17.1", diff --git a/container/src/browser-tools.ts b/container/src/browser-tools.ts index 766402c3..2408b600 100644 --- a/container/src/browser-tools.ts +++ b/container/src/browser-tools.ts @@ -7,6 +7,7 @@ import os from 'node:os'; import path from 'node:path'; import { promisify } from 'node:util'; +import { browserUseProvider } from './browser-use-provider.js'; import { callAuxiliaryModel } from './providers/auxiliary.js'; import { DISCORD_MEDIA_CACHE_ROOT_DISPLAY, @@ -199,6 +200,15 @@ type BrowserSession = { lastUsedAt: number; }; +type BrowserExecutionStrategy = 'local-cdp' | 'cloud-cdp' | 'cloud-agent'; + +type ResolvedCdpConnection = { + strategy: BrowserExecutionStrategy; + cdpUrl?: string; + liveUrl?: string | null; + cloudSessionId?: string; +}; + type BrowserVisionContext = BrowserModelContext & { isLocal?: boolean; contextWindow?: number; @@ -345,7 +355,7 @@ function resolveSharedProfileDir(): string | undefined { } } -function resolveCdpUrl(explicit?: string): string | undefined { +function resolveExplicitCdpUrl(explicit?: string): string | undefined { const direct = String(explicit || '').trim(); if (direct) return direct; const configured = String(process.env.BROWSER_CDP_URL || 
'').trim(); @@ -551,50 +561,66 @@ async function terminateSessionProcess(session: BrowserSession): Promise { await terminateProcess(readSessionPid(session)); } -async function closeSession( - sessionId: string, - options: { createIfMissing?: boolean } = {}, -): Promise { +async function closeSession(sessionId: string): Promise<{ + warning: string | null; + artifacts: Array<{ path: string; filename: string; mimeType: string }>; +} | null> { const sessionKey = normalizeSessionKey(sessionId); - const session = options.createIfMissing - ? getSession(sessionKey) - : activeSessions.get(sessionKey); - if (!session) return null; - const pidBeforeClose = readSessionPid(session); - - const result = await runAgentBrowser(session.sessionKey, 'close', [], { - timeoutMs: BROWSER_CLOSE_TIMEOUT_MS, - }); - if (result.success) { - let warning: string | null = null; - try { - await terminateProcess(pidBeforeClose); - } catch (err) { - warning = - err instanceof Error && err.message - ? `daemon termination failed: ${err.message}` - : 'daemon termination failed'; - } finally { - removeSessionResources(session); + const session = activeSessions.get(sessionKey); + let localWarning: string | null = null; + if (session) { + const pidBeforeClose = readSessionPid(session); + const result = await runAgentBrowser(session.sessionKey, 'close', [], { + timeoutMs: BROWSER_CLOSE_TIMEOUT_MS, + }); + if (result.success) { + try { + await terminateProcess(pidBeforeClose); + } catch (err) { + localWarning = + err instanceof Error && err.message + ? `daemon termination failed: ${err.message}` + : 'daemon termination failed'; + } finally { + removeSessionResources(session); + } + } else { + try { + await terminateSessionProcess(session); + } catch { + // Best effort fallback. The warning below preserves the original error. 
/**
 * Closes every browser session known to this process: local sessions tracked
 * in `activeSessions` plus any session ids reported by
 * `browserUseProvider.getTrackedSessionIds()`.
 *
 * Every tracked session is attempted even when an earlier close rejects, so a
 * single failure cannot strand the remaining sessions. The first failure is
 * rethrown after the sweep so callers still observe a rejection.
 *
 * @throws The first error raised by `closeSession`, after all sessions were tried.
 */
export async function cleanupAllBrowserSessions(): Promise<void> {
  const tracked = new Set<string>([
    ...Array.from(activeSessions.keys()),
    ...browserUseProvider.getTrackedSessionIds(),
  ]);
  let failed = false;
  let firstError: unknown;
  for (const sessionKey of tracked) {
    try {
      await closeSession(sessionKey);
    } catch (err) {
      // Remember only the first failure; keep closing the other sessions.
      if (!failed) {
        failed = true;
        firstError = err;
      }
    }
  }
  if (failed) throw firstError;
}

/**
 * Extracts the distinct http(s) URLs mentioned in a free-text task.
 *
 * Trailing sentence punctuation (`)`, `,`, `.`, `;`, `:`, `!`, `?`) is
 * stripped so that "see https://example.com!" yields `https://example.com`
 * instead of a URL whose hostname contains the punctuation.
 *
 * @param task Natural-language task text.
 * @returns Unique URL strings in order of first appearance.
 */
function extractTaskUrlCandidates(task: string): string[] {
  const matches = task.match(/https?:\/\/[^\s)>"'`]+/gi) ?? [];
  // A Set keeps insertion order, which gives de-duplication and ordering at once.
  const seen = new Set<string>();
  for (const match of matches) {
    const normalized = match.trim().replace(/[),.;:!?]+$/, '');
    if (normalized) seen.add(normalized);
  }
  return [...seen];
}

/**
 * Rejects cloud agent tasks that reference private, loopback or local-network
 * targets.
 *
 * Each URL found in the task is validated with `assertNavigationUrl` and
 * checked with `isPrivateHost`. The raw text is then scanned for `localhost`,
 * `127.0.0.1` and hostnames ending in the `.local` label. The `.local` check
 * requires the label to end there, so words such as `.locale` or
 * `.localization` no longer block an otherwise valid task.
 *
 * @param task Natural-language task text.
 * @throws Error when a private/loopback target is referenced, or whatever
 *   `assertNavigationUrl` throws for a URL it rejects.
 */
async function assertCloudAgentTaskAllowed(task: string): Promise<void> {
  for (const candidate of extractTaskUrlCandidates(task)) {
    const parsed = await assertNavigationUrl(candidate);
    if (await isPrivateHost(parsed.hostname)) {
      throw new Error(
        `Browser Use cloud task blocked: private or loopback host (${parsed.hostname}). Use the local browser tools flow instead.`,
      );
    }
  }

  const normalizedTask = task.toLowerCase();
  const mentionsLocalTarget =
    normalizedTask.includes('localhost') ||
    normalizedTask.includes('127.0.0.1') ||
    // ".local" must be a complete trailing label, not the prefix of a longer word.
    /\.local(?![a-z0-9-])/.test(normalizedTask);
  if (mentionsLocalTarget) {
    throw new Error(
      'Browser Use cloud task blocked: localhost/private targets are not allowed in cloud agent mode.',
    );
  }
}
/**
 * Decides which CDP endpoint, if any, a browser command should attach to.
 *
 * Resolution order:
 * 1. An explicit CDP URL, as resolved by `resolveExplicitCdpUrl` from
 *    `options.explicitCdpUrl`, always wins and is reported as `local-cdp`.
 * 2. Otherwise `resolveExecutionStrategy` picks the strategy. Anything other
 *    than `cloud-cdp` is returned without a `cdpUrl`.
 * 3. For `cloud-cdp`, a Browser Use session is obtained through
 *    `browserUseProvider.ensureCdpSession` and its CDP URL, live URL and id
 *    are returned.
 *
 * @param sessionId Local session id; normalized with `normalizeSessionKey`
 *   before it is passed to the cloud provider.
 * @param options Optional explicit CDP URL, navigation target, proxy country
 *   and session timeout (minutes) used to pick and configure the strategy.
 * @returns The chosen strategy plus connection details when a CDP URL exists.
 */
async function resolveCdpConnection(
  sessionId: string,
  options: {
    explicitCdpUrl?: string;
    url?: URL;
    proxyCountry?: string;
    timeoutMinutes?: number;
  } = {},
): Promise<ResolvedCdpConnection> {
  const direct = resolveExplicitCdpUrl(options.explicitCdpUrl);
  if (direct) {
    return {
      strategy: 'local-cdp',
      cdpUrl: direct,
    };
  }

  const { url, proxyCountry, timeoutMinutes } = options;
  const strategy = await resolveExecutionStrategy({
    kind: 'cdp',
    sessionId,
    url,
    proxyCountry,
    timeoutMinutes,
  });
  if (strategy !== 'cloud-cdp') {
    return { strategy };
  }

  const cloud = await browserUseProvider.ensureCdpSession({
    localSessionId: normalizeSessionKey(sessionId),
    proxyCountry,
    timeoutMinutes,
  });
  return {
    strategy,
    cdpUrl: cloud.cdpUrl,
    liveUrl: cloud.liveUrl,
    cloudSessionId: cloud.id,
  };
}
{ ...schema } : undefined; +} + +function normalizeArtifactPaths(value: unknown): string[] { + return normalizeStringList(value, 32); +} + function normalizeSnapshotMode(rawMode: unknown): SnapshotMode { if (rawMode == null || String(rawMode).trim() === '') return 'default'; const mode = String(rawMode).trim().toLowerCase(); @@ -1290,7 +1429,12 @@ async function runAgentBrowser( sessionId: string, command: string, commandArgs: string[] = [], - options: { timeoutMs?: number; cdpUrl?: string } = {}, + options: { + timeoutMs?: number; + cdpUrl?: string; + proxyCountry?: string; + timeoutMinutes?: number; + } = {}, ): Promise<{ success: boolean; data?: unknown; error?: string }> { const runner = resolveRunner(); if (!runner) { @@ -1314,7 +1458,12 @@ async function runAgentBrowser( resolvePlaywrightBrowsersPath(), ); const args = [...runner.prefixArgs]; - const cdpUrl = resolveCdpUrl(options.cdpUrl); + const cdpConnection = await resolveCdpConnection(session.sessionKey, { + explicitCdpUrl: options.cdpUrl, + proxyCountry: options.proxyCountry, + timeoutMinutes: options.timeoutMinutes, + }); + const cdpUrl = cdpConnection.cdpUrl; if (cdpUrl) { args.push('--cdp', cdpUrl); } @@ -1414,11 +1563,29 @@ export async function executeBrowserTool( switch (name) { case 'browser_navigate': { const parsed = await assertNavigationUrl(args.url); + const connection = await resolveCdpConnection(effectiveSessionId, { + url: parsed, + proxyCountry: String(args.proxy_country || '').trim() || undefined, + timeoutMinutes: + typeof args.timeout_minutes === 'number' && + Number.isFinite(args.timeout_minutes) + ? 
Math.floor(args.timeout_minutes) + : undefined, + }); const result = await runAgentBrowser( effectiveSessionId, 'open', [parsed.toString()], - { timeoutMs: 60_000 }, + { + timeoutMs: 60_000, + cdpUrl: connection.cdpUrl, + proxyCountry: String(args.proxy_country || '').trim() || undefined, + timeoutMinutes: + typeof args.timeout_minutes === 'number' && + Number.isFinite(args.timeout_minutes) + ? Math.floor(args.timeout_minutes) + : undefined, + }, ); if (!result.success) return failure(result.error || 'navigation failed'); @@ -1456,6 +1623,11 @@ export async function executeBrowserTool( url: data.url || parsed.toString(), title, session_id: effectiveSessionId, + execution_strategy: connection.strategy, + ...(connection.cloudSessionId + ? { cloud_session_id: connection.cloudSessionId } + : {}), + ...(connection.liveUrl ? { live_url: connection.liveUrl } : {}), content_text_length: contentLength, ...(contentPreview ? { content_preview: contentPreview } : {}), ...(contentPreview @@ -1469,6 +1641,66 @@ export async function executeBrowserTool( }); } + case 'browser_agent_task': { + const task = String(args.task || '').trim(); + if (!task) return failure('task is required'); + await assertCloudAgentTaskAllowed(task); + const strategy = await resolveExecutionStrategy({ kind: 'agent' }); + if (strategy !== 'cloud-agent') { + return failure( + 'browser_agent_task requires Browser Use cloud. 
Configure browser.cloudProvider="browser-use" and BROWSER_USE_API_KEY, or use the step-by-step browser_* tools instead.', + ); + } + const outputSchema = normalizeOutputSchema(args.output_schema); + const artifactPaths = normalizeArtifactPaths(args.artifact_paths); + const agentResult = await browserUseProvider.runAgentTask({ + localSessionId: effectiveSessionId, + task, + outputSchema, + artifactPaths, + sessionId: String(args.session_id || '').trim() || undefined, + proxyCountry: String(args.proxy_country || '').trim() || undefined, + model: String(args.model || '').trim() || undefined, + artifactRoot: BROWSER_ARTIFACT_ROOT, + progress: (message) => { + console.error(`[tool] browser_agent_task: ${message}`); + }, + }); + return success({ + execution_strategy: strategy, + session_id: agentResult.sessionId, + status: agentResult.status, + is_task_successful: agentResult.isTaskSuccessful, + output: agentResult.output, + ...(agentResult.outputText + ? { output_text: agentResult.outputText } + : {}), + step_count: agentResult.stepCount, + ...(agentResult.lastStepSummary + ? { last_step_summary: agentResult.lastStepSummary } + : {}), + ...(agentResult.liveUrl ? { live_url: agentResult.liveUrl } : {}), + ...(agentResult.profileId + ? { profile_id: agentResult.profileId } + : {}), + ...(agentResult.workspaceId + ? { workspace_id: agentResult.workspaceId } + : {}), + workspace_artifact_paths: agentResult.workspaceArtifactPaths, + llm_cost_usd: agentResult.llmCostUsd, + proxy_cost_usd: agentResult.proxyCostUsd, + browser_cost_usd: agentResult.browserCostUsd, + total_cost_usd: agentResult.totalCostUsd, + total_input_tokens: agentResult.totalInputTokens, + total_output_tokens: agentResult.totalOutputTokens, + ...(agentResult.screenshotUrl + ? 
{ screenshot_url: agentResult.screenshotUrl } + : {}), + recording_paths: agentResult.recordingPaths, + artifacts: agentResult.artifacts, + }); + } + case 'browser_snapshot': { const mode = normalizeSnapshotMode(args.mode); const full = args.full === true; @@ -1760,6 +1992,68 @@ export async function executeBrowserTool( return success({ count: images.length, images }); } + case 'browser_save_profile': { + if (!browserUseProvider.isEnabled()) { + return failure( + 'Browser Use cloud profiles are not configured. Set browser.cloudProvider="browser-use" and provide BROWSER_USE_API_KEY.', + ); + } + const profile = await browserUseProvider.createProfile({ + localSessionId: effectiveSessionId, + name: String(args.name || '').trim() || undefined, + userId: String(args.user_id || '').trim() || undefined, + }); + return success({ + profile_id: profile.profile.id, + ...(profile.profile.name ? { name: profile.profile.name } : {}), + ...(profile.profile.userId + ? { user_id: profile.profile.userId } + : {}), + applies_to_current_session: profile.appliesToCurrentSession, + applies_to_next_session: profile.appliesToNextSession, + }); + } + + case 'browser_load_profile': { + if (!browserUseProvider.isEnabled()) { + return failure( + 'Browser Use cloud profiles are not configured. Set browser.cloudProvider="browser-use" and provide BROWSER_USE_API_KEY.', + ); + } + const profile = await browserUseProvider.loadProfile({ + localSessionId: effectiveSessionId, + profileId: String(args.profile_id || '').trim() || undefined, + query: String(args.query || args.name || '').trim() || undefined, + userId: String(args.user_id || '').trim() || undefined, + }); + return success({ + profile_id: profile.profile.id, + ...(profile.profile.name ? { name: profile.profile.name } : {}), + ...(profile.profile.userId + ? { user_id: profile.profile.userId } + : {}), + ...(profile.profile.cookieDomains + ? 
{ cookie_domains: profile.profile.cookieDomains } + : {}), + applies_to_current_session: profile.appliesToCurrentSession, + applies_to_next_session: profile.appliesToNextSession, + }); + } + + case 'browser_get_recording': { + const artifacts = browserUseProvider.getLatestRecordingArtifacts( + normalizeSessionKey(effectiveSessionId), + ); + const recordingPaths = artifacts + .map((entry) => toWorkspaceRelativePath(entry.path)) + .filter((entry): entry is string => Boolean(entry)); + return success({ + count: recordingPaths.length, + recording_paths: recordingPaths, + artifacts, + }); + } + case 'browser_console': { const clear = args.clear === true; const commandArgs = clear ? ['--clear'] : []; @@ -1883,13 +2177,20 @@ export async function executeBrowserTool( } case 'browser_close': { - const warning = await closeSession(effectiveSessionId, { - createIfMissing: true, - }); - if (warning) { + const closed = await closeSession(effectiveSessionId); + if (closed && (closed.warning || closed.artifacts.length > 0)) { + const recordingPaths = closed.artifacts + .map((entry) => toWorkspaceRelativePath(entry.path)) + .filter((entry): entry is string => Boolean(entry)); return success({ closed: true, - warning, + ...(closed.warning ? { warning: closed.warning } : {}), + ...(closed.artifacts.length > 0 + ? 
{ + recording_paths: recordingPaths, + artifacts: closed.artifacts, + } + : {}), }); } return success({ closed: true }); @@ -1917,11 +2218,67 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [ type: 'string', description: 'URL to open (http:// or https://)', }, + proxy_country: { + type: 'string', + description: + 'Optional two-letter proxy country override for Browser Use cloud sessions (for example "us" or "de").', + }, + timeout_minutes: { + type: 'number', + description: + 'Optional Browser Use cloud session timeout override in minutes.', + }, }, required: ['url'], }, }, }, + { + type: 'function', + function: { + name: 'browser_agent_task', + description: + 'Delegate a multi-step web task to Browser Use cloud agent mode. Prefer this over many step-by-step browser_* calls for workflows like extraction, checkout flows, settings updates, or structured scraping. Private or localhost targets are not allowed in cloud agent mode.', + parameters: { + type: 'object', + properties: { + task: { + type: 'string', + description: 'Natural-language task for the browser agent.', + }, + output_schema: { + type: 'object', + description: + 'Optional JSON Schema for the final structured output.', + }, + artifact_paths: { + type: 'array', + description: + 'Optional relative file paths under .browser-artifacts to upload into the Browser Use workspace for this task.', + items: { + type: 'string', + }, + }, + session_id: { + type: 'string', + description: + 'Optional Browser Use session id to reuse for follow-up tasks.', + }, + proxy_country: { + type: 'string', + description: + 'Optional two-letter proxy country override for a new cloud session.', + }, + model: { + type: 'string', + description: + 'Optional Browser Use model override. 
Defaults to the configured cloud model.', + }, + }, + required: ['task'], + }, + }, + }, { type: 'function', function: { @@ -2174,6 +2531,74 @@ export const BROWSER_TOOL_DEFINITIONS: ToolDefinition[] = [ }, }, }, + { + type: 'function', + function: { + name: 'browser_save_profile', + description: + 'Create a new Browser Use cloud profile and select it for this HybridClaw session. Run this before login-heavy cloud browser flows when you want future Browser Use sessions to preserve auth state.', + parameters: { + type: 'object', + properties: { + name: { + type: 'string', + description: 'Optional human-readable name for the profile.', + }, + user_id: { + type: 'string', + description: + 'Optional application user id to associate with the cloud profile.', + }, + }, + required: [], + }, + }, + }, + { + type: 'function', + function: { + name: 'browser_load_profile', + description: + 'Select an existing Browser Use cloud profile for this HybridClaw session. Use before Browser Use cloud tasks when you want to reuse saved cookies or auth state.', + parameters: { + type: 'object', + properties: { + profile_id: { + type: 'string', + description: 'Exact Browser Use profile id to select.', + }, + query: { + type: 'string', + description: + 'Optional profile name or id search when profile_id is not provided.', + }, + name: { + type: 'string', + description: 'Alias for query when selecting by profile name.', + }, + user_id: { + type: 'string', + description: + 'Optional application user id filter when searching for a profile.', + }, + }, + required: [], + }, + }, + }, + { + type: 'function', + function: { + name: 'browser_get_recording', + description: + 'Return the most recently downloaded Browser Use cloud session recordings for this HybridClaw session. 
Use after browser_agent_task or browser_close when recording is enabled.', + parameters: { + type: 'object', + properties: {}, + required: [], + }, + }, + }, { type: 'function', function: { diff --git a/container/src/browser-use-provider.ts b/container/src/browser-use-provider.ts new file mode 100644 index 00000000..b5e3f84e --- /dev/null +++ b/container/src/browser-use-provider.ts @@ -0,0 +1,904 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +import { + type BrowserSessionItemView, + type BrowserSessionView, + BrowserUse, + type MessageResponse, + type ProfileView, + type ProxyCountryCode, + type SessionResponse, + type StopSessionRequest, + type WorkspaceView, +} from 'browser-use-sdk/v3'; + +type BrowserUseRunHandle = PromiseLike & + AsyncIterable & { + sessionId: string | null; + result: SessionResponse | null; + }; + +type BrowserUseBrowserSession = BrowserSessionItemView | BrowserSessionView; + +type BrowserUseConfig = { + enabled: boolean; + provider: 'none' | 'browser-use'; + apiKey: string; + baseUrl: string; + defaultModel: string; + defaultProxyCountry: ProxyCountryCode | null; + enableRecording: boolean; + maxCostPerTaskUsd: number | null; + maxSessionTimeoutMinutes: number; + preferAgentMode: boolean; + deterministicRerun: boolean; +}; + +type BrowserUseSessionState = { + preferredProfileId?: string; + preferredWorkspaceId?: string; + browserSessionId?: string; + browserSession?: BrowserUseBrowserSession; + browserSessionPromise?: Promise; + browserProfileId?: string; + browserProxyCountry?: string | null; + agentSessionId?: string; + lastRecordingPaths: string[]; +}; + +export type BrowserUseCdpSessionResult = { + id: string; + cdpUrl: string; + liveUrl?: string | null; + profileId?: string | null; + proxyCountryCode?: string | null; + timeoutAt?: string; + enableRecording: boolean; +}; + +export type BrowserUseArtifact = { + path: string; + filename: string; + mimeType: string; +}; + +export type BrowserUseCloseResult = { + warnings: 
const ENV_FALSEY = new Set(['0', 'false', 'no', 'off']);
const DEFAULT_BASE_URL = 'https://api.browser-use.com/api/v3';
const RECORDING_DOWNLOAD_TIMEOUT_MS = 20_000;

/**
 * Reads a boolean feature flag from the environment.
 *
 * An unset or blank variable yields `defaultValue`; otherwise the flag is on
 * unless the trimmed, lower-cased value is one of `0`, `false`, `no`, `off`.
 */
function envFlagEnabled(name: string, defaultValue: boolean): boolean {
  const value = (process.env[name] ?? '').trim().toLowerCase();
  return value === '' ? defaultValue : !ENV_FALSEY.has(value);
}

/**
 * Maps a raw provider string onto a supported provider id.
 *
 * Accepts `browser-use` / `browser_use` and `none` / `local` / `off`
 * (case-insensitive, surrounding whitespace ignored). Blank or unrecognized
 * input returns `fallback`.
 */
function normalizeProvider(
  raw: string,
  fallback: BrowserUseConfig['provider'],
): BrowserUseConfig['provider'] {
  switch (raw.trim().toLowerCase()) {
    case 'browser-use':
    case 'browser_use':
      return 'browser-use';
    case 'none':
    case 'local':
    case 'off':
      return 'none';
    default:
      return fallback;
  }
}

/**
 * Parses a number with `Number.parseFloat`, substituting `fallback` when the
 * result is not finite, then clamps it into the optional `[min, max]` range.
 */
function normalizeNumber(
  raw: string,
  fallback: number,
  options: { min?: number; max?: number } = {},
): number {
  const { min, max } = options;
  const parsed = Number.parseFloat(raw);
  let value = Number.isFinite(parsed) ? parsed : fallback;
  if (min != null && value < min) value = min;
  if (max != null && value > max) value = max;
  return value;
}

/**
 * Integer variant of `normalizeNumber`: same parsing and clamping, with the
 * fractional part truncated afterwards.
 */
function normalizeInteger(
  raw: string,
  fallback: number,
  options: { min?: number; max?: number } = {},
): number {
  const clamped = normalizeNumber(raw, fallback, options);
  return Math.trunc(clamped);
}

/**
 * Normalizes a proxy country setting.
 *
 * Blank input returns `fallback`; `none` / `off` disable the proxy (`null`);
 * a two-letter code is returned lower-cased; anything else returns `fallback`.
 */
function normalizeProxyCountry(
  raw: string,
  fallback: ProxyCountryCode | null,
): ProxyCountryCode | null {
  const code = raw.trim().toLowerCase();
  if (code === '') return fallback;
  if (code === 'none' || code === 'off') return null;
  return /^[a-z]{2}$/.test(code) ? (code as ProxyCountryCode) : fallback;
}
/**
 * Renders an agent output value as text.
 *
 * Strings pass through, `null`/`undefined` become `''`, and everything else is
 * pretty-printed as JSON. `JSON.stringify` returns `undefined` for values it
 * cannot represent (functions, symbols) and throws for others (cycles,
 * bigint); both cases fall back to `String(output)` so a string is always
 * returned.
 */
function formatOutputText(output: unknown): string {
  if (typeof output === 'string') return output;
  if (output == null) return '';
  try {
    const serialized: string | undefined = JSON.stringify(output, null, 2);
    return serialized ?? String(output);
  } catch {
    return String(output);
  }
}

/**
 * Collapses whitespace runs to single spaces, trims, and shortens the text to
 * at most `maxLength` characters, ending with an ellipsis when it was cut.
 */
function truncateProgress(text: string, maxLength = 240): string {
  const normalized = text.replace(/\s+/g, ' ').trim();
  if (!normalized) return '';
  return normalized.length > maxLength
    ? `${normalized.slice(0, maxLength - 1)}…`
    : normalized;
}

/**
 * Picks the first non-empty progress text from a message, in the order
 * `summary`, `data`, `type`, truncated via `truncateProgress`.
 */
function formatProgressMessage(message: MessageResponse): string {
  const summary = truncateProgress(message.summary || '');
  if (summary) return summary;
  const data = truncateProgress(message.data || '');
  if (data) return data;
  return truncateProgress(message.type || '');
}

/** Returns `absolutePath` relative to `root`, using `/` as the separator. */
function toPosixRelative(root: string, absolutePath: string): string {
  return path.relative(root, absolutePath).split(path.sep).join('/');
}

/**
 * Reports whether `targetPath` is `root` itself or lies beneath it.
 *
 * Only a relative path that *is* `..` or starts with `..` followed by the
 * path separator escapes the root. A plain `startsWith('..')` test would also
 * reject legitimate in-root names such as `..cache/file.bin`.
 */
function isPathInsideRoot(root: string, targetPath: string): boolean {
  const relative = path.relative(root, targetPath);
  if (relative === '') return true;
  // On Windows a different drive produces an absolute "relative" path.
  if (path.isAbsolute(relative)) return false;
  return relative !== '..' && !relative.startsWith(`..${path.sep}`);
}

/**
 * Lists every regular file beneath `root`, recursing into sub-directories.
 *
 * @returns Absolute file paths sorted with `localeCompare`; an empty list when
 *   `root` does not exist. Entries that are neither files nor directories are
 *   ignored.
 */
async function listFilesRecursive(root: string): Promise<string[]> {
  const out: string[] = [];
  const walk = async (dir: string): Promise<void> => {
    const entries = await fs.promises.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        await walk(fullPath);
        continue;
      }
      if (!entry.isFile()) continue;
      out.push(fullPath);
    }
  };
  if (fs.existsSync(root)) {
    await walk(root);
  }
  return out.sort((left, right) => left.localeCompare(right));
}

/**
 * Downloads `url` into `filePath`, creating parent directories as needed.
 *
 * The request, including reading the body, is aborted after
 * `RECORDING_DOWNLOAD_TIMEOUT_MS`.
 *
 * @throws Error when the response status is not OK, when the request is
 *   aborted, or when the file cannot be written.
 */
async function downloadUrlToFile(url: string, filePath: string): Promise<void> {
  const controller = new AbortController();
  const timer = setTimeout(
    () => controller.abort(),
    RECORDING_DOWNLOAD_TIMEOUT_MS,
  );
  try {
    const response = await fetch(url, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(
        `download failed (${response.status} ${response.statusText})`,
      );
    }
    const buffer = Buffer.from(await response.arrayBuffer());
    await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
    await fs.promises.writeFile(filePath, buffer);
  } finally {
    // Always clear the timer so it cannot fire after the download has settled.
    clearTimeout(timer);
  }
}
Set browser.cloudProvider to "browser-use" and provide BROWSER_USE_API_KEY.', + ); + } + if (!this.clientInstance) { + this.clientInstance = new BrowserUse({ + apiKey: this.config.apiKey, + baseUrl: this.config.baseUrl, + }); + } + return this.clientInstance; + } + + private getState(localSessionId: string): BrowserUseSessionState { + let state = this.states.get(localSessionId); + if (!state) { + state = { + lastRecordingPaths: [], + }; + this.states.set(localSessionId, state); + } + return state; + } + + private activeBrowserUsesProfile( + state: BrowserUseSessionState, + profileId: string, + ): boolean { + return ( + Boolean(state.browserSessionId) && state.browserProfileId === profileId + ); + } + + getTrackedSessionIds(): string[] { + const tracked: string[] = []; + for (const [localSessionId, state] of this.states.entries()) { + if (!state.browserSessionId && !state.agentSessionId) continue; + tracked.push(localSessionId); + } + return tracked.sort((left, right) => left.localeCompare(right)); + } + + getLatestRecordingArtifacts(localSessionId: string): BrowserUseArtifact[] { + const state = this.states.get(localSessionId); + if (!state?.lastRecordingPaths.length) return []; + return state.lastRecordingPaths + .filter((filePath) => fs.existsSync(filePath)) + .map((filePath) => ({ + path: filePath, + filename: path.basename(filePath), + mimeType: 'video/mp4', + })); + } + + shouldUseCloudCdp(params: { + localSessionId: string; + proxyCountry?: string; + timeoutMinutes?: number; + }): boolean { + if (!this.config.enabled) return false; + if (params.proxyCountry?.trim()) return true; + if ( + typeof params.timeoutMinutes === 'number' && + Number.isFinite(params.timeoutMinutes) + ) { + return true; + } + const state = this.states.get(params.localSessionId); + return Boolean( + state?.preferredProfileId || + state?.browserSessionId || + state?.browserSessionPromise, + ); + } + + private async ensureWorkspace( + localSessionId: string, + ): Promise { + const state = 
this.getState(localSessionId); + const workspaceId = state.preferredWorkspaceId; + const client = this.getClient(); + if (workspaceId) { + try { + return await client.workspaces.get(workspaceId); + } catch { + state.preferredWorkspaceId = undefined; + } + } + const workspace = await client.workspaces.create({ + name: `HybridClaw ${localSessionId}`, + }); + state.preferredWorkspaceId = workspace.id; + return workspace; + } + + private async uploadArtifactPaths( + workspaceId: string, + artifactRoot: string, + artifactPaths: string[], + ): Promise { + const client = this.getClient(); + if (artifactPaths.length === 0) return; + const seen = new Set(); + const files: Array<{ absolutePath: string; relativePath: string }> = []; + for (const rawPath of artifactPaths) { + const normalized = rawPath.trim(); + if (!normalized) continue; + const absolutePath = path.resolve(artifactRoot, normalized); + if (!isPathInsideRoot(artifactRoot, absolutePath)) { + throw new Error( + `artifact path must stay under .browser-artifacts: ${normalized}`, + ); + } + const stat = await fs.promises.stat(absolutePath).catch(() => null); + if (!stat?.isFile()) { + throw new Error( + `artifact path does not exist or is not a file: ${normalized}`, + ); + } + const relativePath = toPosixRelative(artifactRoot, absolutePath); + if ( + !relativePath || + relativePath.startsWith('..') || + seen.has(relativePath) + ) { + continue; + } + seen.add(relativePath); + files.push({ absolutePath, relativePath }); + } + for (const file of files) { + const prefix = path.posix.dirname(file.relativePath); + await client.workspaces.upload( + workspaceId, + file.absolutePath, + prefix === '.' ? 
{} : { prefix }, + ); + } + } + + private async downloadWorkspaceArtifacts( + workspaceId: string, + artifactRoot: string, + localSessionId: string, + remoteSessionId: string, + ): Promise { + const downloadRoot = path.join( + artifactRoot, + 'browser-use-workspaces', + localSessionId, + remoteSessionId, + ); + await fs.promises.rm(downloadRoot, { recursive: true, force: true }); + await fs.promises.mkdir(downloadRoot, { recursive: true }); + await this.getClient().workspaces.downloadAll(workspaceId, { + to: downloadRoot, + }); + const files = await listFilesRecursive(downloadRoot); + return files.map((filePath) => ({ + path: filePath, + filename: path.basename(filePath), + mimeType: 'application/octet-stream', + })); + } + + private async resolveReusableAgentSession( + requestedSessionId: string | undefined, + localSessionId: string, + ): Promise { + const state = this.getState(localSessionId); + const candidate = requestedSessionId?.trim() || state.agentSessionId || ''; + if (!candidate) return null; + try { + const session = await this.getClient().sessions.get(candidate); + if (session.status === 'idle') { + state.agentSessionId = session.id; + return session.id; + } + if ( + session.status === 'stopped' || + session.status === 'timed_out' || + session.status === 'error' + ) { + if (state.agentSessionId === candidate) { + state.agentSessionId = undefined; + } + return null; + } + throw new Error( + `Browser Use session ${candidate} is ${session.status}. Wait for it to become idle or close it first.`, + ); + } catch (err) { + if (state.agentSessionId === candidate) { + state.agentSessionId = undefined; + } + if (requestedSessionId) { + throw err; + } + return null; + } + } + + async createProfile(params: { + localSessionId: string; + name?: string; + userId?: string; + }): Promise { + const profile = await this.getClient().profiles.create({ + ...(params.name?.trim() ? { name: params.name.trim() } : {}), + ...(params.userId?.trim() ? 
{ userId: params.userId.trim() } : {}), + }); + const state = this.getState(params.localSessionId); + state.preferredProfileId = profile.id; + return { + profile, + appliesToCurrentSession: this.activeBrowserUsesProfile(state, profile.id), + appliesToNextSession: true, + }; + } + + async loadProfile(params: { + localSessionId: string; + profileId?: string; + query?: string; + userId?: string; + }): Promise { + const client = this.getClient(); + const state = this.getState(params.localSessionId); + let profile: ProfileView | null = null; + if (params.profileId?.trim()) { + profile = await client.profiles.get(params.profileId.trim()); + } else { + const listing = await client.profiles.list({ + ...(params.query?.trim() ? { query: params.query.trim() } : {}), + page: 1, + page_size: 100, + }); + const matches = listing.items.filter((entry) => { + if (params.userId?.trim() && entry.userId !== params.userId.trim()) { + return false; + } + if (!params.query?.trim()) return true; + const needle = params.query.trim().toLowerCase(); + return ( + String(entry.name || '').toLowerCase() === needle || + String(entry.id || '').toLowerCase() === needle + ); + }); + profile = matches[0] || null; + } + if (!profile) { + throw new Error('Browser Use profile not found.'); + } + state.preferredProfileId = profile.id; + return { + profile, + appliesToCurrentSession: this.activeBrowserUsesProfile(state, profile.id), + appliesToNextSession: true, + }; + } + + async ensureCdpSession(params: { + localSessionId: string; + proxyCountry?: string; + timeoutMinutes?: number; + }): Promise { + const state = this.getState(params.localSessionId); + if ( + state.browserSession?.status === 'active' && + state.browserSession.cdpUrl + ) { + return { + id: state.browserSession.id, + cdpUrl: state.browserSession.cdpUrl, + liveUrl: state.browserSession.liveUrl, + profileId: state.browserProfileId, + proxyCountryCode: state.browserProxyCountry, + timeoutAt: state.browserSession.timeoutAt, + 
enableRecording: this.config.enableRecording, + }; + } + if (state.browserSessionPromise) { + const session = await state.browserSessionPromise; + if (!session.cdpUrl) { + throw new Error('Browser Use did not return a CDP URL.'); + } + return { + id: session.id, + cdpUrl: session.cdpUrl, + liveUrl: session.liveUrl, + profileId: state.browserProfileId, + proxyCountryCode: state.browserProxyCountry, + timeoutAt: session.timeoutAt, + enableRecording: this.config.enableRecording, + }; + } + + const proxyCountry = normalizeProxyCountry( + params.proxyCountry || '', + this.config.defaultProxyCountry, + ); + state.browserSessionPromise = this.getClient() + .browsers.create({ + ...(state.preferredProfileId + ? { profileId: state.preferredProfileId } + : {}), + proxyCountryCode: proxyCountry, + timeout: Math.min( + 240, + Math.max( + 1, + params.timeoutMinutes || this.config.maxSessionTimeoutMinutes, + ), + ), + enableRecording: this.config.enableRecording, + }) + .then((session) => { + state.browserSessionId = session.id; + state.browserSession = session; + state.browserProfileId = state.preferredProfileId; + state.browserProxyCountry = proxyCountry; + return session; + }) + .finally(() => { + state.browserSessionPromise = undefined; + }); + + const session = await state.browserSessionPromise; + if (!session.cdpUrl) { + throw new Error('Browser Use did not return a CDP URL.'); + } + return { + id: session.id, + cdpUrl: session.cdpUrl, + liveUrl: session.liveUrl, + profileId: state.browserProfileId, + proxyCountryCode: state.browserProxyCountry, + timeoutAt: session.timeoutAt, + enableRecording: this.config.enableRecording, + }; + } + + private async maybeDownloadRecordingUrls( + localSessionId: string, + recordingUrls: string[], + artifactRoot: string, + prefix: 'session' | 'browser', + ): Promise { + const state = this.getState(localSessionId); + if (recordingUrls.length === 0) { + state.lastRecordingPaths = []; + return []; + } + + const recordingDir = 
path.join(artifactRoot, 'recordings'); + await fs.promises.mkdir(recordingDir, { recursive: true }); + const artifacts: BrowserUseArtifact[] = []; + for (const [index, url] of recordingUrls.entries()) { + const filePath = path.join( + recordingDir, + `${prefix}-${localSessionId}-${index + 1}.mp4`, + ); + await downloadUrlToFile(url, filePath); + artifacts.push({ + path: filePath, + filename: path.basename(filePath), + mimeType: 'video/mp4', + }); + } + state.lastRecordingPaths = artifacts.map((entry) => entry.path); + return artifacts; + } + + async runAgentTask( + params: BrowserUseAgentTaskParams, + ): Promise { + const client = this.getClient(); + const state = this.getState(params.localSessionId); + const reusableSessionId = await this.resolveReusableAgentSession( + params.sessionId, + params.localSessionId, + ); + const workspace = await this.ensureWorkspace(params.localSessionId); + if (workspace && params.artifactPaths?.length) { + await this.uploadArtifactPaths( + workspace.id, + params.artifactRoot, + params.artifactPaths, + ); + } + + const runOptions: Record = { + model: params.model?.trim() || this.config.defaultModel, + keepAlive: true, + ...(this.config.maxCostPerTaskUsd != null + ? { maxCostUsd: this.config.maxCostPerTaskUsd } + : {}), + ...(params.outputSchema ? { outputSchema: params.outputSchema } : {}), + ...(this.config.enableRecording ? { enableRecording: true } : {}), + ...(this.config.deterministicRerun && workspace?.id + ? 
{ cacheScript: true, autoHeal: true } + : {}), + }; + if (reusableSessionId) { + runOptions.sessionId = reusableSessionId; + } else { + if (state.preferredProfileId) { + runOptions.profileId = state.preferredProfileId; + } + if (workspace?.id) { + runOptions.workspaceId = workspace.id; + } + const proxyCountry = normalizeProxyCountry( + params.proxyCountry || '', + this.config.defaultProxyCountry, + ); + if (proxyCountry !== undefined) { + runOptions.proxyCountryCode = proxyCountry; + } + } + + const run = client.run( + params.task, + runOptions as never, + ) as BrowserUseRunHandle; + let lastProgress = ''; + for await (const message of run) { + if (run.sessionId) { + state.agentSessionId = run.sessionId; + } + if (message.hidden) continue; + const formatted = formatProgressMessage(message); + if (!formatted || formatted === lastProgress) continue; + lastProgress = formatted; + params.progress?.(formatted, message); + } + + const result = (run.result as SessionResponse | null) ?? (await run); + state.agentSessionId = result.id; + state.preferredProfileId = result.profileId || state.preferredProfileId; + state.preferredWorkspaceId = result.workspaceId || workspace?.id; + + const workspaceArtifacts = state.preferredWorkspaceId + ? await this.downloadWorkspaceArtifacts( + state.preferredWorkspaceId, + params.artifactRoot, + params.localSessionId, + result.id, + ) + : []; + + const recordingUrls = + result.recordingUrls.length > 0 + ? result.recordingUrls + : this.config.enableRecording + ? 
await client.sessions.waitForRecording(result.id) + : []; + const recordingArtifacts = await this.maybeDownloadRecordingUrls( + params.localSessionId, + recordingUrls, + params.artifactRoot, + 'session', + ); + + return { + sessionId: result.id, + status: result.status, + isTaskSuccessful: result.isTaskSuccessful, + output: result.output, + outputText: formatOutputText(result.output), + stepCount: result.stepCount, + lastStepSummary: result.lastStepSummary, + liveUrl: result.liveUrl, + profileId: result.profileId, + workspaceId: result.workspaceId, + llmCostUsd: result.llmCostUsd, + proxyCostUsd: result.proxyCostUsd, + browserCostUsd: result.browserCostUsd, + totalCostUsd: result.totalCostUsd, + totalInputTokens: result.totalInputTokens, + totalOutputTokens: result.totalOutputTokens, + screenshotUrl: result.screenshotUrl, + recordingUrls, + recordingPaths: recordingArtifacts.map((entry) => entry.path), + workspaceArtifactPaths: workspaceArtifacts.map((entry) => entry.path), + artifacts: [...workspaceArtifacts, ...recordingArtifacts], + }; + } + + private async stopAgentSession( + localSessionId: string, + artifactRoot: string, + ): Promise { + const state = this.getState(localSessionId); + const warnings: string[] = []; + const artifacts: BrowserUseArtifact[] = []; + if (!state.agentSessionId) { + return { warnings, artifacts }; + } + try { + const stopped = await this.getClient().sessions.stop( + state.agentSessionId, + { + strategy: 'session', + } satisfies StopSessionRequest, + ); + const recordingUrls = + stopped.recordingUrls.length > 0 + ? stopped.recordingUrls + : this.config.enableRecording + ? await this.getClient().sessions.waitForRecording(stopped.id) + : []; + artifacts.push( + ...(await this.maybeDownloadRecordingUrls( + localSessionId, + recordingUrls, + artifactRoot, + 'session', + )), + ); + } catch (err) { + warnings.push( + err instanceof Error + ? 
err.message + : 'failed to stop Browser Use session', + ); + } finally { + state.agentSessionId = undefined; + } + return { warnings, artifacts }; + } + + private async stopBrowserSession( + localSessionId: string, + artifactRoot: string, + ): Promise { + const state = this.getState(localSessionId); + const warnings: string[] = []; + const artifacts: BrowserUseArtifact[] = []; + if (!state.browserSessionId) { + return { warnings, artifacts }; + } + try { + const stopped = await this.getClient().browsers.stop( + state.browserSessionId, + ); + const recordingArtifacts = + stopped.recordingUrl && this.config.enableRecording + ? await this.maybeDownloadRecordingUrls( + localSessionId, + [stopped.recordingUrl], + artifactRoot, + 'browser', + ) + : []; + artifacts.push(...recordingArtifacts); + } catch (err) { + warnings.push( + err instanceof Error + ? err.message + : 'failed to stop Browser Use browser', + ); + } finally { + state.browserSessionId = undefined; + state.browserSession = undefined; + state.browserProfileId = undefined; + state.browserProxyCountry = undefined; + } + return { warnings, artifacts }; + } + + async closeLocalSession( + localSessionId: string, + artifactRoot: string, + ): Promise { + if (!this.config.enabled) { + return { warnings: [], artifacts: [] }; + } + const sessionStop = await this.stopAgentSession( + localSessionId, + artifactRoot, + ); + const browserStop = await this.stopBrowserSession( + localSessionId, + artifactRoot, + ); + return { + warnings: [...sessionStop.warnings, ...browserStop.warnings], + artifacts: [...sessionStop.artifacts, ...browserStop.artifacts], + }; + } +} + +export const browserUseProvider = new BrowserUseProvider(); diff --git a/container/src/tools.ts b/container/src/tools.ts index 8ce6e396..d0362cf0 100644 --- a/container/src/tools.ts +++ b/container/src/tools.ts @@ -2851,6 +2851,7 @@ async function executeToolInternal( } case 'browser_navigate': + case 'browser_agent_task': case 'browser_snapshot': case 
'browser_click': case 'browser_type': @@ -2862,6 +2863,9 @@ async function executeToolInternal( case 'browser_pdf': case 'browser_vision': case 'browser_get_images': + case 'browser_save_profile': + case 'browser_load_profile': + case 'browser_get_recording': case 'browser_console': case 'browser_network': case 'browser_close': { diff --git a/package-lock.json b/package-lock.json index 010463ab..a08e7d0e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -94,6 +94,7 @@ "@modelcontextprotocol/sdk": "1.27.1", "@mozilla/readability": "0.6.0", "agent-browser": "0.15.1", + "browser-use-sdk": "^3.4.3", "jszip": "3.10.1", "linkedom": "0.18.12", "pdf-lib": "1.17.1", @@ -5481,6 +5482,25 @@ } } }, + "node_modules/browser-use-sdk": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/browser-use-sdk/-/browser-use-sdk-3.4.3.tgz", + "integrity": "sha512-4z4sVHCfboa4zq3a+wcdbhG9yhQH5drRaOuWeHwpZnBGwDdpGmyNnHDvbufFPtTJves8fvgbV10oGjRRdeU1yg==", + "license": "MIT", + "dependencies": { + "dotenv": "^17.2.4", + "zod": "^4.3.6" + } + }, + "node_modules/browser-use-sdk/node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, "node_modules/browserslist": { "version": "4.28.1", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", @@ -6463,6 +6483,18 @@ "url": "https://github.com/fb55/domutils?sponsor=1" } }, + "node_modules/dotenv": { + "version": "17.4.2", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", + "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": 
{ "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", diff --git a/src/agent/prompt-hooks.ts b/src/agent/prompt-hooks.ts index 3985ffc5..947f2bb0 100644 --- a/src/agent/prompt-hooks.ts +++ b/src/agent/prompt-hooks.ts @@ -417,6 +417,7 @@ function buildSafetyHook(context: PromptHookContext): string { 'Use `web_extract` when you want the fetched page condensed into a model-processed markdown summary; it is higher cost than `web_fetch` because it runs an auxiliary model after extraction.', 'Use browser tools only when at least one of these is true: (1) known app-like/auth-gated URL, (2) interaction is required (click/type/login/scroll), (3) `web_fetch` returned escalation hints, (4) user explicitly requested browser use.', 'Prefer browser for: SPAs/client-rendered apps (React/Vue/Angular/Next client routes), dashboards/web apps, social feeds, login/OAuth/cookie-consent/CAPTCHA flows, or API-driven pages that populate after initial render.', + 'When a task is clearly multi-step browser automation or structured extraction, prefer `browser_agent_task` over long `browser_navigate`/`browser_click`/`browser_type` chains.', 'Prefer web_fetch for: docs/wikis/READMEs/articles/reference pages, direct JSON/XML/text/CSV/PDF endpoints, and simple read-only extraction.', 'Escalation signals from web_fetch: `escalationHint` present, JavaScript-required pages, empty extraction, SPA shell-only pages, boilerplate-only extraction, or bot-blocked responses (403/429/challenge pages).', 'Cost note: browser calls are typically ~10-100x slower/more expensive than web_fetch.', diff --git a/src/agent/tool-summary.ts b/src/agent/tool-summary.ts index 4c18a10a..197f0279 100644 --- a/src/agent/tool-summary.ts +++ b/src/agent/tool-summary.ts @@ -25,6 +25,7 @@ const TOOL_GROUPS: ToolGroup[] = [ label: 'Browser', tools: [ 'browser_navigate', + 'browser_agent_task', 'browser_snapshot', 'browser_click', 'browser_type', @@ -36,6 +37,9 @@ const TOOL_GROUPS: 
ToolGroup[] = [ 'browser_pdf', 'browser_vision', 'browser_get_images', + 'browser_save_profile', + 'browser_load_profile', + 'browser_get_recording', 'browser_console', 'browser_network', 'browser_close', diff --git a/src/audit/audit-events.ts b/src/audit/audit-events.ts index 1666d2b3..acd4fa4b 100644 --- a/src/audit/audit-events.ts +++ b/src/audit/audit-events.ts @@ -41,6 +41,21 @@ function summarizeToolResult(text: string): string { return truncateAuditText(text, 280); } +function summarizeAuditToolResult(toolName: string, text: string): string { + if (!toolName.startsWith('browser_')) { + return summarizeToolResult(text); + } + const parsed = parseJsonObject(text); + if (!Object.hasOwn(parsed, 'live_url')) { + return summarizeToolResult(text); + } + const sanitized = { + ...parsed, + live_url: '[REDACTED]', + }; + return summarizeToolResult(JSON.stringify(sanitized)); +} + const SENSITIVE_ARG_KEY_RE = /(pass(word)?|secret|token|api[_-]?key|authorization|cookie|credential|session)/i; @@ -67,6 +82,94 @@ function sanitizeAuditArguments(toolName: string, value: unknown): unknown { return out; } +function asTrimmedString(value: unknown): string | undefined { + return typeof value === 'string' && value.trim() ? value.trim() : undefined; +} + +function asFiniteNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) + ? value + : undefined; +} + +function countArrayItems(value: unknown): number | undefined { + return Array.isArray(value) ? 
value.length : undefined; +} + +function emitBrowserToolAuditEvents(input: { + sessionId: string; + runId: string; + toolCallId: string; + execution: ToolExecution; +}): void { + const result = parseJsonObject(input.execution.result || '{}'); + const executionStrategy = asTrimmedString(result.execution_strategy); + const cloudSessionId = + asTrimmedString(result.cloud_session_id) || + asTrimmedString(result.session_id); + + if (executionStrategy?.startsWith('cloud-') && cloudSessionId) { + recordAuditEvent({ + sessionId: input.sessionId, + runId: input.runId, + event: { + type: 'browser.session', + toolCallId: input.toolCallId, + toolName: input.execution.name, + executionStrategy, + cloudSessionId, + }, + }); + } + + if (input.execution.name !== 'browser_agent_task' || !cloudSessionId) { + return; + } + + const stepCount = asFiniteNumber(result.step_count); + const totalInputTokens = asFiniteNumber(result.total_input_tokens); + const totalOutputTokens = asFiniteNumber(result.total_output_tokens); + const recordingCount = countArrayItems(result.recording_paths); + + recordAuditEvent({ + sessionId: input.sessionId, + runId: input.runId, + event: { + type: 'browser.agent_task', + toolCallId: input.toolCallId, + sessionId: cloudSessionId, + status: asTrimmedString(result.status) || 'unknown', + executionStrategy: executionStrategy || 'cloud-agent', + isTaskSuccessful: + typeof result.is_task_successful === 'boolean' + ? result.is_task_successful + : null, + ...(stepCount != null ? { stepCount } : {}), + ...(asTrimmedString(result.llm_cost_usd) + ? { llmCostUsd: asTrimmedString(result.llm_cost_usd) } + : {}), + ...(asTrimmedString(result.proxy_cost_usd) + ? { proxyCostUsd: asTrimmedString(result.proxy_cost_usd) } + : {}), + ...(asTrimmedString(result.browser_cost_usd) + ? { browserCostUsd: asTrimmedString(result.browser_cost_usd) } + : {}), + ...(asTrimmedString(result.total_cost_usd) + ? 
{ totalCostUsd: asTrimmedString(result.total_cost_usd) } + : {}), + ...(totalInputTokens != null ? { totalInputTokens } : {}), + ...(totalOutputTokens != null ? { totalOutputTokens } : {}), + ...(asTrimmedString(result.profile_id) + ? { profileId: asTrimmedString(result.profile_id) } + : {}), + ...(asTrimmedString(result.workspace_id) + ? { workspaceId: asTrimmedString(result.workspace_id) } + : {}), + ...(recordingCount != null ? { recordingCount } : {}), + }, + }); +} + export function emitToolExecutionAuditEvents(input: { sessionId: string; runId: string; @@ -212,9 +315,19 @@ export function emitToolExecutionAuditEvents(input: { toolName: execution.name, isError: Boolean(execution.isError), blocked: Boolean(execution.blocked), - resultSummary: summarizeToolResult(execution.result || ''), + resultSummary: summarizeAuditToolResult( + execution.name, + execution.result || '', + ), durationMs: execution.durationMs, }, }); + + emitBrowserToolAuditEvents({ + sessionId, + runId, + toolCallId, + execution, + }); }); } diff --git a/src/config/config.ts b/src/config/config.ts index e93709d6..8bafcc4f 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -179,6 +179,11 @@ function syncRuntimeSecretExports(): void { 'HF_TOKEN', storedSecrets, ); + BROWSER_USE_API_KEY = readRuntimeSecretValue( + ['BROWSER_USE_API_KEY'], + 'BROWSER_USE_API_KEY', + storedSecrets, + ); } // Secrets come from the shell environment or ~/.hybridclaw/credentials.json. 
@@ -194,6 +199,7 @@ export let HYBRIDAI_API_KEY = ''; export let OPENROUTER_API_KEY = ''; export let MISTRAL_API_KEY = ''; export let HUGGINGFACE_API_KEY = ''; +export let BROWSER_USE_API_KEY = ''; syncRuntimeSecretExports(); export function refreshRuntimeSecretsFromEnv(): void { @@ -361,6 +367,16 @@ export let LOCAL_HEALTH_CHECK_INTERVAL_MS = 60_000; export let LOCAL_HEALTH_CHECK_TIMEOUT_MS = 5_000; export let LOCAL_DEFAULT_CONTEXT_WINDOW = 128_000; export let LOCAL_DEFAULT_MAX_TOKENS = 8_192; +export let BROWSER_CLOUD_PROVIDER: RuntimeConfig['browser']['cloudProvider'] = + 'none'; +export let BROWSER_USE_BASE_URL = 'https://api.browser-use.com/api/v3'; +export let BROWSER_USE_DEFAULT_MODEL = 'claude-sonnet-4.6'; +export let BROWSER_USE_DEFAULT_PROXY_COUNTRY = 'us'; +export let BROWSER_USE_ENABLE_RECORDING = false; +export let BROWSER_USE_MAX_COST_PER_TASK_USD = 1; +export let BROWSER_USE_MAX_SESSION_TIMEOUT_MINUTES = 30; +export let BROWSER_USE_PREFER_AGENT_MODE = true; +export let BROWSER_USE_DETERMINISTIC_RERUN = true; export let CONTAINER_IMAGE = 'hybridclaw-agent'; export let CONTAINER_MEMORY = '512m'; @@ -710,6 +726,26 @@ function applyRuntimeConfig(config: RuntimeConfig): void { LOCAL_HEALTH_CHECK_TIMEOUT_MS = config.local.healthCheck.timeoutMs; LOCAL_DEFAULT_CONTEXT_WINDOW = config.local.defaultContextWindow; LOCAL_DEFAULT_MAX_TOKENS = config.local.defaultMaxTokens; + BROWSER_CLOUD_PROVIDER = config.browser.cloudProvider; + BROWSER_USE_BASE_URL = normalizeConfiguredBaseUrl( + process.env.BROWSER_USE_BASE_URL, + config.browser.browserUse.baseUrl, + ); + BROWSER_USE_DEFAULT_MODEL = config.browser.browserUse.defaultModel; + BROWSER_USE_DEFAULT_PROXY_COUNTRY = + config.browser.browserUse.defaultProxyCountry; + BROWSER_USE_ENABLE_RECORDING = config.browser.browserUse.enableRecording; + BROWSER_USE_MAX_COST_PER_TASK_USD = Math.max( + 0, + config.browser.browserUse.maxCostPerTaskUsd, + ); + BROWSER_USE_MAX_SESSION_TIMEOUT_MINUTES = Math.max( + 1, + Math.min(240, 
config.browser.browserUse.maxSessionTimeoutMinutes), + ); + BROWSER_USE_PREFER_AGENT_MODE = config.browser.browserUse.preferAgentMode; + BROWSER_USE_DETERMINISTIC_RERUN = + config.browser.browserUse.deterministicRerun; CONTAINER_SANDBOX_MODE = resolveSandboxMode(config); CONTAINER_IMAGE = config.container.image; diff --git a/src/config/runtime-config.ts b/src/config/runtime-config.ts index 652122ba..04be5c6c 100644 --- a/src/config/runtime-config.ts +++ b/src/config/runtime-config.ts @@ -463,6 +463,22 @@ export interface RuntimeHttpRequestToolConfig { authRules: RuntimeHttpRequestAuthRule[]; } +export interface RuntimeBrowserUseConfig { + baseUrl: string; + defaultModel: string; + defaultProxyCountry: string; + enableRecording: boolean; + maxCostPerTaskUsd: number; + maxSessionTimeoutMinutes: number; + preferAgentMode: boolean; + deterministicRerun: boolean; +} + +export interface RuntimeBrowserConfig { + cloudProvider: 'none' | 'browser-use'; + browserUse: RuntimeBrowserUseConfig; +} + export interface RuntimeConfig { version: number; security: RuntimeSecurityConfig; @@ -539,6 +555,7 @@ export interface RuntimeConfig { models: string[]; }; local: LocalProviderConfig; + browser: RuntimeBrowserConfig; auxiliaryModels: { vision: RuntimeAuxiliaryModelPolicyConfig; compression: RuntimeAuxiliaryModelPolicyConfig; @@ -1004,6 +1021,19 @@ const DEFAULT_RUNTIME_CONFIG: RuntimeConfig = { defaultContextWindow: 128_000, defaultMaxTokens: 8_192, }, + browser: { + cloudProvider: 'none', + browserUse: { + baseUrl: 'https://api.browser-use.com/api/v3', + defaultModel: 'claude-sonnet-4.6', + defaultProxyCountry: 'us', + enableRecording: false, + maxCostPerTaskUsd: 1, + maxSessionTimeoutMinutes: 30, + preferAgentMode: true, + deterministicRerun: true, + }, + }, auxiliaryModels: { vision: { provider: 'auto', @@ -1368,6 +1398,63 @@ function normalizeStringArray(value: unknown, fallback: string[]): string[] { return fallback; } +function normalizeBrowserCloudProvider( + value: unknown, 
+ fallback: RuntimeBrowserConfig['cloudProvider'], +): RuntimeBrowserConfig['cloudProvider'] { + if (typeof value !== 'string') return fallback; + const normalized = value.trim().toLowerCase(); + if (normalized === 'browser-use' || normalized === 'browser_use') { + return 'browser-use'; + } + if (normalized === 'none' || normalized === 'off' || normalized === 'local') { + return 'none'; + } + return fallback; +} + +function normalizeBrowserUseConfig( + value: unknown, + fallback: RuntimeBrowserUseConfig, +): RuntimeBrowserUseConfig { + const raw = isRecord(value) ? value : {}; + return { + baseUrl: normalizeString(raw.baseUrl, fallback.baseUrl, { + allowEmpty: false, + }).replace(/\/+$/, ''), + defaultModel: normalizeString(raw.defaultModel, fallback.defaultModel, { + allowEmpty: false, + }), + defaultProxyCountry: normalizeString( + raw.defaultProxyCountry, + fallback.defaultProxyCountry, + { allowEmpty: true }, + ).toLowerCase(), + enableRecording: normalizeBoolean( + raw.enableRecording, + fallback.enableRecording, + ), + maxCostPerTaskUsd: normalizeNumber( + raw.maxCostPerTaskUsd, + fallback.maxCostPerTaskUsd, + { min: 0 }, + ), + maxSessionTimeoutMinutes: normalizeInteger( + raw.maxSessionTimeoutMinutes, + fallback.maxSessionTimeoutMinutes, + { min: 1, max: 240 }, + ), + preferAgentMode: normalizeBoolean( + raw.preferAgentMode, + fallback.preferAgentMode, + ), + deterministicRerun: normalizeBoolean( + raw.deterministicRerun, + fallback.deterministicRerun, + ), + }; +} + function normalizeSkillChannelDisabled( value: unknown, ): Partial> { @@ -3480,6 +3567,7 @@ function normalizeRuntimeConfig( const rawMistral = isRecord(raw.mistral) ? raw.mistral : {}; const rawHuggingFace = isRecord(raw.huggingface) ? raw.huggingface : {}; const rawLocal = isRecord(raw.local) ? raw.local : {}; + const rawBrowser = isRecord(raw.browser) ? raw.browser : {}; const rawAuxiliaryModels = isRecord(raw.auxiliaryModels) ? 
raw.auxiliaryModels : {}; @@ -4091,6 +4179,16 @@ function normalizeRuntimeConfig( { min: 64, max: 1_000_000 }, ), }, + browser: { + cloudProvider: normalizeBrowserCloudProvider( + rawBrowser.cloudProvider, + DEFAULT_RUNTIME_CONFIG.browser.cloudProvider, + ), + browserUse: normalizeBrowserUseConfig( + rawBrowser.browserUse, + DEFAULT_RUNTIME_CONFIG.browser.browserUse, + ), + }, auxiliaryModels: { vision: { provider: normalizeAuxiliaryProviderSelection( diff --git a/src/gateway/gateway-service.ts b/src/gateway/gateway-service.ts index 306e115a..e4000a9c 100644 --- a/src/gateway/gateway-service.ts +++ b/src/gateway/gateway-service.ts @@ -576,6 +576,7 @@ const BASE_SUBAGENT_ALLOWED_TOOLS = [ 'http_request', 'message', 'browser_navigate', + 'browser_agent_task', 'browser_snapshot', 'browser_click', 'browser_type', @@ -589,6 +590,9 @@ const BASE_SUBAGENT_ALLOWED_TOOLS = [ 'vision_analyze', 'image', 'browser_get_images', + 'browser_save_profile', + 'browser_load_profile', + 'browser_get_recording', 'browser_console', 'browser_network', 'browser_close', diff --git a/src/infra/container-runner.ts b/src/infra/container-runner.ts index af34668d..386c53a1 100644 --- a/src/infra/container-runner.ts +++ b/src/infra/container-runner.ts @@ -10,6 +10,16 @@ import { DEFAULT_AGENT_ID } from '../agents/agent-types.js'; import { getBrowserProfileDir } from '../browser/browser-login.js'; import { ADDITIONAL_MOUNTS, + BROWSER_CLOUD_PROVIDER, + BROWSER_USE_API_KEY, + BROWSER_USE_BASE_URL, + BROWSER_USE_DEFAULT_MODEL, + BROWSER_USE_DEFAULT_PROXY_COUNTRY, + BROWSER_USE_DETERMINISTIC_RERUN, + BROWSER_USE_ENABLE_RECORDING, + BROWSER_USE_MAX_COST_PER_TASK_USD, + BROWSER_USE_MAX_SESSION_TIMEOUT_MINUTES, + BROWSER_USE_PREFER_AGENT_MODE, CONTAINER_BINDS, CONTAINER_CPUS, CONTAINER_IMAGE, @@ -564,10 +574,29 @@ function getOrSpawnContainer( `SEARXNG_BASE_URL=${WEB_SEARCH_SEARXNG_BASE_URL}`, '-e', 'PLAYWRIGHT_BROWSERS_PATH=/ms-playwright', + '-e', + `BROWSER_CLOUD_PROVIDER=${BROWSER_CLOUD_PROVIDER}`, 
+ '-e', + `BROWSER_USE_BASE_URL=${BROWSER_USE_BASE_URL}`, + '-e', + `BROWSER_USE_DEFAULT_MODEL=${BROWSER_USE_DEFAULT_MODEL}`, + '-e', + `BROWSER_USE_DEFAULT_PROXY_COUNTRY=${BROWSER_USE_DEFAULT_PROXY_COUNTRY}`, + '-e', + `BROWSER_USE_ENABLE_RECORDING=${BROWSER_USE_ENABLE_RECORDING ? 'true' : 'false'}`, + '-e', + `BROWSER_USE_MAX_COST_PER_TASK_USD=${BROWSER_USE_MAX_COST_PER_TASK_USD}`, + '-e', + `BROWSER_USE_MAX_SESSION_TIMEOUT_MINUTES=${BROWSER_USE_MAX_SESSION_TIMEOUT_MINUTES}`, + '-e', + `BROWSER_USE_PREFER_AGENT_MODE=${BROWSER_USE_PREFER_AGENT_MODE ? 'true' : 'false'}`, + '-e', + `BROWSER_USE_DETERMINISTIC_RERUN=${BROWSER_USE_DETERMINISTIC_RERUN ? 'true' : 'false'}`, ]; for (const [name, value] of [ ['BRAVE_API_KEY', process.env.BRAVE_API_KEY || ''], + ['BROWSER_USE_API_KEY', BROWSER_USE_API_KEY], ['PERPLEXITY_API_KEY', process.env.PERPLEXITY_API_KEY || ''], ['TAVILY_API_KEY', process.env.TAVILY_API_KEY || ''], ] as const) { diff --git a/src/scheduler/heartbeat.ts b/src/scheduler/heartbeat.ts index f72e4616..2d3010b2 100644 --- a/src/scheduler/heartbeat.ts +++ b/src/scheduler/heartbeat.ts @@ -61,6 +61,7 @@ const HEARTBEAT_ALLOWED_TOOLS = [ 'message', 'cron', 'browser_navigate', + 'browser_agent_task', 'browser_snapshot', 'browser_click', 'browser_type', @@ -73,6 +74,9 @@ const HEARTBEAT_ALLOWED_TOOLS = [ 'vision_analyze', 'image', 'browser_get_images', + 'browser_save_profile', + 'browser_load_profile', + 'browser_get_recording', 'browser_console', 'browser_network', 'browser_close', diff --git a/src/security/runtime-secrets.ts b/src/security/runtime-secrets.ts index 0d262cc6..6c473e31 100644 --- a/src/security/runtime-secrets.ts +++ b/src/security/runtime-secrets.ts @@ -34,6 +34,7 @@ const SECRET_KEYS = [ 'GOOGLE_API_KEY', 'VLLM_API_KEY', 'BRAVE_API_KEY', + 'BROWSER_USE_API_KEY', 'DISCORD_TOKEN', 'EMAIL_PASSWORD', 'TELEGRAM_BOT_TOKEN', diff --git a/tests/audit-events.test.ts b/tests/audit-events.test.ts index a2b85378..282dd64b 100644 --- 
a/tests/audit-events.test.ts +++ b/tests/audit-events.test.ts @@ -110,3 +110,107 @@ test('emits approval request and response events for pending red actions', async 'tool.call', ]); }); + +test('emits Browser Use session and cost audit events for browser_agent_task', async () => { + const homeDir = makeTempHome(); + process.env.HOME = homeDir; + vi.resetModules(); + + const { initDatabase, getRecentStructuredAuditForSession } = await import( + '../src/memory/db.ts' + ); + const { emitToolExecutionAuditEvents } = await import( + '../src/audit/audit-events.ts' + ); + + initDatabase({ quiet: true }); + emitToolExecutionAuditEvents({ + sessionId: 'session-browser-use-audit', + runId: 'run-browser-use-audit', + toolExecutions: [ + { + name: 'browser_agent_task', + arguments: + '{"task":"Extract account balances","output_schema":{"type":"object"}}', + result: JSON.stringify({ + execution_strategy: 'cloud-agent', + session_id: 'browser-use-session-123', + status: 'idle', + is_task_successful: true, + step_count: 4, + llm_cost_usd: '0.12', + proxy_cost_usd: '0.02', + browser_cost_usd: '0.03', + total_cost_usd: '0.17', + total_input_tokens: 345, + total_output_tokens: 67, + profile_id: 'profile-123', + workspace_id: 'workspace-123', + live_url: 'https://browser-use.example/live/session-123', + recording_paths: [ + '.browser-artifacts/recordings/session-session-browser-use-audit-1.mp4', + ], + }), + durationMs: 1578, + isError: false, + blocked: false, + approvalTier: 'green', + approvalBaseTier: 'green', + approvalDecision: 'auto', + }, + ], + }); + + const events = getRecentStructuredAuditForSession( + 'session-browser-use-audit', + 10, + ); + const sessionEvent = events.find( + (event) => event.event_type === 'browser.session', + ); + const agentTaskEvent = events.find( + (event) => event.event_type === 'browser.agent_task', + ); + + expect(sessionEvent).toBeDefined(); + expect(agentTaskEvent).toBeDefined(); + expect(sessionEvent ? 
JSON.parse(sessionEvent.payload) : null).toMatchObject({ + type: 'browser.session', + executionStrategy: 'cloud-agent', + cloudSessionId: 'browser-use-session-123', + toolName: 'browser_agent_task', + }); + expect( + sessionEvent ? JSON.parse(sessionEvent.payload) : null, + ).not.toHaveProperty('liveUrl'); + expect( + agentTaskEvent ? JSON.parse(agentTaskEvent.payload) : null, + ).toMatchObject({ + type: 'browser.agent_task', + sessionId: 'browser-use-session-123', + status: 'idle', + executionStrategy: 'cloud-agent', + isTaskSuccessful: true, + stepCount: 4, + llmCostUsd: '0.12', + totalCostUsd: '0.17', + totalInputTokens: 345, + totalOutputTokens: 67, + profileId: 'profile-123', + workspaceId: 'workspace-123', + recordingCount: 1, + }); + expect( + agentTaskEvent ? JSON.parse(agentTaskEvent.payload) : null, + ).not.toHaveProperty('liveUrl'); + const toolResultEvent = events.find( + (event) => event.event_type === 'tool.result', + ); + expect( + toolResultEvent ? JSON.parse(toolResultEvent.payload) : null, + ).toMatchObject({ + resultSummary: expect.not.stringContaining( + 'https://browser-use.example/live/session-123', + ), + }); +}); diff --git a/tests/browser-navigate-routing.test.ts b/tests/browser-navigate-routing.test.ts new file mode 100644 index 00000000..c2e298a9 --- /dev/null +++ b/tests/browser-navigate-routing.test.ts @@ -0,0 +1,188 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import { afterEach, expect, test, vi } from 'vitest'; + +let tempRoot = ''; + +function createAgentBrowserNavigateStub(root: string, logPath: string): string { + const scriptPath = path.join(root, 'agent-browser-navigate-stub.mjs'); + fs.writeFileSync( + scriptPath, + `#!/usr/bin/env node +import fs from 'node:fs'; + +const args = process.argv.slice(2); +const jsonIndex = args.indexOf('--json'); +const command = jsonIndex >= 0 ? args[jsonIndex + 1] : ''; +const commandArgs = jsonIndex >= 0 ? 
args.slice(jsonIndex + 2) : []; +fs.appendFileSync( + ${JSON.stringify(logPath)}, + JSON.stringify({ command, commandArgs, rawArgs: args }) + '\\n', +); + +if (command === 'open') { + process.stdout.write(JSON.stringify({ + data: { + url: commandArgs[commandArgs.length - 1] || 'https://example.com', + title: 'Example', + }, + })); +} else if (command === 'eval') { + process.stdout.write(JSON.stringify({ + data: { + result: { + text_length: 0, + preview: '', + preview_truncated: false, + has_noscript: false, + root_shell: false, + ready_state: 'complete', + }, + }, + })); +} else { + process.stdout.write(JSON.stringify({ data: {} })); +} +`, + 'utf-8', + ); + fs.chmodSync(scriptPath, 0o755); + return scriptPath; +} + +async function importBrowserToolsWithProvider( + browserUseProvider: Record<string, unknown>, +) { + vi.resetModules(); + vi.doMock('../container/src/browser-use-provider.js', () => ({ + browserUseProvider, + })); + return await import('../container/src/browser-tools.js'); +} + +afterEach(() => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + vi.resetModules(); + vi.doUnmock('../container/src/browser-use-provider.js'); + if (tempRoot) { + fs.rmSync(tempRoot, { recursive: true, force: true }); + tempRoot = ''; + } +}); + +test('browser_navigate keeps the local browser path by default when Browser Use is configured', async () => { + tempRoot = fs.mkdtempSync( + path.join(os.tmpdir(), 'hybridclaw-browser-navigate-local-'), + ); + const logPath = path.join(tempRoot, 'navigate-log.jsonl'); + const ensureCdpSession = vi.fn(); + vi.stubEnv('HYBRIDCLAW_AGENT_WORKSPACE_ROOT', tempRoot); + vi.stubEnv( + 'AGENT_BROWSER_BIN', + createAgentBrowserNavigateStub(tempRoot, logPath), + ); + + const { executeBrowserTool } = await importBrowserToolsWithProvider({ + isEnabled: () => true, + shouldUseCloudCdp: () => false, + ensureCdpSession, + closeLocalSession: async () => ({ warnings: [], artifacts: [] }), + getTrackedSessionIds: () => [], + getLatestRecordingArtifacts: () => [], + });
+ + const output = await executeBrowserTool( + 'browser_navigate', + { url: 'https://example.com' }, + 'session-1', + ); + const parsed = JSON.parse(output) as { + success: boolean; + execution_strategy: string; + }; + const commands = fs + .readFileSync(logPath, 'utf-8') + .trim() + .split('\n') + .map( + (line) => + JSON.parse(line) as { + command: string; + commandArgs: string[]; + rawArgs: string[]; + }, + ); + const openCommand = commands.find((entry) => entry.command === 'open'); + + expect(parsed.success).toBe(true); + expect(parsed.execution_strategy).toBe('local-cdp'); + expect(ensureCdpSession).not.toHaveBeenCalled(); + expect(openCommand?.commandArgs).not.toContain('--cdp'); +}); + +test('browser_navigate uses cloud CDP only when the Browser Use session explicitly opts in', async () => { + tempRoot = fs.mkdtempSync( + path.join(os.tmpdir(), 'hybridclaw-browser-navigate-cloud-'), + ); + const logPath = path.join(tempRoot, 'navigate-log.jsonl'); + const ensureCdpSession = vi.fn(async () => ({ + id: 'browser-123', + cdpUrl: 'wss://browser-use.example/cdp/browser-123', + liveUrl: 'https://browser-use.example/live/browser-123', + enableRecording: false, + })); + vi.stubEnv('HYBRIDCLAW_AGENT_WORKSPACE_ROOT', tempRoot); + vi.stubEnv( + 'AGENT_BROWSER_BIN', + createAgentBrowserNavigateStub(tempRoot, logPath), + ); + + const { executeBrowserTool } = await importBrowserToolsWithProvider({ + isEnabled: () => true, + shouldUseCloudCdp: () => true, + ensureCdpSession, + closeLocalSession: async () => ({ warnings: [], artifacts: [] }), + getTrackedSessionIds: () => [], + getLatestRecordingArtifacts: () => [], + }); + + const output = await executeBrowserTool( + 'browser_navigate', + { url: 'https://example.com' }, + 'session-1', + ); + const parsed = JSON.parse(output) as { + success: boolean; + execution_strategy: string; + cloud_session_id?: string; + }; + const commands = fs + .readFileSync(logPath, 'utf-8') + .trim() + .split('\n') + .map( + (line) => + 
JSON.parse(line) as { + command: string; + commandArgs: string[]; + rawArgs: string[]; + }, + ); + const openCommand = commands.find((entry) => entry.command === 'open'); + + expect(parsed.success).toBe(true); + expect(parsed.execution_strategy).toBe('cloud-cdp'); + expect(parsed.cloud_session_id).toBe('browser-123'); + expect(ensureCdpSession).toHaveBeenCalledWith({ + localSessionId: 'session-1', + proxyCountry: undefined, + timeoutMinutes: undefined, + }); + expect(openCommand?.rawArgs).toContain('--cdp'); + expect(openCommand?.rawArgs).toContain( + 'wss://browser-use.example/cdp/browser-123', + ); +}); diff --git a/tests/browser-tool-schema.test.ts b/tests/browser-tool-schema.test.ts index c04b2634..a2af3b6a 100644 --- a/tests/browser-tool-schema.test.ts +++ b/tests/browser-tool-schema.test.ts @@ -23,3 +23,45 @@ test('browser_click schema avoids unsupported top-level combinators', () => { expect(parameters.allOf).toBeUndefined(); expect(parameters.not).toBeUndefined(); }); + +test('browser_agent_task exposes a plain object schema for structured output', () => { + const browserAgentTask = BROWSER_TOOL_DEFINITIONS.find( + (entry) => + entry.type === 'function' && entry.function.name === 'browser_agent_task', + ); + expect(browserAgentTask).toBeDefined(); + + const outputSchema = browserAgentTask?.function.parameters.properties + .output_schema as { + type?: string | string[]; + anyOf?: unknown; + oneOf?: unknown; + allOf?: unknown; + }; + const artifactPaths = browserAgentTask?.function.parameters.properties + .artifact_paths as { + type?: string | string[]; + items?: { type?: string | string[] }; + }; + + expect(outputSchema.type).toBe('object'); + expect(outputSchema.anyOf).toBeUndefined(); + expect(outputSchema.oneOf).toBeUndefined(); + expect(outputSchema.allOf).toBeUndefined(); + expect(artifactPaths.type).toBe('array'); + expect(artifactPaths.items?.type).toBe('string'); +}); + +test('browser_get_recording exposes an empty object schema', () => { + const 
browserGetRecording = BROWSER_TOOL_DEFINITIONS.find( + (entry) => + entry.type === 'function' && + entry.function.name === 'browser_get_recording', + ); + expect(browserGetRecording).toBeDefined(); + expect(browserGetRecording?.function.parameters).toEqual({ + type: 'object', + properties: {}, + required: [], + }); +}); diff --git a/tests/browser-use-provider.test.ts b/tests/browser-use-provider.test.ts new file mode 100644 index 00000000..09c915ed --- /dev/null +++ b/tests/browser-use-provider.test.ts @@ -0,0 +1,463 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; + +import { afterEach, expect, test, vi } from 'vitest'; + +type MockState = { + browserCreateResult: Record<string, unknown>; + browserCreateCalls: Record<string, unknown>[]; + browserStopCalls: string[]; + browserStopResult: Record<string, unknown>; + profileCreateCalls: Record<string, unknown>[]; + profileGetCalls: string[]; + profileListCalls: Record<string, unknown>[]; + profileCreateResult: Record<string, unknown>; + profileListItems: Record<string, unknown>[]; + workspaceCreateCalls: Record<string, unknown>[]; + workspaceCreateResult: Record<string, unknown>; + workspaceGetCalls: string[]; + workspaceUploadCalls: Array<{ + workspaceId: string; + filePath: string; + options: Record<string, unknown>; + }>; + workspaceDownloadCalls: Array<{ + workspaceId: string; + options: Record<string, unknown>; + }>; + sessionGetCalls: string[]; + sessionGetResult: Record<string, unknown> | null; + sessionStopCalls: Array<{ sessionId: string; body: Record<string, unknown> }>; + sessionStopResult: Record<string, unknown>; + sessionWaitForRecordingCalls: string[]; + sessionWaitForRecordingResult: string[]; + runCalls: Array<{ task: string; options: Record<string, unknown> }>; + runMessages: Record<string, unknown>[]; + runResult: Record<string, unknown>; +}; + +let mockState: MockState; +let tempRoot = ''; + +function resetMockState(): void { + mockState = { + browserCreateResult: { + id: 'browser-1', + status: 'active', + cdpUrl: 'wss://browser-use.example/cdp', + liveUrl: 'https://browser-use.example/live/browser-1', + timeoutAt: '2026-04-13T10:00:00.000Z', + }, + browserCreateCalls: [], + browserStopCalls: [], + browserStopResult: { + id: 'browser-1',
status: 'stopped', + recordingUrl: null, + }, + profileCreateCalls: [], + profileGetCalls: [], + profileListCalls: [], + profileCreateResult: { + id: 'profile-created', + name: 'Saved profile', + userId: 'user-1', + createdAt: '2026-04-13T09:00:00.000Z', + updatedAt: '2026-04-13T09:00:00.000Z', + }, + profileListItems: [], + workspaceCreateCalls: [], + workspaceCreateResult: { + id: 'workspace-1', + name: 'HybridClaw session', + createdAt: '2026-04-13T09:00:00.000Z', + updatedAt: '2026-04-13T09:00:00.000Z', + }, + workspaceGetCalls: [], + workspaceUploadCalls: [], + workspaceDownloadCalls: [], + sessionGetCalls: [], + sessionGetResult: null, + sessionStopCalls: [], + sessionStopResult: { + id: 'session-1', + status: 'stopped', + recordingUrls: [], + }, + sessionWaitForRecordingCalls: [], + sessionWaitForRecordingResult: [], + runCalls: [], + runMessages: [], + runResult: { + id: 'session-1', + status: 'idle', + output: { ok: true }, + stepCount: 2, + lastStepSummary: 'Finished', + isTaskSuccessful: true, + liveUrl: 'https://browser-use.example/live/session-1', + recordingUrls: [], + profileId: 'profile-created', + workspaceId: 'workspace-1', + llmCostUsd: '0.10', + proxyCostUsd: '0.01', + browserCostUsd: '0.02', + totalCostUsd: '0.13', + totalInputTokens: 120, + totalOutputTokens: 30, + screenshotUrl: 'https://browser-use.example/screens/session-1.png', + }, + }; +} + +function createMockBrowserUseModule() { + class BrowserUse { + browsers = { + create: vi.fn(async (body: Record<string, unknown> = {}) => { + mockState.browserCreateCalls.push({ ...body }); + return structuredClone(mockState.browserCreateResult); + }), + stop: vi.fn(async (sessionId: string) => { + mockState.browserStopCalls.push(sessionId); + return structuredClone(mockState.browserStopResult); + }), + }; + + profiles = { + create: vi.fn(async (body: Record<string, unknown> = {}) => { + mockState.profileCreateCalls.push({ ...body }); + return structuredClone(mockState.profileCreateResult); + }), + get: vi.fn(async (profileId:
string) => { + mockState.profileGetCalls.push(profileId); + return { + ...structuredClone(mockState.profileCreateResult), + id: profileId, + }; + }), + list: vi.fn(async (params: Record<string, unknown> = {}) => { + mockState.profileListCalls.push({ ...params }); + return { + items: structuredClone(mockState.profileListItems), + totalItems: mockState.profileListItems.length, + pageNumber: 1, + pageSize: 100, + }; + }), + }; + + workspaces = { + create: vi.fn(async (body: Record<string, unknown> = {}) => { + mockState.workspaceCreateCalls.push({ ...body }); + return structuredClone(mockState.workspaceCreateResult); + }), + get: vi.fn(async (workspaceId: string) => { + mockState.workspaceGetCalls.push(workspaceId); + return { + ...structuredClone(mockState.workspaceCreateResult), + id: workspaceId, + }; + }), + upload: vi.fn( + async ( + workspaceId: string, + filePath: string, + options: Record<string, unknown> = {}, + ) => { + mockState.workspaceUploadCalls.push({ + workspaceId, + filePath, + options: { ...options }, + }); + return [filePath]; + }, + ), + downloadAll: vi.fn( + async (workspaceId: string, options: Record<string, unknown> = {}) => { + mockState.workspaceDownloadCalls.push({ + workspaceId, + options: { ...options }, + }); + const outDir = String(options.to || ''); + if (outDir) { + fs.mkdirSync(outDir, { recursive: true }); + fs.writeFileSync(path.join(outDir, 'downloaded.json'), '{}'); + } + return [path.join(outDir, 'downloaded.json')]; + }, + ), + }; + + sessions = { + get: vi.fn(async (sessionId: string) => { + mockState.sessionGetCalls.push(sessionId); + if (mockState.sessionGetResult) { + return structuredClone(mockState.sessionGetResult); + } + return { + id: sessionId, + status: 'stopped', + }; + }), + stop: vi.fn( + async (sessionId: string, body: Record<string, unknown> = {}) => { + mockState.sessionStopCalls.push({ sessionId, body: { ...body } }); + return structuredClone(mockState.sessionStopResult); + }, + ), + waitForRecording: vi.fn(async (sessionId: string) => { + mockState.sessionWaitForRecordingCalls.push(sessionId);
+ return [...mockState.sessionWaitForRecordingResult]; + }), + }; + + run(task: string, options: Record<string, unknown> = {}) { + mockState.runCalls.push({ task, options: { ...options } }); + const result = structuredClone(mockState.runResult); + const messages = structuredClone(mockState.runMessages) as Array< + Record<string, unknown> + >; + const run = Promise.resolve(result) as Promise<typeof result> & { + sessionId: string; + result: typeof result; + [Symbol.asyncIterator](): AsyncGenerator< + Record<string, unknown>, + void, + unknown + >; + }; + run.sessionId = String(result.id || ''); + run.result = result; + run[Symbol.asyncIterator] = async function* () { + for (const message of messages) { + yield message; + } + }; + return run; + } + } + + return { + BrowserUse, + }; +} + +async function importFreshProvider() { + vi.resetModules(); + vi.doMock('browser-use-sdk/v3', () => createMockBrowserUseModule()); + return await import('../container/src/browser-use-provider.js'); +} + +afterEach(() => { + vi.unstubAllEnvs(); + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + vi.resetModules(); + vi.doUnmock('browser-use-sdk/v3'); + resetMockState(); + if (tempRoot) { + fs.rmSync(tempRoot, { recursive: true, force: true }); + tempRoot = ''; + } +}); + +test('ensureCdpSession creates one cloud browser and reuses it for the same HybridClaw session', async () => { + resetMockState(); + vi.stubEnv('BROWSER_CLOUD_PROVIDER', 'browser-use'); + vi.stubEnv('BROWSER_USE_API_KEY', 'bu-test-key'); + vi.stubEnv('BROWSER_USE_DEFAULT_PROXY_COUNTRY', 'us'); + + const { browserUseProvider } = await importFreshProvider(); + const profile = await browserUseProvider.createProfile({ + localSessionId: 'session-a', + name: 'Authenticated profile', + }); + expect(profile.profile.id).toBe('profile-created'); + + const first = await browserUseProvider.ensureCdpSession({ + localSessionId: 'session-a', + }); + const second = await browserUseProvider.ensureCdpSession({ + localSessionId: 'session-a', + }); + +
expect(first.cdpUrl).toBe('wss://browser-use.example/cdp'); + expect(second.id).toBe(first.id); + expect(mockState.browserCreateCalls).toHaveLength(1); + expect(mockState.browserCreateCalls[0]).toMatchObject({ + profileId: 'profile-created', + proxyCountryCode: 'us', + timeout: 30, + enableRecording: false, + }); +}); + +test('runAgentTask syncs artifacts, streams progress, and downloads the recording', async () => { + resetMockState(); + vi.stubEnv('BROWSER_CLOUD_PROVIDER', 'browser-use'); + vi.stubEnv('BROWSER_USE_API_KEY', 'bu-test-key'); + vi.stubEnv('BROWSER_USE_ENABLE_RECORDING', 'true'); + + tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'hybridclaw-browser-use-')); + fs.writeFileSync(path.join(tempRoot, 'input.txt'), 'hello world', 'utf-8'); + fs.writeFileSync(path.join(tempRoot, 'stale.txt'), 'stale data', 'utf-8'); + + mockState.workspaceCreateResult = { + id: 'workspace-123', + name: 'Workspace 123', + createdAt: '2026-04-13T09:00:00.000Z', + updatedAt: '2026-04-13T09:00:00.000Z', + }; + mockState.runResult = { + id: 'session-123', + status: 'idle', + output: { title: 'Example' }, + stepCount: 3, + lastStepSummary: 'Completed extraction', + isTaskSuccessful: true, + liveUrl: 'https://browser-use.example/live/session-123', + recordingUrls: [], + profileId: 'profile-created', + workspaceId: 'workspace-123', + llmCostUsd: '0.12', + proxyCostUsd: '0.02', + browserCostUsd: '0.03', + totalCostUsd: '0.17', + totalInputTokens: 345, + totalOutputTokens: 67, + screenshotUrl: 'https://browser-use.example/screens/session-123.png', + }; + mockState.runMessages = [ + { + id: 'msg-1', + sessionId: 'session-123', + role: 'ai', + data: 'Opening example.com', + type: 'planning', + summary: 'Opening example.com', + hidden: false, + createdAt: '2026-04-13T09:01:00.000Z', + }, + { + id: 'msg-2', + sessionId: 'session-123', + role: 'ai', + data: 'Extracting fields', + type: 'browser_action', + summary: 'Extracting fields', + hidden: false, + createdAt: 
'2026-04-13T09:01:05.000Z', + }, + ]; + mockState.sessionWaitForRecordingResult = [ + 'https://browser-use.example/recordings/session-123.mp4', + ]; + + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response(new Uint8Array([1, 2, 3]), { status: 200 })), + ); + + const progress = vi.fn(); + const { browserUseProvider } = await importFreshProvider(); + const result = await browserUseProvider.runAgentTask({ + localSessionId: 'session-b', + task: 'Extract the page title and return it as structured JSON.', + outputSchema: { + type: 'object', + properties: { + title: { type: 'string' }, + }, + }, + artifactPaths: ['input.txt'], + artifactRoot: tempRoot, + progress, + }); + + expect(mockState.workspaceCreateCalls).toHaveLength(1); + expect(mockState.workspaceUploadCalls).toHaveLength(1); + expect(mockState.workspaceUploadCalls[0]).toMatchObject({ + workspaceId: 'workspace-123', + filePath: path.join(tempRoot, 'input.txt'), + }); + expect(mockState.runCalls[0]).toMatchObject({ + task: 'Extract the page title and return it as structured JSON.', + }); + expect(mockState.runCalls[0]?.options).toMatchObject({ + workspaceId: 'workspace-123', + outputSchema: { + type: 'object', + properties: { + title: { type: 'string' }, + }, + }, + enableRecording: true, + cacheScript: true, + autoHeal: true, + maxCostUsd: 1, + }); + expect(progress).toHaveBeenCalledWith( + 'Opening example.com', + expect.objectContaining({ id: 'msg-1' }), + ); + expect(progress).toHaveBeenCalledWith( + 'Extracting fields', + expect.objectContaining({ id: 'msg-2' }), + ); + expect(mockState.workspaceDownloadCalls).toHaveLength(1); + expect(mockState.workspaceDownloadCalls[0]).toMatchObject({ + workspaceId: 'workspace-123', + options: { + to: path.join( + tempRoot, + 'browser-use-workspaces', + 'session-b', + 'session-123', + ), + }, + }); + expect(mockState.sessionWaitForRecordingCalls).toEqual(['session-123']); + expect(result.totalCostUsd).toBe('0.17'); + expect(result.recordingPaths).toHaveLength(1); + 
expect(result.workspaceArtifactPaths).toEqual([ + path.join( + tempRoot, + 'browser-use-workspaces', + 'session-b', + 'session-123', + 'downloaded.json', + ), + ]); + expect(fs.existsSync(result.recordingPaths[0] || '')).toBe(true); + expect( + browserUseProvider.getLatestRecordingArtifacts('session-b'), + ).toMatchObject([ + { + path: result.recordingPaths[0], + mimeType: 'video/mp4', + }, + ]); +}); + +test('closeLocalSession stops tracked cloud sessions and clears tracked ids', async () => { + resetMockState(); + vi.stubEnv('BROWSER_CLOUD_PROVIDER', 'browser-use'); + vi.stubEnv('BROWSER_USE_API_KEY', 'bu-test-key'); + + tempRoot = fs.mkdtempSync( + path.join(os.tmpdir(), 'hybridclaw-browser-close-'), + ); + + const { browserUseProvider } = await importFreshProvider(); + await browserUseProvider.ensureCdpSession({ + localSessionId: 'session-c', + }); + + expect(browserUseProvider.getTrackedSessionIds()).toEqual(['session-c']); + + await browserUseProvider.closeLocalSession('session-c', tempRoot); + + expect(mockState.browserStopCalls).toEqual(['browser-1']); + expect(browserUseProvider.getTrackedSessionIds()).toEqual([]); +}); diff --git a/tests/prompt-hooks.tool-summary.test.ts b/tests/prompt-hooks.tool-summary.test.ts index f4a11a4d..cd79dbc3 100644 --- a/tests/prompt-hooks.tool-summary.test.ts +++ b/tests/prompt-hooks.tool-summary.test.ts @@ -17,9 +17,11 @@ test('buildToolsSummary groups the full tool catalog', () => { expect(summary).toContain( '**Files**: `read`, `write`, `edit`, `delete`, `glob`, `grep`', ); - expect(summary).toContain( - '**Browser**: `browser_navigate`, `browser_snapshot`, `browser_click`', - ); + expect(summary).toContain('**Browser**:'); + expect(summary).toContain('`browser_navigate`'); + expect(summary).toContain('`browser_agent_task`'); + expect(summary).toContain('`browser_snapshot`'); + expect(summary).toContain('`browser_click`'); expect(summary).toContain( '**Web**: `web_search`, `web_fetch`, `web_extract`, `http_request`', );