From 7d645cd91199dd87d77cd95cf65ecd598e7a031c Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 5 Dec 2025 21:26:26 -0800 Subject: [PATCH 01/13] ad the action cache list and return on each page --- src/agent/index.ts | 26 ++++++- src/agent/shared/action-cache.ts | 116 +++++++++++++++++++++++++++++++ src/agent/tools/agent.ts | 26 ++++++- src/types/agent/types.ts | 24 +++++++ src/types/index.ts | 4 ++ 5 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 src/agent/shared/action-cache.ts diff --git a/src/agent/index.ts b/src/agent/index.ts index 33a5a39..8e034b0 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -12,6 +12,7 @@ import { ActionContext, ActionType, AgentActionDefinition, + ActionCacheOutput, endTaskStatuses, Task, TaskOutput, @@ -71,6 +72,7 @@ export class HyperAgent { private browserProviderType: T; private actions: Array = [...DEFAULT_ACTIONS]; private cdpActionsEnabled: boolean; + private actionCacheByTaskId: Record = {}; public browser: Browser | null = null; public context: BrowserContext | null = null; @@ -248,6 +250,15 @@ export class HyperAgent { delete this._variables[key]; } + public getActionCache(taskId: string): ActionCacheOutput | null { + const cache = this.actionCacheByTaskId[taskId]; + if (!cache) return null; + return { + ...cache, + steps: [...cache.steps], + }; + } + /** * Get all pages in the context * @returns Array of HyperPage objects @@ -352,6 +363,7 @@ export class HyperAgent { throw new HyperagentError(`Task ${taskId} not found`); } return { + id: taskId, getStatus: () => taskState.status, pause: () => { if (taskState.status === TaskStatus.RUNNING) { @@ -432,7 +444,12 @@ export class HyperAgent { taskState, mergedParams ) - .then(() => cleanup()) + .then((result) => { + if (result.actionCache) { + this.actionCacheByTaskId[taskId] = result.actionCache; + } + cleanup(); + }) .catch((error: Error) => { cleanup(); // Retrieve the correct state to update @@ -510,6 +527,9 @@ export class HyperAgent { mergedParams ); this.context?.off("page", onPage); + if (result.actionCache) { + this.actionCacheByTaskId[taskId] = result.actionCache; + } return result; } catch (error) { this.context?.off("page", onPage); @@ -766,6 +786,7 @@ export class HyperAgent { pageOrGetter: Page | (() => Page), _params?: TaskParams ): Promise { + const taskId = uuidv4(); const actionStart = performance.now(); const startTime = new Date().toISOString(); if (this.debug) { @@ -930,6 +951,7 @@ export class HyperAgent { logPerf(this.debug, "[Perf][executeSingleAction] total", actionStart); return { + taskId, status: TaskStatus.COMPLETED, steps: [], output: `Successfully executed: ${instruction}`, @@ -1236,6 +1258,8 @@ export class HyperAgent { return executeSingleActionWithRetry(instruction, params); }; + hyperPage.getActionCache = (taskId: string) => this.getActionCache(taskId); + // aiAsync tasks run in background, so we just use the current scope start point. // The task itself has internal auto-following logic (from executeTaskAsync implementation). hyperPage.aiAsync = (task: string, params?: TaskParams) => diff --git a/src/agent/shared/action-cache.ts b/src/agent/shared/action-cache.ts new file mode 100644 index 0000000..e72a684 --- /dev/null +++ b/src/agent/shared/action-cache.ts @@ -0,0 +1,116 @@ +import { ActionOutput, ActionType } from "@/types"; +import { ActionCacheEntry } from "@/types/agent/types"; +import { + A11yDOMState, + asEncodedId, +} from "@/context-providers/a11y-dom/types"; + +const TEXT_NODE_SUFFIX = /\/text\(\)(\[\d+\])?$/iu; + +const isString = (value: unknown): value is string => + typeof value === "string"; + +const isStringArray = (value: unknown): value is string[] => + Array.isArray(value) && value.every((item) => typeof item === "string"); + +const normalizeXPath = (raw?: string | null): string | null => { + if (!raw) { + return null; + } + return raw.replace(TEXT_NODE_SUFFIX, ""); +}; + +const extractInstruction = (action: ActionType): string => { + const params = action.params as Record; + if (isString(params.instruction)) { + return params.instruction; + } + return action.type; +}; + +const extractElementId = (action: ActionType): string | null => { + const params = action.params as Record; + if (isString(params.elementId)) { + return params.elementId; + } + return null; +}; + +const extractMethod = (action: ActionType): string | null => { + const params = action.params as Record; + if (isString(params.method)) { + return params.method; + } + return null; +}; + +const extractArguments = (action: ActionType): string[] => { + const params = action.params as Record; + if (isStringArray(params.arguments)) { + return params.arguments; + } + return []; +}; + +const extractFrameIndex = (elementId: string | null): number | null => { + if (!elementId) { + return null; + } + const encodedId = asEncodedId(elementId); + if (!encodedId) { + return null; + } + const [framePart] = encodedId.split("-"); + const parsed = Number.parseInt(framePart, 10); + return Number.isNaN(parsed) ? null : parsed; +}; + +const extractXPathFromDebug = (actionOutput: ActionOutput): string | null => { + const debug = actionOutput.debug as Record | undefined; + if (!debug || typeof debug !== "object") { + return null; + } + + const metadata = debug.elementMetadata as Record | undefined; + if (metadata && isString(metadata.xpath)) { + return metadata.xpath; + } + return null; +}; + +export const buildActionCacheEntry = ({ + stepIndex, + action, + actionOutput, + domState, +}: { + stepIndex: number; + action: ActionType; + actionOutput: ActionOutput; + domState: A11yDOMState; +}): ActionCacheEntry => { + const instruction = extractInstruction(action); + const elementId = extractElementId(action); + const method = extractMethod(action); + const args = extractArguments(action); + const encodedId = elementId ? asEncodedId(elementId) : undefined; + const frameIndex = extractFrameIndex(elementId); + + const xpathFromDom = encodedId ? domState.xpathMap?.[encodedId] || null : null; + const xpath = normalizeXPath( + xpathFromDom || extractXPathFromDebug(actionOutput) + ); + + return { + stepIndex, + instruction, + elementId, + method, + arguments: args, + frameIndex, + xpath, + actionType: action.type, + success: actionOutput.success, + message: actionOutput.message, + }; +}; diff --git a/src/agent/tools/agent.ts b/src/agent/tools/agent.ts index 5dc96bd..f5109ac 100644 --- a/src/agent/tools/agent.ts +++ b/src/agent/tools/agent.ts @@ -1,4 +1,4 @@ -import { AgentStep } from "@/types/agent/types"; +import { ActionCacheOutput, AgentStep } from "@/types/agent/types"; import fs from "fs"; import { performance } from "perf_hooks"; @@ -39,6 +39,7 @@ import { ActionNotFoundError } from "../actions"; import { AgentCtx } from "./types"; import { HyperAgentMessage } from "@/llm/types"; import { Jimp } from "jimp"; +import { buildActionCacheEntry } from "../shared/action-cache"; // DomChunkAggregator logic moved to shared/dom-capture.ts @@ -267,6 +268,7 @@ export const runAgentTask = async ( const MAX_CONSECUTIVE_FAILURES_OR_WAITS = 5; let lastOverlayKey: string | null = null; let lastScreenshotBase64: string | undefined; + const actionCacheSteps: ActionCacheOutput["steps"] = []; try { // Initialize context at the start of the task @@ -560,6 +562,14 @@ export const runAgentTask = async ( markDomSnapshotDirty(page); } + const actionCacheEntry = buildActionCacheEntry({ + stepIndex: currStep, + action, + actionOutput, + domState, + }); + actionCacheSteps.push(actionCacheEntry); + // Check action result and handle retry logic if (action.type === "wait") { // Wait action - increment counter @@ -659,10 +669,24 @@ export const runAgentTask = async ( cleanupDomListeners(page); } + const actionCache: ActionCacheOutput = { + taskId, + createdAt: new Date().toISOString(), + status: taskState.status, + steps: actionCacheSteps, + }; + fs.mkdirSync(debugDir, { recursive: true }); + fs.writeFileSync( + `${debugDir}/action-cache.json`, + JSON.stringify(actionCache, null, 2) + ); + const taskOutput: TaskOutput = { + taskId, status: taskState.status, steps: taskState.steps, output, + actionCache, }; if (ctx.debug) { fs.writeFileSync( diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index d220c30..75b2f92 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -28,6 +28,26 @@ export interface AgentStep { actionOutput: ActionOutput; } +export interface ActionCacheEntry { + stepIndex: number; + instruction: string; + elementId: string | null; + method: string | null; + arguments: string[]; + frameIndex: number | null; + xpath: string | null; + actionType: string; + success: boolean; + message: string; +} + +export interface ActionCacheOutput { + taskId: string; + createdAt: string; + status?: TaskStatus; + steps: ActionCacheEntry[]; +} + export interface TaskParams { maxSteps?: number; debugDir?: string; @@ -41,12 +61,15 @@ export interface TaskParams { } export interface TaskOutput { + taskId: string; status?: TaskStatus; steps: AgentStep[]; output?: string; + actionCache?: ActionCacheOutput; } export interface Task { + id: string; getStatus: () => TaskStatus; pause: () => TaskStatus; resume: () => TaskStatus; @@ -112,4 +135,5 @@ export interface HyperPage extends Page { outputSchema?: T, params?: Omit ): Promise ? z.infer : string>; + getActionCache: (taskId: string) => ActionCacheOutput | null; } diff --git a/src/types/index.ts b/src/types/index.ts index 9924f9a..bb5a8ac 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -12,6 +12,8 @@ import { AgentOutputFn, AgentOutput, AgentStep, + ActionCacheEntry, + ActionCacheOutput, TaskParams, TaskOutput, Task, @@ -39,6 +41,8 @@ export { AgentOutputFn, AgentOutput, AgentStep, + ActionCacheEntry, + ActionCacheOutput, TaskParams, TaskOutput, Task, From 697bbe0388fd0472d1214c60fe6c7eb592d7e2fd Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 5 Dec 2025 22:02:20 -0800 Subject: [PATCH 02/13] executeScript from json --- src/agent/index.ts | 167 ++++++++++++++++++++++++++++ src/agent/shared/action-cache.ts | 1 + src/agent/shared/element-locator.ts | 57 +++++++++- src/types/agent/types.ts | 27 +++++ src/types/index.ts | 6 + 5 files changed, 257 insertions(+), 1 deletion(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index 8e034b0..973a070 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -13,6 +13,8 @@ import { ActionType, AgentActionDefinition, ActionCacheOutput, + ActionCacheReplayResult, + RunFromActionCacheParams, endTaskStatuses, Task, TaskOutput, @@ -20,6 +22,7 @@ import { TaskState, TaskStatus, } from "@/types"; +import fs from "fs"; import { CompleteActionDefinition, DEFAULT_ACTIONS, @@ -49,6 +52,9 @@ import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; import { setDebugOptions } from "@/debug/options"; import { initializeRuntimeContext } from "./shared/runtime-context"; import { performAction } from "./actions/shared/perform-action"; +import { captureDOMState } from "./shared/dom-capture"; +import { getLocatorFromXPath } from "./shared/element-locator"; +import { executePlaywrightMethod } from "./shared/execute-playwright-method"; export class HyperAgent { // aiAction configuration constants @@ -538,6 +544,164 @@ export class HyperAgent { } } + public async runFromActionCache( + cache: ActionCacheOutput, + pageOrGetter: Page | (() => Page), + params?: RunFromActionCacheParams + ): Promise { + const replayId = uuidv4(); + const maxXPathRetries = params?.maxXPathRetries ?? 3; + const debug = params?.debug ?? this.debug; + const getPage = () => + typeof pageOrGetter === "function" ? pageOrGetter() : pageOrGetter; + + const stepsResult: ActionCacheReplayResult["steps"] = []; + let replayStatus: TaskStatus.COMPLETED | TaskStatus.FAILED = + TaskStatus.COMPLETED; + + for (const step of [...cache.steps].sort( + (a, b) => a.stepIndex - b.stepIndex + )) { + const page = getPage(); + + await waitForSettledDOM(page); + const domState = await captureDOMState(page, { + useCache: false, + debug, + enableVisualMode: false, + }); + + const stepResult = { + stepIndex: step.stepIndex, + actionType: step.actionType, + usedXPath: false, + fallbackUsed: false, + retries: 0, + success: false, + message: "", + }; + + if (step.actionType === "goToUrl") { + const urlFromArgs = + (step.arguments && step.arguments[0]) || undefined; + const urlFromMessage = + typeof step.message === "string" + ? (step.message.match(/https?:\/\/\S+/)?.[0] as string | undefined) + : undefined; + const url = + (step.actionParams?.url as string | undefined) || + urlFromArgs || + urlFromMessage; + if (typeof url === "string" && url.length > 0) { + try { + await page.goto(url); + await waitForSettledDOM(page); + stepResult.success = true; + stepResult.message = `Navigated to ${url}`; + stepsResult.push(stepResult); + markDomSnapshotDirty(page); + continue; + } catch (error) { + stepResult.success = false; + stepResult.message = + error instanceof Error ? error.message : String(error); + replayStatus = TaskStatus.FAILED; + stepsResult.push(stepResult); + break; + } + } else { + stepResult.success = false; + stepResult.message = "No URL found in action cache for goToUrl"; + replayStatus = TaskStatus.FAILED; + stepsResult.push(stepResult); + break; + } + } + + if (step.xpath && step.method) { + for (let attempt = 0; attempt < maxXPathRetries; attempt++) { + stepResult.retries = attempt + 1; + try { + const locator = await getLocatorFromXPath( + step.xpath, + page, + step.frameIndex ?? undefined, + domState.frameMap, + debug + ); + await executePlaywrightMethod( + step.method, + step.arguments ?? [], + locator, + { + clickTimeout: 3500, + debug, + } + ); + stepResult.usedXPath = true; + stepResult.success = true; + stepResult.message = "Executed via cached xpath"; + break; + } catch (error) { + stepResult.message = + error instanceof Error ? error.message : String(error); + await waitForSettledDOM(page); + if (attempt >= maxXPathRetries - 1) { + stepResult.success = false; + } + } + } + } + + if (!stepResult.success) { + stepResult.fallbackUsed = true; + try { + const fallbackResult = await this.executeSingleAction( + step.instruction, + page, + { maxSteps: 1 } + ); + stepResult.success = fallbackResult.status === TaskStatus.COMPLETED; + stepResult.message = + fallbackResult.output || + (fallbackResult.status ?? "").toString() || + "Fallback completed"; + } catch (error) { + stepResult.success = false; + stepResult.message = + error instanceof Error ? error.message : String(error); + } + } + + await waitForSettledDOM(page); + markDomSnapshotDirty(page); + stepsResult.push(stepResult); + + if (!stepResult.success) { + replayStatus = TaskStatus.FAILED; + break; + } + } + + const replayResult: ActionCacheReplayResult = { + replayId, + sourceTaskId: cache.taskId, + steps: stepsResult, + status: replayStatus, + }; + + if (debug) { + const debugDir = "debug/action-cache"; + fs.mkdirSync(debugDir, { recursive: true }); + fs.writeFileSync( + `${debugDir}/replay-${replayId}.json`, + JSON.stringify(replayResult, null, 2) + ); + } + + return replayResult; + } + /** * Find element with retry logic * Retries element finding with DOM refetch until element is found or max retries reached @@ -1260,6 +1424,9 @@ export class HyperAgent { hyperPage.getActionCache = (taskId: string) => this.getActionCache(taskId); + hyperPage.runFromActionCache = (cache, params) => + this.runFromActionCache(cache, getActivePage, params); + // aiAsync tasks run in background, so we just use the current scope start point. // The task itself has internal auto-following logic (from executeTaskAsync implementation). hyperPage.aiAsync = (task: string, params?: TaskParams) => diff --git a/src/agent/shared/action-cache.ts b/src/agent/shared/action-cache.ts index e72a684..beccfcf 100644 --- a/src/agent/shared/action-cache.ts +++ b/src/agent/shared/action-cache.ts @@ -107,6 +107,7 @@ export const buildActionCacheEntry = ({ elementId, method, arguments: args, + actionParams: (action.params as Record) || undefined, frameIndex, xpath, actionType: action.type, diff --git a/src/agent/shared/element-locator.ts b/src/agent/shared/element-locator.ts index c43799d..5a21ac8 100644 --- a/src/agent/shared/element-locator.ts +++ b/src/agent/shared/element-locator.ts @@ -4,7 +4,11 @@ */ import type { Page } from "playwright-core"; -import { toEncodedId, type IframeInfo, resolveFrameByXPath } from "../../context-providers/a11y-dom"; +import { + toEncodedId, + type IframeInfo, + resolveFrameByXPath, +} from "../../context-providers/a11y-dom"; import { HyperagentError } from "../error"; /** @@ -123,3 +127,54 @@ export async function getElementLocator( return { locator: targetFrame.locator(`xpath=${xpath}`), xpath }; } + +export async function getLocatorFromXPath( + xpath: string, + page: Page, + frameIndex?: number | null, + frameMap?: Map, + debug = false +): Promise> { + const targetFrameIndex = frameIndex ?? 0; + + if (targetFrameIndex === 0) { + return page.locator(`xpath=${xpath}`); + } + + if (!frameMap || !frameMap.has(targetFrameIndex)) { + const errorMsg = `Frame metadata not found for frame ${targetFrameIndex}`; + if (debug) { + console.error(`[getLocatorFromXPath] ${errorMsg}`); + } + throw new HyperagentError(errorMsg, 404); + } + + const targetFrame = + (await resolveFrameByXPath(page, frameMap, targetFrameIndex)) ?? undefined; + + if (!targetFrame) { + const errorMsg = `Could not resolve frame for xpath ${xpath} (frameIndex: ${targetFrameIndex})`; + if (debug) { + console.error(`[getLocatorFromXPath] ${errorMsg}`); + } + throw new HyperagentError(errorMsg, 404); + } + + if (debug) { + console.log( + `[getLocatorFromXPath] Using frame ${targetFrameIndex}: ${targetFrame.url()}` + ); + } + + try { + await targetFrame.waitForLoadState("domcontentloaded", { timeout: 5000 }); + } catch { + if (debug) { + console.warn( + `[getLocatorFromXPath] Timeout waiting for iframe to load (frame ${targetFrameIndex}), proceeding anyway` + ); + } + } + + return targetFrame.locator(`xpath=${xpath}`); +} diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index 75b2f92..0f84e21 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -34,6 +34,7 @@ export interface ActionCacheEntry { elementId: string | null; method: string | null; arguments: string[]; + actionParams?: Record; frameIndex: number | null; xpath: string | null; actionType: string; @@ -48,6 +49,28 @@ export interface ActionCacheOutput { steps: ActionCacheEntry[]; } +export interface ActionCacheReplayStepResult { + stepIndex: number; + actionType: string; + usedXPath: boolean; + fallbackUsed: boolean; + retries: number; + success: boolean; + message: string; +} + +export interface ActionCacheReplayResult { + replayId: string; + sourceTaskId: string; + steps: ActionCacheReplayStepResult[]; + status: TaskStatus.COMPLETED | TaskStatus.FAILED; +} + +export interface RunFromActionCacheParams { + maxXPathRetries?: number; + debug?: boolean; +} + export interface TaskParams { maxSteps?: number; debugDir?: string; @@ -136,4 +159,8 @@ export interface HyperPage extends Page { params?: Omit ): Promise ? z.infer : string>; getActionCache: (taskId: string) => ActionCacheOutput | null; + runFromActionCache: ( + cache: ActionCacheOutput, + params?: RunFromActionCacheParams + ) => Promise; } diff --git a/src/types/index.ts b/src/types/index.ts index bb5a8ac..3938058 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -14,6 +14,9 @@ import { AgentStep, ActionCacheEntry, ActionCacheOutput, + ActionCacheReplayResult, + ActionCacheReplayStepResult, + RunFromActionCacheParams, TaskParams, TaskOutput, Task, @@ -43,6 +46,9 @@ export { AgentStep, ActionCacheEntry, ActionCacheOutput, + ActionCacheReplayResult, + ActionCacheReplayStepResult, + RunFromActionCacheParams, TaskParams, TaskOutput, Task, From 1345bb397b927322fd3927e322e03c8d414adcc6 Mon Sep 17 00:00:00 2001 From: Devin Date: Fri, 5 Dec 2025 22:56:07 -0800 Subject: [PATCH 03/13] almost working action-cache --- src/agent/index.ts | 287 +++++++++++++++---------- src/agent/shared/element-locator.ts | 51 ----- src/agent/shared/xpath-cdp-resolver.ts | 101 +++++++++ src/types/agent/types.ts | 14 ++ src/types/index.ts | 1 + 5 files changed, 292 insertions(+), 162 deletions(-) create mode 100644 src/agent/shared/xpath-cdp-resolver.ts diff --git a/src/agent/index.ts b/src/agent/index.ts index 973a070..205ce1c 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -53,8 +53,9 @@ import { setDebugOptions } from "@/debug/options"; import { initializeRuntimeContext } from "./shared/runtime-context"; import { performAction } from "./actions/shared/perform-action"; import { captureDOMState } from "./shared/dom-capture"; -import { getLocatorFromXPath } from "./shared/element-locator"; import { executePlaywrightMethod } from "./shared/execute-playwright-method"; +import { resolveXPathWithCDP } from "./shared/xpath-cdp-resolver"; +import { ReplayStepMeta } from "@/types/agent/types"; export class HyperAgent { // aiAction configuration constants @@ -563,121 +564,31 @@ export class HyperAgent { (a, b) => a.stepIndex - b.stepIndex )) { const page = getPage(); - - await waitForSettledDOM(page); - const domState = await captureDOMState(page, { - useCache: false, - debug, - enableVisualMode: false, + const hyperPage = page as HyperPage; + const result = await hyperPage.perform(step.instruction, { + cachedAction: step, + maxSteps: maxXPathRetries, }); - const stepResult = { + const meta = result.replayStepMeta; + const success = result.status === TaskStatus.COMPLETED; + + stepsResult.push({ stepIndex: step.stepIndex, actionType: step.actionType, - usedXPath: false, - fallbackUsed: false, - retries: 0, - success: false, - message: "", - }; - - if (step.actionType === "goToUrl") { - const urlFromArgs = - (step.arguments && step.arguments[0]) || undefined; - const urlFromMessage = - typeof step.message === "string" - ? (step.message.match(/https?:\/\/\S+/)?.[0] as string | undefined) - : undefined; - const url = - (step.actionParams?.url as string | undefined) || - urlFromArgs || - urlFromMessage; - if (typeof url === "string" && url.length > 0) { - try { - await page.goto(url); - await waitForSettledDOM(page); - stepResult.success = true; - stepResult.message = `Navigated to ${url}`; - stepsResult.push(stepResult); - markDomSnapshotDirty(page); - continue; - } catch (error) { - stepResult.success = false; - stepResult.message = - error instanceof Error ? error.message : String(error); - replayStatus = TaskStatus.FAILED; - stepsResult.push(stepResult); - break; - } - } else { - stepResult.success = false; - stepResult.message = "No URL found in action cache for goToUrl"; - replayStatus = TaskStatus.FAILED; - stepsResult.push(stepResult); - break; - } - } - - if (step.xpath && step.method) { - for (let attempt = 0; attempt < maxXPathRetries; attempt++) { - stepResult.retries = attempt + 1; - try { - const locator = await getLocatorFromXPath( - step.xpath, - page, - step.frameIndex ?? undefined, - domState.frameMap, - debug - ); - await executePlaywrightMethod( - step.method, - step.arguments ?? [], - locator, - { - clickTimeout: 3500, - debug, - } - ); - stepResult.usedXPath = true; - stepResult.success = true; - stepResult.message = "Executed via cached xpath"; - break; - } catch (error) { - stepResult.message = - error instanceof Error ? error.message : String(error); - await waitForSettledDOM(page); - if (attempt >= maxXPathRetries - 1) { - stepResult.success = false; - } - } - } - } - - if (!stepResult.success) { - stepResult.fallbackUsed = true; - try { - const fallbackResult = await this.executeSingleAction( - step.instruction, - page, - { maxSteps: 1 } - ); - stepResult.success = fallbackResult.status === TaskStatus.COMPLETED; - stepResult.message = - fallbackResult.output || - (fallbackResult.status ?? "").toString() || - "Fallback completed"; - } catch (error) { - stepResult.success = false; - stepResult.message = - error instanceof Error ? error.message : String(error); - } - } - - await waitForSettledDOM(page); - markDomSnapshotDirty(page); - stepsResult.push(stepResult); + usedXPath: meta?.usedCachedAction ?? false, + fallbackUsed: meta?.fallbackUsed ?? false, + cachedXPath: meta?.cachedXPath ?? null, + fallbackXPath: meta?.fallbackXPath ?? null, + fallbackElementId: meta?.fallbackElementId ?? null, + retries: meta?.retries ?? 0, + success, + message: + result.output || + (success ? "Completed" : "Failed to execute cached action"), + }); - if (!stepResult.success) { + if (!success) { replayStatus = TaskStatus.FAILED; break; } @@ -963,8 +874,154 @@ export class HyperAgent { let domState: A11yDOMState | null = null; let elementMap: Map | null = null; + const replayStepMeta: ReplayStepMeta | undefined = _params?.cachedAction + ? { + usedCachedAction: false, + fallbackUsed: false, + retries: 0, + cachedXPath: _params.cachedAction.xpath ?? null, + fallbackXPath: null, + fallbackElementId: null, + } + : undefined; try { + const cachedAction = _params?.cachedAction; + + // Handle cached goToUrl directly + if (cachedAction && cachedAction.actionType === "goToUrl") { + const url = + (cachedAction.actionParams?.url as string | undefined) || + (cachedAction.arguments && cachedAction.arguments[0]); + if (!url || typeof url !== "string") { + throw new HyperagentError( + "Cached goToUrl action missing URL parameter", + 400 + ); + } + await initialPage.goto(url); + await waitForSettledDOM(initialPage); + markDomSnapshotDirty(initialPage); + if (replayStepMeta) { + replayStepMeta.usedCachedAction = true; + } + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Navigated to ${url}`, + replayStepMeta, + }; + } + // Handle cached complete directly without LLM or navigation + if (cachedAction && cachedAction.actionType === "complete") { + if (replayStepMeta) { + replayStepMeta.usedCachedAction = true; + } + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: "Task Complete", + replayStepMeta, + }; + } + + // Handle cached XPath-first execution if available + if ( + cachedAction && + cachedAction.xpath && + cachedAction.method && + cachedAction.actionType === "actElement" + ) { + await waitForSettledDOM(initialPage); + const cachedDomState = await captureDOMState(initialPage, { + useCache: false, + debug: this.debug, + enableVisualMode: false, + }); + const maxCachedRetries = _params?.maxSteps ?? 3; + for (let attempt = 0; attempt < maxCachedRetries; attempt++) { + if (replayStepMeta) { + replayStepMeta.retries = attempt + 1; + } + try { + const { cdpClient, frameContextManager } = + await initializeRuntimeContext(initialPage, this.debug); + const resolved = await resolveXPathWithCDP({ + xpath: cachedAction.xpath, + frameIndex: cachedAction.frameIndex ?? 0, + cdpClient, + frameContextManager, + debug: this.debug, + }); + + const actionContext: ActionContext = { + domState: cachedDomState, + page: initialPage, + tokenLimit: this.tokenLimit, + llm: this.llm, + debug: this.debug, + cdpActions: true, + cdp: { + client: cdpClient, + frameContextManager, + resolveElement, + dispatchCDPAction, + preferScriptBoundingBox: this.debug, + debug: this.debug, + }, + debugDir: undefined, + mcpClient: this.mcpClient, + variables: Object.values(this._variables), + invalidateDomCache: () => markDomSnapshotDirty(initialPage), + }; + + const encodedId = `${cachedAction.frameIndex ?? 0}-${resolved.backendNodeId}`; + cachedDomState.backendNodeMap = { + ...(cachedDomState.backendNodeMap || {}), + [encodedId]: resolved.backendNodeId, + }; + cachedDomState.xpathMap = { + ...(cachedDomState.xpathMap || {}), + [encodedId]: cachedAction.xpath, + }; + + const actionOutput = await performAction(actionContext, { + elementId: encodedId, + method: cachedAction.method, + arguments: cachedAction.arguments ?? [], + instruction, + confidence: 1, + }); + if (!actionOutput.success) { + throw new Error(actionOutput.message); + } + await waitForSettledDOM(initialPage); + markDomSnapshotDirty(initialPage); + if (replayStepMeta) { + replayStepMeta.usedCachedAction = true; + } + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Executed cached action: ${instruction}`, + replayStepMeta, + }; + } catch (error) { + if (attempt >= maxCachedRetries - 1) { + if (this.debug) { + console.warn( + `[executeSingleAction][cachedAction] XPath execution failed after ${maxCachedRetries} attempts:`, + error + ); + } + } + } + } + } + // Find element with retry logic const findStart = performance.now(); const { @@ -1077,6 +1134,13 @@ export class HyperAgent { actionXPath = (actionOutput.debug as any).elementMetadata?.xpath; } + if (replayStepMeta) { + replayStepMeta.fallbackUsed = true; + replayStepMeta.fallbackElementId = element.elementId; + replayStepMeta.fallbackXPath = + domState?.xpathMap?.[element.elementId] ?? null; + } + if (!actionOutput.success) { throw new Error(actionOutput.message); } @@ -1119,6 +1183,7 @@ export class HyperAgent { status: TaskStatus.COMPLETED, steps: [], output: `Successfully executed: ${instruction}`, + replayStepMeta, }; } catch (error) { // If page switched during execution, prioritize that over the error diff --git a/src/agent/shared/element-locator.ts b/src/agent/shared/element-locator.ts index 5a21ac8..ba4c109 100644 --- a/src/agent/shared/element-locator.ts +++ b/src/agent/shared/element-locator.ts @@ -127,54 +127,3 @@ export async function getElementLocator( return { locator: targetFrame.locator(`xpath=${xpath}`), xpath }; } - -export async function getLocatorFromXPath( - xpath: string, - page: Page, - frameIndex?: number | null, - frameMap?: Map, - debug = false -): Promise> { - const targetFrameIndex = frameIndex ?? 0; - - if (targetFrameIndex === 0) { - return page.locator(`xpath=${xpath}`); - } - - if (!frameMap || !frameMap.has(targetFrameIndex)) { - const errorMsg = `Frame metadata not found for frame ${targetFrameIndex}`; - if (debug) { - console.error(`[getLocatorFromXPath] ${errorMsg}`); - } - throw new HyperagentError(errorMsg, 404); - } - - const targetFrame = - (await resolveFrameByXPath(page, frameMap, targetFrameIndex)) ?? undefined; - - if (!targetFrame) { - const errorMsg = `Could not resolve frame for xpath ${xpath} (frameIndex: ${targetFrameIndex})`; - if (debug) { - console.error(`[getLocatorFromXPath] ${errorMsg}`); - } - throw new HyperagentError(errorMsg, 404); - } - - if (debug) { - console.log( - `[getLocatorFromXPath] Using frame ${targetFrameIndex}: ${targetFrame.url()}` - ); - } - - try { - await targetFrame.waitForLoadState("domcontentloaded", { timeout: 5000 }); - } catch { - if (debug) { - console.warn( - `[getLocatorFromXPath] Timeout waiting for iframe to load (frame ${targetFrameIndex}), proceeding anyway` - ); - } - } - - return targetFrame.locator(`xpath=${xpath}`); -} diff --git a/src/agent/shared/xpath-cdp-resolver.ts b/src/agent/shared/xpath-cdp-resolver.ts new file mode 100644 index 0000000..462e164 --- /dev/null +++ b/src/agent/shared/xpath-cdp-resolver.ts @@ -0,0 +1,101 @@ +import { CDPClient } from "@/cdp/types"; +import { FrameContextManager } from "@/cdp/frame-context-manager"; +import { HyperagentError } from "../error"; + +export interface ResolvedCDPFromXPath { + backendNodeId: number; + frameId: string; + objectId?: string; +} + +export interface ResolveXPathWithCDPParams { + xpath: string; + frameIndex: number | null | undefined; + cdpClient: CDPClient; + frameContextManager?: FrameContextManager; + debug?: boolean; +} + +export async function resolveXPathWithCDP( + params: ResolveXPathWithCDPParams +): Promise { + const { xpath, frameIndex = 0, cdpClient, frameContextManager, debug } = + params; + + // Use a DOM session without detaching the shared session; this keeps root session intact. + const session = await cdpClient.acquireSession("dom"); + let targetFrameId: string | undefined; + + if (frameContextManager) { + const frameInfo = frameContextManager.getFrameByIndex(frameIndex ?? 0); + targetFrameId = frameInfo?.frameId; + } + + if (!targetFrameId) { + throw new HyperagentError( + `Unable to resolve frameId for frameIndex ${frameIndex}`, + 404 + ); + } + + const executionContextId = frameContextManager + ? await frameContextManager.waitForExecutionContext(targetFrameId) + : undefined; + + if (!executionContextId && debug) { + console.warn( + `[resolveXPathWithCDP] Missing executionContextId for frame ${frameIndex} (${targetFrameId}), continuing` + ); + } + + await session.send("DOM.enable").catch(() => {}); + await session.send("Runtime.enable").catch(() => {}); + + const evalResponse = await session.send<{ + result: { objectId?: string | null }; + exceptionDetails?: unknown; + }>("Runtime.evaluate", { + expression: buildXPathEvaluationExpression(xpath), + contextId: executionContextId, + includeCommandLineAPI: false, + returnByValue: false, + awaitPromise: false, + }); + + const objectId = evalResponse.result.objectId || undefined; + if (!objectId) { + throw new HyperagentError( + `Failed to resolve XPath to objectId in frame ${frameIndex}`, + 404 + ); + } + + const describeNode = await session.send<{ + node?: { backendNodeId?: number }; + }>("DOM.describeNode", { objectId }); + const backendNodeId = describeNode.node?.backendNodeId; + if (typeof backendNodeId !== "number") { + throw new HyperagentError( + `DOM.describeNode did not return backendNodeId for frame ${frameIndex}`, + 404 + ); + } + + return { + backendNodeId, + frameId: targetFrameId, + objectId, + }; +} + +function buildXPathEvaluationExpression(xpath: string): string { + const escaped = JSON.stringify(xpath); + return `(function() { + try { + const result = document.evaluate(${escaped}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); + return result.singleNodeValue || null; + } catch (error) { + return null; + } + })();`; +} diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index 0f84e21..edebae0 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -42,6 +42,15 @@ export interface ActionCacheEntry { message: string; } +export interface ReplayStepMeta { + usedCachedAction: boolean; + fallbackUsed: boolean; + retries?: number; + cachedXPath?: string | null; + fallbackXPath?: string | null; + fallbackElementId?: string | null; +} + export interface ActionCacheOutput { taskId: string; createdAt: string; @@ -54,6 +63,9 @@ export interface ActionCacheReplayStepResult { actionType: string; usedXPath: boolean; fallbackUsed: boolean; + cachedXPath?: string | null; + fallbackXPath?: string | null; + fallbackElementId?: string | null; retries: number; success: boolean; message: string; @@ -81,6 +93,7 @@ export interface TaskParams { enableVisualMode?: boolean; useDomCache?: boolean; enableDomStreaming?: boolean; + cachedAction?: ActionCacheEntry; } export interface TaskOutput { @@ -89,6 +102,7 @@ export interface TaskOutput { steps: AgentStep[]; output?: string; actionCache?: ActionCacheOutput; + replayStepMeta?: ReplayStepMeta; } export interface Task { diff --git a/src/types/index.ts b/src/types/index.ts index 3938058..c3773f7 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -16,6 +16,7 @@ import { ActionCacheOutput, ActionCacheReplayResult, ActionCacheReplayStepResult, + ReplayStepMeta, RunFromActionCacheParams, TaskParams, TaskOutput, From aa4e4316c4842bb0a6bb548ba4bb33a623a220b7 Mon Sep 17 00:00:00 2001 From: Devin Date: Sat, 6 Dec 2025 17:50:03 -0800 Subject: [PATCH 04/13] working action cache script --- src/agent/index.ts | 19 ++++- src/agent/shared/action-cache-script.ts | 93 +++++++++++++++++++++++++ src/agent/shared/action-cache.ts | 13 +++- src/types/agent/types.ts | 12 +++- src/types/index.ts | 1 + 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 src/agent/shared/action-cache-script.ts diff --git a/src/agent/index.ts b/src/agent/index.ts index 205ce1c..208aa66 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -41,7 +41,12 @@ import { } from "../context-providers/a11y-dom/types"; import { MCPClient } from "./mcp/client"; import { runAgentTask } from "./tools/agent"; -import { HyperPage, HyperVariable } from "../types/agent/types"; +import { + HyperPage, + HyperVariable, + ActionCacheEntry, + CachedActionHint, +} from "../types/agent/types"; import { z } from "zod"; import { ErrorEmitter } from "../utils"; import { waitForSettledDOM } from "@/utils/waitForSettledDOM"; @@ -55,6 +60,7 @@ import { performAction } from "./actions/shared/perform-action"; import { captureDOMState } from "./shared/dom-capture"; import { executePlaywrightMethod } from "./shared/execute-playwright-method"; import { resolveXPathWithCDP } from "./shared/xpath-cdp-resolver"; +import { createScriptFromActionCache } from "./shared/action-cache-script"; import { ReplayStepMeta } from "@/types/agent/types"; export class HyperAgent { @@ -891,8 +897,8 @@ export class HyperAgent { // Handle cached goToUrl directly if (cachedAction && cachedAction.actionType === "goToUrl") { const url = - (cachedAction.actionParams?.url as string | undefined) || - (cachedAction.arguments && cachedAction.arguments[0]); + (cachedAction.arguments && cachedAction.arguments[0]) || + (cachedAction.actionParams?.url as string | undefined); if (!url || typeof url !== "string") { throw new HyperagentError( "Cached goToUrl action missing URL parameter", @@ -1390,6 +1396,13 @@ export class HyperAgent { return session; } + public createScriptFromActionCache( + steps: ActionCacheEntry[], + taskId?: string + ): string { + return createScriptFromActionCache({ steps, taskId }); + } + private setupHyperPage(page: Page): HyperPage { const hyperPage = page as HyperPage; diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts new file mode 100644 index 0000000..841d947 --- /dev/null +++ b/src/agent/shared/action-cache-script.ts @@ -0,0 +1,93 @@ +import fs from "fs"; +import path from "path"; +import { ActionCacheEntry } from "@/types"; + +interface CreateScriptFromActionCacheParams { + taskId?: string; + steps: ActionCacheEntry[]; +} + +export function createScriptFromActionCache( + params: CreateScriptFromActionCacheParams +): string { + const { taskId, steps } = params; + const id = + taskId && taskId.length > 0 + ? taskId + : new Date().toISOString().replace(/[:.]/g, "-"); + const dir = path.join(process.cwd(), "action-cache-scripts", id); + fs.mkdirSync(dir, { recursive: true }); + + const formatArguments = (args: unknown[] | undefined): string => { + if (!args || args.length === 0) { + return "[]"; + } + if (args.length === 1) { + return `[${JSON.stringify(args[0])}]`; + } + return `[\n${args + .map((arg) => ` ${JSON.stringify(arg)},`) + .join("\n")}\n ]`; + }; + + const formatCachedAction = (step: ActionCacheEntry): string => { + const fields = [ + `actionType: ${JSON.stringify(step.actionType)}`, + step.method ? `method: ${JSON.stringify(step.method)}` : undefined, + `arguments: ${formatArguments(step.arguments)}`, + step.frameIndex !== undefined && step.frameIndex !== null + ? `frameIndex: ${step.frameIndex}` + : undefined, + step.xpath ? `xpath: ${JSON.stringify(step.xpath)}` : undefined, + ].filter(Boolean); + + return `{\n ${fields.join(",\n ")}\n }`; + }; + + const stepSnippets = steps + .map((step) => { + if (step.actionType === "complete") { + return ` // Step ${step.stepIndex} (complete skipped in script)`; + } + if (step.actionType === "goToUrl") { + const urlArg = + (step.arguments && step.arguments[0]) || + "https://example.com"; // fallback safety + return ` // Step ${step.stepIndex} + await page.goto(${JSON.stringify( + urlArg + )}, { waitUntil: "domcontentloaded" });`; + } + + return ` // Step ${step.stepIndex} + await page.perform(${JSON.stringify(step.instruction)}, { + cachedAction: ${formatCachedAction(step)}, + maxSteps: 3, + });`; + }) + .join("\n\n"); + + const script = `import { HyperAgent } from "@hyperbrowser/agent"; + +async function main() { + const agent = new HyperAgent({ + // Configure your LLM/API keys + }); + + const page = await agent.newPage(); + +${stepSnippets} + + await agent.closeAgent(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); +`; + + const outPath = path.join(dir, "run-cached-actions.ts"); + fs.writeFileSync(outPath, script); + return outPath; +} diff --git a/src/agent/shared/action-cache.ts b/src/agent/shared/action-cache.ts index beccfcf..5b4731d 100644 --- a/src/agent/shared/action-cache.ts +++ b/src/agent/shared/action-cache.ts @@ -96,6 +96,17 @@ export const buildActionCacheEntry = ({ const encodedId = elementId ? asEncodedId(elementId) : undefined; const frameIndex = extractFrameIndex(elementId); + // Normalize goToUrl to use arguments[0] for URL to simplify replay paths + let normalizedArgs = args; + if ( + action.type === "goToUrl" && + (!args || args.length === 0) && + action.params && + typeof (action.params as any).url === "string" + ) { + normalizedArgs = [(action.params as any).url as string]; + } + const xpathFromDom = encodedId ? domState.xpathMap?.[encodedId] || null : null; const xpath = normalizeXPath( xpathFromDom || extractXPathFromDebug(actionOutput) @@ -106,7 +117,7 @@ export const buildActionCacheEntry = ({ instruction, elementId, method, - arguments: args, + arguments: normalizedArgs, actionParams: (action.params as Record) || undefined, frameIndex, xpath, diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index edebae0..635a929 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -42,6 +42,16 @@ export interface ActionCacheEntry { message: string; } +export interface CachedActionHint { + actionType: string; + xpath?: string | null; + frameIndex?: number | null; + method?: string | null; + arguments?: string[]; + elementId?: string | null; + actionParams?: Record; +} + export interface ReplayStepMeta { usedCachedAction: boolean; fallbackUsed: boolean; @@ -93,7 +103,7 @@ export interface TaskParams { enableVisualMode?: boolean; useDomCache?: boolean; enableDomStreaming?: boolean; - cachedAction?: ActionCacheEntry; + cachedAction?: CachedActionHint; } export interface TaskOutput { diff --git a/src/types/index.ts b/src/types/index.ts index c3773f7..0907dd8 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -17,6 +17,7 @@ import { ActionCacheReplayResult, ActionCacheReplayStepResult, ReplayStepMeta, + CachedActionHint, RunFromActionCacheParams, TaskParams, TaskOutput, From 5dbb3edc48fb3e47d58015bd415d3acd0d899e29 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 7 Dec 2025 16:02:07 -0800 Subject: [PATCH 05/13] version of performAction individual page functions --- src/agent/index.ts | 3 + src/agent/shared/action-cache-exec.ts | 228 ++++++++++++++++++++++++ src/agent/shared/action-cache-script.ts | 116 +++++++----- src/types/agent/types.ts | 101 +++++++++++ 4 files changed, 407 insertions(+), 41 deletions(-) create mode 100644 src/agent/shared/action-cache-exec.ts diff --git a/src/agent/index.ts b/src/agent/index.ts index 208aa66..82e54d1 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -61,6 +61,7 @@ import { captureDOMState } from "./shared/dom-capture"; import { executePlaywrightMethod } from "./shared/execute-playwright-method"; import { resolveXPathWithCDP } from "./shared/xpath-cdp-resolver"; import { createScriptFromActionCache } from "./shared/action-cache-script"; +import { attachCachedActionHelpers } from "./shared/action-cache-exec"; import { ReplayStepMeta } from "@/types/agent/types"; export class HyperAgent { @@ -1505,6 +1506,8 @@ export class HyperAgent { hyperPage.runFromActionCache = (cache, params) => this.runFromActionCache(cache, getActivePage, params); + attachCachedActionHelpers(hyperPage); + // aiAsync tasks run in background, so we just use the current scope start point. // The task itself has internal auto-following logic (from executeTaskAsync implementation). hyperPage.aiAsync = (task: string, params?: TaskParams) => diff --git a/src/agent/shared/action-cache-exec.ts b/src/agent/shared/action-cache-exec.ts new file mode 100644 index 0000000..0ca89b5 --- /dev/null +++ b/src/agent/shared/action-cache-exec.ts @@ -0,0 +1,228 @@ +import { HyperPage, TaskOutput } from "@/types/agent/types"; +import { waitForSettledDOM } from "@/utils/waitForSettledDOM"; +import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; + +const DEFAULT_MAX_STEPS = 3; + +type PageAction = + | "click" + | "fill" + | "type" + | "press" + | "selectOptionFromDropdown" + | "check" + | "uncheck" + | "hover" + | "scrollToElement" + | "scrollToPercentage" + | "nextChunk" + | "prevChunk"; + +interface PerformOptions { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; +} + +interface PerformValueOptions extends PerformOptions { + value: string; +} + +interface PerformPositionOptions extends PerformOptions { + position: string | number; +} + +export async function performGoTo( + page: HyperPage, + url: string, + waitUntil: "domcontentloaded" | "load" | "networkidle" = "domcontentloaded" +): Promise { + await page.goto(url, { waitUntil }); + await waitForSettledDOM(page); + markDomSnapshotDirty(page); +} + +function runCachedAction( + page: HyperPage, + instruction: string, + method: PageAction, + xpath: string, + args: unknown[], + options?: PerformOptions +): Promise { + return page.perform(instruction, { + cachedAction: { + actionType: "actElement", + method, + arguments: args as string[], + frameIndex: options?.frameIndex ?? 0, + xpath, + }, + maxSteps: options?.maxSteps ?? DEFAULT_MAX_STEPS, + }); +} + +export function attachCachedActionHelpers(page: HyperPage): void { + page.performClick = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Click element", + "click", + xpath, + [], + options + ); + + page.performHover = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Hover element", + "hover", + xpath, + [], + options + ); + + page.performType = ( + xpath: string, + text: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Type text", + "type", + xpath, + [text], + options + ); + + page.performFill = ( + xpath: string, + text: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Fill input", + "fill", + xpath, + [text], + options + ); + + page.performPress = ( + xpath: string, + key: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Press key", + "press", + xpath, + [key], + options + ); + + page.performSelectOption = ( + xpath: string, + option: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Select option", + "selectOptionFromDropdown", + xpath, + [option], + options + ); + + page.performCheck = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Check element", + "check", + xpath, + [], + options + ); + + page.performUncheck = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Uncheck element", + "uncheck", + xpath, + [], + options + ); + + page.performScrollToElement = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Scroll to element", + "scrollToElement", + xpath, + [], + options + ); + + page.performScrollToPercentage = ( + xpath: string, + position: string | number, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Scroll to percentage", + "scrollToPercentage", + xpath, + [position], + options + ); + + page.performNextChunk = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Scroll next chunk", + "nextChunk", + xpath, + [], + options + ); + + page.performPrevChunk = ( + xpath: string, + options?: PerformOptions + ) => + runCachedAction( + page, + options?.performInstruction || "Scroll previous chunk", + "prevChunk", + xpath, + [], + options + ); +} + +export { DEFAULT_MAX_STEPS }; diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index 841d947..9d5de11 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -18,57 +18,91 @@ export function createScriptFromActionCache( const dir = path.join(process.cwd(), "action-cache-scripts", id); fs.mkdirSync(dir, { recursive: true }); - const formatArguments = (args: unknown[] | undefined): string => { - if (!args || args.length === 0) { - return "[]"; - } - if (args.length === 1) { - return `[${JSON.stringify(args[0])}]`; - } - return `[\n${args - .map((arg) => ` ${JSON.stringify(arg)},`) - .join("\n")}\n ]`; - }; +const METHOD_TO_CALL: Record = { + click: { fn: "performClick" }, + fill: { fn: "performFill", needsValue: true, valueName: "text" }, + type: { fn: "performType", needsValue: true, valueName: "text" }, + press: { fn: "performPress", needsValue: true, valueName: "key" }, + selectOptionFromDropdown: { + fn: "performSelectOption", + needsValue: true, + valueName: "option", + }, + check: { fn: "performCheck" }, + uncheck: { fn: "performUncheck" }, + hover: { fn: "performHover" }, + scrollToElement: { fn: "performScrollToElement" }, + scrollToPercentage: { + fn: "performScrollToPercentage", + needsValue: true, + valueName: "position", + }, + nextChunk: { fn: "performNextChunk" }, + prevChunk: { fn: "performPrevChunk" }, +}; - const formatCachedAction = (step: ActionCacheEntry): string => { - const fields = [ - `actionType: ${JSON.stringify(step.actionType)}`, - step.method ? `method: ${JSON.stringify(step.method)}` : undefined, - `arguments: ${formatArguments(step.arguments)}`, - step.frameIndex !== undefined && step.frameIndex !== null - ? `frameIndex: ${step.frameIndex}` - : undefined, - step.xpath ? `xpath: ${JSON.stringify(step.xpath)}` : undefined, - ].filter(Boolean); +const formatCall = (step: ActionCacheEntry): string => { + if (step.actionType === "complete") { + return ` // Step ${step.stepIndex} (complete skipped in script)`; + } - return `{\n ${fields.join(",\n ")}\n }`; - }; - - const stepSnippets = steps - .map((step) => { - if (step.actionType === "complete") { - return ` // Step ${step.stepIndex} (complete skipped in script)`; - } - if (step.actionType === "goToUrl") { - const urlArg = - (step.arguments && step.arguments[0]) || - "https://example.com"; // fallback safety - return ` // Step ${step.stepIndex} + if (step.actionType === "goToUrl") { + const urlArg = + (step.arguments && step.arguments[0]) || "https://example.com"; + return ` // Step ${step.stepIndex} await page.goto(${JSON.stringify( urlArg )}, { waitUntil: "domcontentloaded" });`; - } + } - return ` // Step ${step.stepIndex} + const call = step.method ? METHOD_TO_CALL[step.method] : undefined; + if (call) { + const args: string[] = []; + args.push(JSON.stringify(step.xpath)); + if (call.needsValue) { + const value = step.arguments?.[0] ?? ""; + args.push(JSON.stringify(value)); + } + const options: Record = { + performInstruction: step.instruction, + }; + if (step.frameIndex !== null && step.frameIndex !== undefined && step.frameIndex !== 0) { + options.frameIndex = step.frameIndex; + } + const hasOptions = + options.performInstruction !== undefined || + options.frameIndex !== undefined; + if (hasOptions) { + args.push(JSON.stringify(options)); + } + + return ` // Step ${step.stepIndex} + await page.${call.fn}(${args.join(", ")});`; + } + + // Fallback to perform with cachedAction if no helper mapping exists + const cached = { + actionType: step.actionType, + method: step.method, + arguments: step.arguments ?? [], + frameIndex: step.frameIndex ?? 0, + xpath: step.xpath, + elementId: step.elementId, + }; + return ` // Step ${step.stepIndex} await page.perform(${JSON.stringify(step.instruction)}, { - cachedAction: ${formatCachedAction(step)}, - maxSteps: 3, + cachedAction: ${JSON.stringify(cached, null, 2) + .split("\n") + .map((line, idx) => (idx === 0 ? line : " " + line)) + .join("\n")}, });`; - }) - .join("\n\n"); +}; - const script = `import { HyperAgent } from "@hyperbrowser/agent"; +const stepSnippets = steps + .map((step) => formatCall(step)) + .join("\n\n"); + const script = `import { HyperAgent } from "@hyperbrowser/agent"; async function main() { const agent = new HyperAgent({ // Configure your LLM/API keys diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index 635a929..a1dd350 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -156,6 +156,107 @@ export interface HyperVariable { } export interface HyperPage extends Page { + performClick: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performHover: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performType: ( + xpath: string, + text: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performFill: ( + xpath: string, + text: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performPress: ( + xpath: string, + key: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performSelectOption: ( + xpath: string, + option: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performCheck: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performUncheck: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performScrollToElement: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performScrollToPercentage: ( + xpath: string, + position: string | number, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performNextChunk: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; + performPrevChunk: ( + xpath: string, + options?: { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; + } + ) => Promise; /** * Execute a complex multi-step task using visual mode * Best for: Complex workflows, multi-step tasks, exploratory automation From c2569c8bfb13436c9d0e7dbbc98811ae7ec0c2a8 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 7 Dec 2025 21:15:41 -0800 Subject: [PATCH 06/13] refactor --- src/agent/index.ts | 329 +++++++++++++------------- src/agent/shared/action-cache-exec.ts | 123 +++++----- src/agent/shared/run-cached-action.ts | 238 +++++++++++++++++++ src/types/agent/types.ts | 9 +- 4 files changed, 460 insertions(+), 239 deletions(-) create mode 100644 src/agent/shared/run-cached-action.ts diff --git a/src/agent/index.ts b/src/agent/index.ts index 82e54d1..e9db714 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -41,11 +41,10 @@ import { } from "../context-providers/a11y-dom/types"; import { MCPClient } from "./mcp/client"; import { runAgentTask } from "./tools/agent"; -import { +import type { HyperPage, HyperVariable, ActionCacheEntry, - CachedActionHint, } from "../types/agent/types"; import { z } from "zod"; import { ErrorEmitter } from "../utils"; @@ -567,35 +566,169 @@ export class HyperAgent { let replayStatus: TaskStatus.COMPLETED | TaskStatus.FAILED = TaskStatus.COMPLETED; + const helperMap: Record = { + click: "performClick", + fill: "performFill", + type: "performType", + press: "performPress", + selectOptionFromDropdown: "performSelectOption", + check: "performCheck", + uncheck: "performUncheck", + hover: "performHover", + scrollToElement: "performScrollToElement", + scrollToPercentage: "performScrollToPercentage", + nextChunk: "performNextChunk", + prevChunk: "performPrevChunk", + }; + for (const step of [...cache.steps].sort( (a, b) => a.stepIndex - b.stepIndex )) { const page = getPage(); const hyperPage = page as HyperPage; - const result = await hyperPage.perform(step.instruction, { - cachedAction: step, - maxSteps: maxXPathRetries, - }); + let result: TaskOutput; + + if (step.actionType === "goToUrl") { + const url = + (step.arguments && step.arguments[0]) || + (step.actionParams as any)?.url || + ""; + if (!url || typeof url !== "string") { + result = { + taskId: cache.taskId, + status: TaskStatus.FAILED, + steps: [], + output: "Missing URL for goToUrl", + }; + } else { + await hyperPage.goto(url, { waitUntil: "domcontentloaded" }); + await waitForSettledDOM(hyperPage); + markDomSnapshotDirty(hyperPage); + result = { + taskId: cache.taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Navigated to ${url}`, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } + } else if (step.actionType === "complete") { + result = { + taskId: cache.taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: "Task Complete", + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } else { + const helperName = + step.method && helperMap[step.method] ? helperMap[step.method] : null; + if ( + helperName && + typeof (hyperPage as any)[helperName] === "function" + ) { + const options: any = { + performInstruction: step.instruction, + maxSteps: maxXPathRetries, + }; + if (step.frameIndex !== null && step.frameIndex !== undefined) { + options.frameIndex = step.frameIndex; + } + const valueArg = step.arguments?.[0]; + if ( + [ + "type", + "fill", + "press", + "selectOptionFromDropdown", + "scrollToPercentage", + ].includes(step.method ?? "") + ) { + result = await (hyperPage as any)[helperName]( + step.xpath ?? "", + valueArg, + options + ); + } else { + result = await (hyperPage as any)[helperName]( + step.xpath ?? "", + options + ); + } + } else { + result = await hyperPage.perform(step.instruction); + } + } const meta = result.replayStepMeta; const success = result.status === TaskStatus.COMPLETED; + // If cached/helper execution failed but we had a cached attempt, fall back to LLM perform + if ( + !success && + step.instruction && + typeof step.instruction === "string" && + (meta?.usedCachedAction ?? false) + ) { + const fallbackResult = await hyperPage.perform(step.instruction); + const existingMeta: ReplayStepMeta = meta || { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: step.xpath ?? null, + fallbackXPath: null, + fallbackElementId: null, + }; + result = { + ...fallbackResult, + replayStepMeta: { + usedCachedAction: existingMeta.usedCachedAction, + fallbackUsed: true, + retries: existingMeta.retries, + cachedXPath: existingMeta.cachedXPath, + fallbackXPath: + fallbackResult.replayStepMeta?.fallbackXPath ?? + existingMeta.fallbackXPath, + fallbackElementId: + fallbackResult.replayStepMeta?.fallbackElementId ?? + existingMeta.fallbackElementId, + }, + }; + } + + const finalMeta = result.replayStepMeta; + const finalSuccess = result.status === TaskStatus.COMPLETED; + stepsResult.push({ stepIndex: step.stepIndex, actionType: step.actionType, - usedXPath: meta?.usedCachedAction ?? false, - fallbackUsed: meta?.fallbackUsed ?? false, - cachedXPath: meta?.cachedXPath ?? null, - fallbackXPath: meta?.fallbackXPath ?? null, - fallbackElementId: meta?.fallbackElementId ?? null, - retries: meta?.retries ?? 0, - success, + usedXPath: finalMeta?.usedCachedAction ?? false, + fallbackUsed: finalMeta?.fallbackUsed ?? false, + cachedXPath: finalMeta?.cachedXPath ?? null, + fallbackXPath: finalMeta?.fallbackXPath ?? null, + fallbackElementId: finalMeta?.fallbackElementId ?? null, + retries: finalMeta?.retries ?? 0, + success: finalSuccess, message: result.output || - (success ? "Completed" : "Failed to execute cached action"), + (finalSuccess ? "Completed" : "Failed to execute cached action"), }); - if (!success) { + if (!finalSuccess) { replayStatus = TaskStatus.FAILED; break; } @@ -881,154 +1014,8 @@ export class HyperAgent { let domState: A11yDOMState | null = null; let elementMap: Map | null = null; - const replayStepMeta: ReplayStepMeta | undefined = _params?.cachedAction - ? { - usedCachedAction: false, - fallbackUsed: false, - retries: 0, - cachedXPath: _params.cachedAction.xpath ?? null, - fallbackXPath: null, - fallbackElementId: null, - } - : undefined; try { - const cachedAction = _params?.cachedAction; - - // Handle cached goToUrl directly - if (cachedAction && cachedAction.actionType === "goToUrl") { - const url = - (cachedAction.arguments && cachedAction.arguments[0]) || - (cachedAction.actionParams?.url as string | undefined); - if (!url || typeof url !== "string") { - throw new HyperagentError( - "Cached goToUrl action missing URL parameter", - 400 - ); - } - await initialPage.goto(url); - await waitForSettledDOM(initialPage); - markDomSnapshotDirty(initialPage); - if (replayStepMeta) { - replayStepMeta.usedCachedAction = true; - } - return { - taskId, - status: TaskStatus.COMPLETED, - steps: [], - output: `Navigated to ${url}`, - replayStepMeta, - }; - } - // Handle cached complete directly without LLM or navigation - if (cachedAction && cachedAction.actionType === "complete") { - if (replayStepMeta) { - replayStepMeta.usedCachedAction = true; - } - return { - taskId, - status: TaskStatus.COMPLETED, - steps: [], - output: "Task Complete", - replayStepMeta, - }; - } - - // Handle cached XPath-first execution if available - if ( - cachedAction && - cachedAction.xpath && - cachedAction.method && - cachedAction.actionType === "actElement" - ) { - await waitForSettledDOM(initialPage); - const cachedDomState = await captureDOMState(initialPage, { - useCache: false, - debug: this.debug, - enableVisualMode: false, - }); - const maxCachedRetries = _params?.maxSteps ?? 3; - for (let attempt = 0; attempt < maxCachedRetries; attempt++) { - if (replayStepMeta) { - replayStepMeta.retries = attempt + 1; - } - try { - const { cdpClient, frameContextManager } = - await initializeRuntimeContext(initialPage, this.debug); - const resolved = await resolveXPathWithCDP({ - xpath: cachedAction.xpath, - frameIndex: cachedAction.frameIndex ?? 0, - cdpClient, - frameContextManager, - debug: this.debug, - }); - - const actionContext: ActionContext = { - domState: cachedDomState, - page: initialPage, - tokenLimit: this.tokenLimit, - llm: this.llm, - debug: this.debug, - cdpActions: true, - cdp: { - client: cdpClient, - frameContextManager, - resolveElement, - dispatchCDPAction, - preferScriptBoundingBox: this.debug, - debug: this.debug, - }, - debugDir: undefined, - mcpClient: this.mcpClient, - variables: Object.values(this._variables), - invalidateDomCache: () => markDomSnapshotDirty(initialPage), - }; - - const encodedId = `${cachedAction.frameIndex ?? 0}-${resolved.backendNodeId}`; - cachedDomState.backendNodeMap = { - ...(cachedDomState.backendNodeMap || {}), - [encodedId]: resolved.backendNodeId, - }; - cachedDomState.xpathMap = { - ...(cachedDomState.xpathMap || {}), - [encodedId]: cachedAction.xpath, - }; - - const actionOutput = await performAction(actionContext, { - elementId: encodedId, - method: cachedAction.method, - arguments: cachedAction.arguments ?? [], - instruction, - confidence: 1, - }); - if (!actionOutput.success) { - throw new Error(actionOutput.message); - } - await waitForSettledDOM(initialPage); - markDomSnapshotDirty(initialPage); - if (replayStepMeta) { - replayStepMeta.usedCachedAction = true; - } - return { - taskId, - status: TaskStatus.COMPLETED, - steps: [], - output: `Executed cached action: ${instruction}`, - replayStepMeta, - }; - } catch (error) { - if (attempt >= maxCachedRetries - 1) { - if (this.debug) { - console.warn( - `[executeSingleAction][cachedAction] XPath execution failed after ${maxCachedRetries} attempts:`, - error - ); - } - } - } - } - } - // Find element with retry logic const findStart = performance.now(); const { @@ -1141,13 +1128,6 @@ export class HyperAgent { actionXPath = (actionOutput.debug as any).elementMetadata?.xpath; } - if (replayStepMeta) { - replayStepMeta.fallbackUsed = true; - replayStepMeta.fallbackElementId = element.elementId; - replayStepMeta.fallbackXPath = - domState?.xpathMap?.[element.elementId] ?? null; - } - if (!actionOutput.success) { throw new Error(actionOutput.message); } @@ -1190,7 +1170,6 @@ export class HyperAgent { status: TaskStatus.COMPLETED, steps: [], output: `Successfully executed: ${instruction}`, - replayStepMeta, }; } catch (error) { // If page switched during execution, prioritize that over the error @@ -1506,7 +1485,17 @@ export class HyperAgent { hyperPage.runFromActionCache = (cache, params) => this.runFromActionCache(cache, getActivePage, params); - attachCachedActionHelpers(hyperPage); + attachCachedActionHelpers( + { + debug: this.debug, + tokenLimit: this.tokenLimit, + llm: this.llm, + mcpClient: this.mcpClient, + variables: Object.values(this._variables), + cdpActionsEnabled: this.cdpActionsEnabled, + }, + hyperPage + ); // aiAsync tasks run in background, so we just use the current scope start point. // The task itself has internal auto-following logic (from executeTaskAsync implementation). diff --git a/src/agent/shared/action-cache-exec.ts b/src/agent/shared/action-cache-exec.ts index 0ca89b5..14ce295 100644 --- a/src/agent/shared/action-cache-exec.ts +++ b/src/agent/shared/action-cache-exec.ts @@ -1,6 +1,5 @@ -import { HyperPage, TaskOutput } from "@/types/agent/types"; -import { waitForSettledDOM } from "@/utils/waitForSettledDOM"; -import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; +import { HyperAgentInstance, HyperPage, TaskOutput } from "@/types/agent/types"; +import * as cachedRunner from "./run-cached-action"; const DEFAULT_MAX_STEPS = 3; @@ -24,50 +23,57 @@ interface PerformOptions { maxSteps?: number; } -interface PerformValueOptions extends PerformOptions { - value: string; -} - -interface PerformPositionOptions extends PerformOptions { - position: string | number; -} - -export async function performGoTo( +export async function performGoToHelper( page: HyperPage, url: string, waitUntil: "domcontentloaded" | "load" | "networkidle" = "domcontentloaded" ): Promise { - await page.goto(url, { waitUntil }); - await waitForSettledDOM(page); - markDomSnapshotDirty(page); + return cachedRunner.performGoTo(page, url, waitUntil); } function runCachedAction( + agent: HyperAgentInstance, page: HyperPage, instruction: string, method: PageAction, xpath: string, - args: unknown[], + args: Array, options?: PerformOptions ): Promise { - return page.perform(instruction, { - cachedAction: { - actionType: "actElement", - method, - arguments: args as string[], - frameIndex: options?.frameIndex ?? 0, - xpath, - }, + const runInstruction = + options?.performInstruction && options.performInstruction.length > 0 + ? options.performInstruction + : instruction; + const cachedAction = { + actionType: "actElement", + method, + arguments: args, + frameIndex: options?.frameIndex ?? 0, + xpath, + }; + + return cachedRunner.runCachedStep({ + page, + instruction: runInstruction, + cachedAction, maxSteps: options?.maxSteps ?? DEFAULT_MAX_STEPS, + debug: agent.debug, + tokenLimit: agent.tokenLimit, + llm: agent.llm, + mcpClient: agent.mcpClient, + variables: agent.variables ?? [], + preferScriptBoundingBox: agent.debug, + cdpActionsEnabled: agent.cdpActionsEnabled, }); } -export function attachCachedActionHelpers(page: HyperPage): void { - page.performClick = ( - xpath: string, - options?: PerformOptions - ) => +export function attachCachedActionHelpers( + agent: HyperAgentInstance, + page: HyperPage +): void { + page.performClick = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Click element", "click", @@ -76,11 +82,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performHover = ( - xpath: string, - options?: PerformOptions - ) => + page.performHover = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Hover element", "hover", @@ -89,12 +93,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performType = ( - xpath: string, - text: string, - options?: PerformOptions - ) => + page.performType = (xpath: string, text: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Type text", "type", @@ -103,12 +104,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performFill = ( - xpath: string, - text: string, - options?: PerformOptions - ) => + page.performFill = (xpath: string, text: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Fill input", "fill", @@ -117,12 +115,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performPress = ( - xpath: string, - key: string, - options?: PerformOptions - ) => + page.performPress = (xpath: string, key: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Press key", "press", @@ -137,6 +132,7 @@ export function attachCachedActionHelpers(page: HyperPage): void { options?: PerformOptions ) => runCachedAction( + agent, page, options?.performInstruction || "Select option", "selectOptionFromDropdown", @@ -145,11 +141,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performCheck = ( - xpath: string, - options?: PerformOptions - ) => + page.performCheck = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Check element", "check", @@ -158,11 +152,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performUncheck = ( - xpath: string, - options?: PerformOptions - ) => + page.performUncheck = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Uncheck element", "uncheck", @@ -171,11 +163,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performScrollToElement = ( - xpath: string, - options?: PerformOptions - ) => + page.performScrollToElement = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Scroll to element", "scrollToElement", @@ -190,6 +180,7 @@ export function attachCachedActionHelpers(page: HyperPage): void { options?: PerformOptions ) => runCachedAction( + agent, page, options?.performInstruction || "Scroll to percentage", "scrollToPercentage", @@ -198,11 +189,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performNextChunk = ( - xpath: string, - options?: PerformOptions - ) => + page.performNextChunk = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Scroll next chunk", "nextChunk", @@ -211,11 +200,9 @@ export function attachCachedActionHelpers(page: HyperPage): void { options ); - page.performPrevChunk = ( - xpath: string, - options?: PerformOptions - ) => + page.performPrevChunk = (xpath: string, options?: PerformOptions) => runCachedAction( + agent, page, options?.performInstruction || "Scroll previous chunk", "prevChunk", diff --git a/src/agent/shared/run-cached-action.ts b/src/agent/shared/run-cached-action.ts new file mode 100644 index 0000000..60b4a0c --- /dev/null +++ b/src/agent/shared/run-cached-action.ts @@ -0,0 +1,238 @@ +import { v4 as uuidv4 } from "uuid"; +import { ActionContext } from "@/types"; +import { performAction } from "@/agent/actions/shared/perform-action"; +import { captureDOMState } from "@/agent/shared/dom-capture"; +import { waitForSettledDOM } from "@/utils/waitForSettledDOM"; +import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; +import { initializeRuntimeContext } from "@/agent/shared/runtime-context"; +import { resolveXPathWithCDP } from "@/agent/shared/xpath-cdp-resolver"; +import { TaskOutput, TaskStatus } from "@/types/agent/types"; +import { resolveElement, dispatchCDPAction } from "@/cdp"; + +export interface CachedActionInput { + actionType: string; + xpath?: string | null; + frameIndex?: number | null; + method?: string | null; + arguments?: Array; + actionParams?: Record; +} + +export interface RunCachedStepParams { + page: import("playwright-core").Page; + instruction: string; + cachedAction: CachedActionInput; + maxSteps?: number; + debug?: boolean; + tokenLimit: number; + llm: any; + mcpClient: any; + variables: Array<{ key: string; value: string; description: string }>; + preferScriptBoundingBox?: boolean; + cdpActionsEnabled?: boolean; +} + +export async function runCachedStep( + params: RunCachedStepParams +): Promise { + const { + page, + instruction, + cachedAction, + maxSteps = 3, + debug, + tokenLimit, + llm, + mcpClient, + variables, + preferScriptBoundingBox, + cdpActionsEnabled, + } = params; + + const taskId = uuidv4(); + + if (cachedAction.actionType === "goToUrl") { + const url = + (cachedAction.arguments && cachedAction.arguments[0]) || + (cachedAction.actionParams as any)?.url || + ""; + if (!url || typeof url !== "string") { + return { + taskId, + status: TaskStatus.FAILED, + steps: [], + output: "Missing URL for goToUrl", + }; + } + await page.goto(url, { waitUntil: "domcontentloaded" }); + await waitForSettledDOM(page); + markDomSnapshotDirty(page); + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Navigated to ${url}`, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 1, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } + + if (cachedAction.actionType === "complete") { + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: "Task Complete", + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 1, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } + + if ( + cachedAction.actionType !== "actElement" || + !cachedAction.xpath || + !cachedAction.method + ) { + return { + taskId, + status: TaskStatus.FAILED, + steps: [], + output: "Unsupported cached action", + }; + } + + for (let attempt = 0; attempt < maxSteps; attempt++) { + try { + await waitForSettledDOM(page); + const domState = await captureDOMState(page, { + useCache: false, + debug, + enableVisualMode: false, + }); + + const { cdpClient, frameContextManager } = await initializeRuntimeContext( + page, + debug + ); + const resolved = await resolveXPathWithCDP({ + xpath: cachedAction.xpath, + frameIndex: cachedAction.frameIndex ?? 0, + cdpClient, + frameContextManager, + debug, + }); + + const actionContext: ActionContext = { + domState, + page, + tokenLimit, + llm, + debug, + cdpActions: cdpActionsEnabled !== false, + cdp: { + client: cdpClient, + frameContextManager, + resolveElement, + dispatchCDPAction, + preferScriptBoundingBox: preferScriptBoundingBox ?? debug, + debug, + }, + debugDir: undefined, + mcpClient, + variables, + invalidateDomCache: () => markDomSnapshotDirty(page), + }; + + const encodedId = `${cachedAction.frameIndex ?? 0}-${resolved.backendNodeId}`; + domState.backendNodeMap = { + ...(domState.backendNodeMap || {}), + [encodedId]: resolved.backendNodeId, + }; + domState.xpathMap = { + ...(domState.xpathMap || {}), + [encodedId]: cachedAction.xpath, + }; + + const methodArgs = (cachedAction.arguments ?? []).map((v) => + v == null ? "" : String(v) + ); + + const actionOutput = await performAction(actionContext, { + elementId: encodedId, + method: cachedAction.method, + arguments: methodArgs, + instruction, + confidence: 1, + }); + + if (!actionOutput.success) { + throw new Error(actionOutput.message); + } + + await waitForSettledDOM(page); + markDomSnapshotDirty(page); + + return { + taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Executed cached action: ${instruction}`, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: attempt + 1, + cachedXPath: cachedAction.xpath ?? null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } catch (error) { + if (attempt >= maxSteps - 1) { + return { + taskId, + status: TaskStatus.FAILED, + steps: [], + output: + (error as Error)?.message || "Failed to execute cached action", + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: attempt + 1, + cachedXPath: cachedAction.xpath ?? null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } + } + } + + return { + taskId, + status: TaskStatus.FAILED, + steps: [], + output: "Failed to execute cached action", + }; +} + +export async function performGoTo( + page: import("playwright-core").Page, + url: string, + waitUntil: "domcontentloaded" | "load" | "networkidle" = "domcontentloaded" +): Promise { + await page.goto(url, { waitUntil }); + await waitForSettledDOM(page); + markDomSnapshotDirty(page); +} diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index a1dd350..57377fd 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -103,7 +103,6 @@ export interface TaskParams { enableVisualMode?: boolean; useDomCache?: boolean; enableDomStreaming?: boolean; - cachedAction?: CachedActionHint; } export interface TaskOutput { @@ -149,6 +148,14 @@ export interface TaskState { error?: string; } +export interface HyperAgentInstance { + debug?: boolean; + tokenLimit: number; + llm: any; + mcpClient: any; + variables: Array<{ key: string; value: string; description: string }>; + cdpActionsEnabled?: boolean; +} export interface HyperVariable { key: string; value: string; From 703174780f0e98312da5607198b3e597b5867c3f Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 7 Dec 2025 22:05:42 -0800 Subject: [PATCH 07/13] refactor and add action cache --- src/agent/index.ts | 58 +----- src/agent/shared/action-cache-exec.ts | 20 +- src/agent/shared/action-cache-script.ts | 3 +- src/agent/shared/run-cached-action.ts | 236 +++++++++++++++--------- src/types/agent/types.ts | 2 +- 5 files changed, 168 insertions(+), 151 deletions(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index e9db714..8682265 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -61,7 +61,7 @@ import { executePlaywrightMethod } from "./shared/execute-playwright-method"; import { resolveXPathWithCDP } from "./shared/xpath-cdp-resolver"; import { createScriptFromActionCache } from "./shared/action-cache-script"; import { attachCachedActionHelpers } from "./shared/action-cache-exec"; -import { ReplayStepMeta } from "@/types/agent/types"; +import { AgentDeps, ReplayStepMeta } from "@/types/agent/types"; export class HyperAgent { // aiAction configuration constants @@ -674,42 +674,6 @@ export class HyperAgent { } } - const meta = result.replayStepMeta; - const success = result.status === TaskStatus.COMPLETED; - - // If cached/helper execution failed but we had a cached attempt, fall back to LLM perform - if ( - !success && - step.instruction && - typeof step.instruction === "string" && - (meta?.usedCachedAction ?? false) - ) { - const fallbackResult = await hyperPage.perform(step.instruction); - const existingMeta: ReplayStepMeta = meta || { - usedCachedAction: true, - fallbackUsed: false, - retries: 0, - cachedXPath: step.xpath ?? null, - fallbackXPath: null, - fallbackElementId: null, - }; - result = { - ...fallbackResult, - replayStepMeta: { - usedCachedAction: existingMeta.usedCachedAction, - fallbackUsed: true, - retries: existingMeta.retries, - cachedXPath: existingMeta.cachedXPath, - fallbackXPath: - fallbackResult.replayStepMeta?.fallbackXPath ?? - existingMeta.fallbackXPath, - fallbackElementId: - fallbackResult.replayStepMeta?.fallbackElementId ?? - existingMeta.fallbackElementId, - }, - }; - } - const finalMeta = result.replayStepMeta; const finalSuccess = result.status === TaskStatus.COMPLETED; @@ -1485,17 +1449,15 @@ export class HyperAgent { hyperPage.runFromActionCache = (cache, params) => this.runFromActionCache(cache, getActivePage, params); - attachCachedActionHelpers( - { - debug: this.debug, - tokenLimit: this.tokenLimit, - llm: this.llm, - mcpClient: this.mcpClient, - variables: Object.values(this._variables), - cdpActionsEnabled: this.cdpActionsEnabled, - }, - hyperPage - ); + const deps: AgentDeps = { + debug: this.debug, + tokenLimit: this.tokenLimit, + llm: this.llm, + mcpClient: this.mcpClient, + variables: Object.values(this._variables), + cdpActionsEnabled: this.cdpActionsEnabled, + }; + attachCachedActionHelpers(deps, hyperPage); // aiAsync tasks run in background, so we just use the current scope start point. // The task itself has internal auto-following logic (from executeTaskAsync implementation). diff --git a/src/agent/shared/action-cache-exec.ts b/src/agent/shared/action-cache-exec.ts index 14ce295..6c67e9e 100644 --- a/src/agent/shared/action-cache-exec.ts +++ b/src/agent/shared/action-cache-exec.ts @@ -1,4 +1,4 @@ -import { HyperAgentInstance, HyperPage, TaskOutput } from "@/types/agent/types"; +import { AgentDeps, HyperPage, TaskOutput } from "@/types/agent/types"; import * as cachedRunner from "./run-cached-action"; const DEFAULT_MAX_STEPS = 3; @@ -23,16 +23,8 @@ interface PerformOptions { maxSteps?: number; } -export async function performGoToHelper( - page: HyperPage, - url: string, - waitUntil: "domcontentloaded" | "load" | "networkidle" = "domcontentloaded" -): Promise { - return cachedRunner.performGoTo(page, url, waitUntil); -} - function runCachedAction( - agent: HyperAgentInstance, + agent: AgentDeps, page: HyperPage, instruction: string, method: PageAction, @@ -64,13 +56,13 @@ function runCachedAction( variables: agent.variables ?? [], preferScriptBoundingBox: agent.debug, cdpActionsEnabled: agent.cdpActionsEnabled, + performFallback: options?.performInstruction + ? (instr) => page.perform(instr) + : undefined, }); } -export function attachCachedActionHelpers( - agent: HyperAgentInstance, - page: HyperPage -): void { +export function attachCachedActionHelpers(agent: AgentDeps, page: HyperPage): void { page.performClick = (xpath: string, options?: PerformOptions) => runCachedAction( agent, diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index 9d5de11..a785bf3 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -65,6 +65,7 @@ const formatCall = (step: ActionCacheEntry): string => { } const options: Record = { performInstruction: step.instruction, + maxSteps: 3, }; if (step.frameIndex !== null && step.frameIndex !== undefined && step.frameIndex !== 0) { options.frameIndex = step.frameIndex; @@ -76,7 +77,7 @@ const formatCall = (step: ActionCacheEntry): string => { args.push(JSON.stringify(options)); } - return ` // Step ${step.stepIndex} + return ` // Step ${step.stepIndex} await page.${call.fn}(${args.join(", ")});`; } diff --git a/src/agent/shared/run-cached-action.ts b/src/agent/shared/run-cached-action.ts index 60b4a0c..6a7ef9b 100644 --- a/src/agent/shared/run-cached-action.ts +++ b/src/agent/shared/run-cached-action.ts @@ -6,8 +6,8 @@ import { waitForSettledDOM } from "@/utils/waitForSettledDOM"; import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; import { initializeRuntimeContext } from "@/agent/shared/runtime-context"; import { resolveXPathWithCDP } from "@/agent/shared/xpath-cdp-resolver"; -import { TaskOutput, TaskStatus } from "@/types/agent/types"; import { resolveElement, dispatchCDPAction } from "@/cdp"; +import { TaskOutput, TaskStatus } from "@/types/agent/types"; export interface CachedActionInput { actionType: string; @@ -30,6 +30,7 @@ export interface RunCachedStepParams { variables: Array<{ key: string; value: string; description: string }>; preferScriptBoundingBox?: boolean; cdpActionsEnabled?: boolean; + performFallback?: (instruction: string) => Promise; } export async function runCachedStep( @@ -113,77 +114,41 @@ export async function runCachedStep( }; } - for (let attempt = 0; attempt < maxSteps; attempt++) { - try { - await waitForSettledDOM(page); - const domState = await captureDOMState(page, { - useCache: false, - debug, - enableVisualMode: false, - }); - - const { cdpClient, frameContextManager } = await initializeRuntimeContext( - page, - debug - ); - const resolved = await resolveXPathWithCDP({ - xpath: cachedAction.xpath, - frameIndex: cachedAction.frameIndex ?? 0, - cdpClient, - frameContextManager, - debug, - }); - - const actionContext: ActionContext = { - domState, - page, - tokenLimit, - llm, - debug, - cdpActions: cdpActionsEnabled !== false, - cdp: { - client: cdpClient, - frameContextManager, - resolveElement, - dispatchCDPAction, - preferScriptBoundingBox: preferScriptBoundingBox ?? debug, - debug, - }, - debugDir: undefined, - mcpClient, - variables, - invalidateDomCache: () => markDomSnapshotDirty(page), - }; - - const encodedId = `${cachedAction.frameIndex ?? 0}-${resolved.backendNodeId}`; - domState.backendNodeMap = { - ...(domState.backendNodeMap || {}), - [encodedId]: resolved.backendNodeId, - }; - domState.xpathMap = { - ...(domState.xpathMap || {}), - [encodedId]: cachedAction.xpath, - }; + let lastError: unknown = null; - const methodArgs = (cachedAction.arguments ?? []).map((v) => - v == null ? "" : String(v) - ); - - const actionOutput = await performAction(actionContext, { - elementId: encodedId, - method: cachedAction.method, - arguments: methodArgs, - instruction, - confidence: 1, - }); + for (let attempt = 0; attempt < maxSteps; attempt++) { + const attemptIndex = attempt + 1; + const attemptResult = await runCachedAttempt({ + page, + instruction, + cachedAction, + debug, + tokenLimit, + llm, + mcpClient, + variables, + preferScriptBoundingBox, + cdpActionsEnabled, + }).catch((err) => { + lastError = err; + return null; + }); - if (!actionOutput.success) { - throw new Error(actionOutput.message); + if (!attemptResult) { + if (attempt < maxSteps - 1) { + continue; } - + // will fall through to fallback/final failure below + } else if (!attemptResult.success) { + lastError = new Error(attemptResult.message); + if (attempt < maxSteps - 1) { + continue; + } + // will fall through to fallback/final failure below + } else { await waitForSettledDOM(page); markDomSnapshotDirty(page); - + lastError = null; return { taskId, status: TaskStatus.COMPLETED, @@ -192,39 +157,136 @@ export async function runCachedStep( replayStepMeta: { usedCachedAction: true, fallbackUsed: false, - retries: attempt + 1, + retries: attemptIndex, cachedXPath: cachedAction.xpath ?? null, fallbackXPath: null, fallbackElementId: null, }, }; - } catch (error) { - if (attempt >= maxSteps - 1) { - return { - taskId, - status: TaskStatus.FAILED, - steps: [], - output: - (error as Error)?.message || "Failed to execute cached action", - replayStepMeta: { - usedCachedAction: true, - fallbackUsed: false, - retries: attempt + 1, - cachedXPath: cachedAction.xpath ?? null, - fallbackXPath: null, - fallbackElementId: null, - }, - }; - } } } + // All cached attempts failed; optionally fall back to LLM perform + if (params.performFallback) { + const fb = await params.performFallback(instruction); + return { + ...fb, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: true, + retries: maxSteps, + cachedXPath: cachedAction.xpath ?? null, + fallbackXPath: fb.replayStepMeta?.fallbackXPath ?? null, + fallbackElementId: fb.replayStepMeta?.fallbackElementId ?? null, + }, + }; + } + return { taskId, status: TaskStatus.FAILED, steps: [], - output: "Failed to execute cached action", + output: + (lastError as Error | null)?.message || "Failed to execute cached action", + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: maxSteps, + cachedXPath: cachedAction.xpath ?? null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; +} + +async function runCachedAttempt(args: { + page: import("playwright-core").Page; + instruction: string; + cachedAction: CachedActionInput; + debug?: boolean; + tokenLimit: number; + llm: any; + mcpClient: any; + variables: Array<{ key: string; value: string; description: string }>; + preferScriptBoundingBox?: boolean; + cdpActionsEnabled?: boolean; +}): Promise<{ success: boolean; message: string }> { + const { + page, + instruction, + cachedAction, + debug, + tokenLimit, + llm, + mcpClient, + variables, + preferScriptBoundingBox, + cdpActionsEnabled, + } = args; + + await waitForSettledDOM(page); + const domState = await captureDOMState(page, { + useCache: false, + debug, + enableVisualMode: false, + }); + + const { cdpClient, frameContextManager } = await initializeRuntimeContext( + page, + debug + ); + const resolved = await resolveXPathWithCDP({ + xpath: cachedAction.xpath!, + frameIndex: cachedAction.frameIndex ?? 0, + cdpClient, + frameContextManager, + debug, + }); + + const actionContext: ActionContext = { + domState, + page, + tokenLimit, + llm, + debug, + cdpActions: cdpActionsEnabled !== false, + cdp: { + client: cdpClient, + frameContextManager, + resolveElement, + dispatchCDPAction, + preferScriptBoundingBox: preferScriptBoundingBox ?? debug, + debug, + }, + debugDir: undefined, + mcpClient, + variables, + invalidateDomCache: () => markDomSnapshotDirty(page), + }; + + const encodedId = `${cachedAction.frameIndex ?? 0}-${resolved.backendNodeId}`; + domState.backendNodeMap = { + ...(domState.backendNodeMap || {}), + [encodedId]: resolved.backendNodeId, + }; + domState.xpathMap = { + ...(domState.xpathMap || {}), + [encodedId]: cachedAction.xpath!, }; + + const methodArgs = (cachedAction.arguments ?? []).map((v) => + v == null ? "" : String(v) + ); + + const actionOutput = await performAction(actionContext, { + elementId: encodedId, + method: cachedAction.method!, + arguments: methodArgs, + instruction, + confidence: 1, + }); + + return { success: actionOutput.success, message: actionOutput.message }; } export async function performGoTo( diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index 57377fd..0bc71b2 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -148,7 +148,7 @@ export interface TaskState { error?: string; } -export interface HyperAgentInstance { +export interface AgentDeps { debug?: boolean; tokenLimit: number; llm: any; From a24ba782d0af629e08d20f3b789f35bf0f12ccf2 Mon Sep 17 00:00:00 2001 From: Devin Date: Sun, 7 Dec 2025 22:08:22 -0800 Subject: [PATCH 08/13] clean up --- src/agent/shared/action-cache-script.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index a785bf3..06515f6 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -65,7 +65,6 @@ const formatCall = (step: ActionCacheEntry): string => { } const options: Record = { performInstruction: step.instruction, - maxSteps: 3, }; if (step.frameIndex !== null && step.frameIndex !== undefined && step.frameIndex !== 0) { options.frameIndex = step.frameIndex; From d98b0ef4f0302c7ae2e8b883381c89b3d772fc10 Mon Sep 17 00:00:00 2001 From: Devin Date: Mon, 8 Dec 2025 11:29:00 -0800 Subject: [PATCH 09/13] format the script better --- src/agent/index.ts | 5 +- src/agent/shared/action-cache-exec.ts | 5 +- src/agent/shared/action-cache-script.ts | 165 +++++++++++++----------- 3 files changed, 92 insertions(+), 83 deletions(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index 8682265..38c2f22 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -56,12 +56,9 @@ import { markDomSnapshotDirty } from "@/context-providers/a11y-dom/dom-cache"; import { setDebugOptions } from "@/debug/options"; import { initializeRuntimeContext } from "./shared/runtime-context"; import { performAction } from "./actions/shared/perform-action"; -import { captureDOMState } from "./shared/dom-capture"; -import { executePlaywrightMethod } from "./shared/execute-playwright-method"; -import { resolveXPathWithCDP } from "./shared/xpath-cdp-resolver"; import { createScriptFromActionCache } from "./shared/action-cache-script"; import { attachCachedActionHelpers } from "./shared/action-cache-exec"; -import { AgentDeps, ReplayStepMeta } from "@/types/agent/types"; +import { AgentDeps } from "@/types/agent/types"; export class HyperAgent { // aiAction configuration constants diff --git a/src/agent/shared/action-cache-exec.ts b/src/agent/shared/action-cache-exec.ts index 6c67e9e..d8b49ef 100644 --- a/src/agent/shared/action-cache-exec.ts +++ b/src/agent/shared/action-cache-exec.ts @@ -62,7 +62,10 @@ function runCachedAction( }); } -export function attachCachedActionHelpers(agent: AgentDeps, page: HyperPage): void { +export function attachCachedActionHelpers( + agent: AgentDeps, + page: HyperPage +): void { page.performClick = (xpath: string, options?: PerformOptions) => runCachedAction( agent, diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index 06515f6..9348a20 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -18,89 +18,98 @@ export function createScriptFromActionCache( const dir = path.join(process.cwd(), "action-cache-scripts", id); fs.mkdirSync(dir, { recursive: true }); -const METHOD_TO_CALL: Record = { - click: { fn: "performClick" }, - fill: { fn: "performFill", needsValue: true, valueName: "text" }, - type: { fn: "performType", needsValue: true, valueName: "text" }, - press: { fn: "performPress", needsValue: true, valueName: "key" }, - selectOptionFromDropdown: { - fn: "performSelectOption", - needsValue: true, - valueName: "option", - }, - check: { fn: "performCheck" }, - uncheck: { fn: "performUncheck" }, - hover: { fn: "performHover" }, - scrollToElement: { fn: "performScrollToElement" }, - scrollToPercentage: { - fn: "performScrollToPercentage", - needsValue: true, - valueName: "position", - }, - nextChunk: { fn: "performNextChunk" }, - prevChunk: { fn: "performPrevChunk" }, -}; - -const formatCall = (step: ActionCacheEntry): string => { - if (step.actionType === "complete") { - return ` // Step ${step.stepIndex} (complete skipped in script)`; - } - - if (step.actionType === "goToUrl") { - const urlArg = - (step.arguments && step.arguments[0]) || "https://example.com"; - return ` // Step ${step.stepIndex} - await page.goto(${JSON.stringify( - urlArg - )}, { waitUntil: "domcontentloaded" });`; - } - - const call = step.method ? METHOD_TO_CALL[step.method] : undefined; - if (call) { - const args: string[] = []; - args.push(JSON.stringify(step.xpath)); - if (call.needsValue) { - const value = step.arguments?.[0] ?? ""; - args.push(JSON.stringify(value)); + const METHOD_TO_CALL: Record< + string, + { fn: string; needsValue?: boolean; valueName?: string } + > = { + click: { fn: "performClick" }, + fill: { fn: "performFill", needsValue: true, valueName: "text" }, + type: { fn: "performType", needsValue: true, valueName: "text" }, + press: { fn: "performPress", needsValue: true, valueName: "key" }, + selectOptionFromDropdown: { + fn: "performSelectOption", + needsValue: true, + valueName: "option", + }, + check: { fn: "performCheck" }, + uncheck: { fn: "performUncheck" }, + hover: { fn: "performHover" }, + scrollToElement: { fn: "performScrollToElement" }, + scrollToPercentage: { + fn: "performScrollToPercentage", + needsValue: true, + valueName: "position", + }, + nextChunk: { fn: "performNextChunk" }, + prevChunk: { fn: "performPrevChunk" }, + }; + + const formatCall = (step: ActionCacheEntry): string => { + const indent = " "; + const argIndent = `${indent} `; + + if (step.actionType === "complete") { + return `${indent}// Step ${step.stepIndex} (complete skipped in script)`; } - const options: Record = { - performInstruction: step.instruction, - }; - if (step.frameIndex !== null && step.frameIndex !== undefined && step.frameIndex !== 0) { - options.frameIndex = step.frameIndex; + + if (step.actionType === "goToUrl") { + const urlArg = + (step.arguments && step.arguments[0]) || "https://example.com"; + return `${indent}// Step ${step.stepIndex} +${indent}await page.goto( +${argIndent}${JSON.stringify(urlArg)}, +${argIndent}{ waitUntil: "domcontentloaded" } +${indent});`; } - const hasOptions = - options.performInstruction !== undefined || - options.frameIndex !== undefined; - if (hasOptions) { - args.push(JSON.stringify(options)); + + const call = step.method ? METHOD_TO_CALL[step.method] : undefined; + if (call) { + const args: string[] = []; + args.push(JSON.stringify(step.xpath)); + if (call.needsValue) { + const value = step.arguments?.[0] ?? ""; + args.push(JSON.stringify(value)); + } + const options: Record = {}; + if (step.instruction) { + options.performInstruction = step.instruction; + } + if ( + step.frameIndex !== null && + step.frameIndex !== undefined && + step.frameIndex !== 0 + ) { + options.frameIndex = step.frameIndex; + } + + const optionEntries = Object.entries(options).map( + ([key, value]) => `${argIndent} ${key}: ${JSON.stringify(value)},` + ); + const optionsBlock = + optionEntries.length > 0 + ? `${argIndent}{\n${optionEntries.join("\n")}\n${argIndent}}` + : ""; + + const callArgs = [ + `${argIndent}${JSON.stringify(step.xpath)},`, + call.needsValue + ? `${argIndent}${JSON.stringify(step.arguments?.[0] ?? "")},` + : null, + optionsBlock ? `${optionsBlock},` : null, + ] + .filter(Boolean) + .join("\n"); + + return `${indent}// Step ${step.stepIndex} +${indent}await page.${call.fn}( +${callArgs} +${indent});`; } - return ` // Step ${step.stepIndex} - await page.${call.fn}(${args.join(", ")});`; - } - - // Fallback to perform with cachedAction if no helper mapping exists - const cached = { - actionType: step.actionType, - method: step.method, - arguments: step.arguments ?? [], - frameIndex: step.frameIndex ?? 0, - xpath: step.xpath, - elementId: step.elementId, + throw new Error(`Unknown method: ${step.method}`); }; - return ` // Step ${step.stepIndex} - await page.perform(${JSON.stringify(step.instruction)}, { - cachedAction: ${JSON.stringify(cached, null, 2) - .split("\n") - .map((line, idx) => (idx === 0 ? line : " " + line)) - .join("\n")}, - });`; -}; - -const stepSnippets = steps - .map((step) => formatCall(step)) - .join("\n\n"); + + const stepSnippets = steps.map((step) => formatCall(step)).join("\n\n"); const script = `import { HyperAgent } from "@hyperbrowser/agent"; async function main() { From bf916beb12310c8a51ee951b02b2ee835df5bac6 Mon Sep 17 00:00:00 2001 From: Devin Date: Mon, 8 Dec 2025 12:01:56 -0800 Subject: [PATCH 10/13] output string only --- src/agent/index.ts | 19 ++++++++++--------- src/agent/shared/action-cache-script.ts | 14 ++------------ src/agent/shared/run-cached-action.ts | 11 +++++++++++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index 38c2f22..f1d033c 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -1028,7 +1028,8 @@ export class HyperAgent { 400 ); } - let actionXPath: string | undefined; + let actionXPath: string | null = + domState?.xpathMap?.[element.elementId] ?? null; // Use shared runtime context const { cdpClient, frameContextManager } = await initializeRuntimeContext( @@ -1081,14 +1082,6 @@ export class HyperAgent { confidence: 1, // Implicit confidence for single action }); - if ( - actionOutput.debug && - typeof actionOutput.debug === "object" && - "requestedAction" in actionOutput.debug - ) { - actionXPath = (actionOutput.debug as any).elementMetadata?.xpath; - } - if (!actionOutput.success) { throw new Error(actionOutput.message); } @@ -1131,6 +1124,14 @@ export class HyperAgent { status: TaskStatus.COMPLETED, steps: [], output: `Successfully executed: ${instruction}`, + replayStepMeta: { + usedCachedAction: false, + fallbackUsed: false, + retries: 1, + cachedXPath: null, + fallbackXPath: actionXPath ?? null, + fallbackElementId: element.elementId ?? null, + }, }; } catch (error) { // If page switched during execution, prioritize that over the error diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index 9348a20..a48786c 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -1,5 +1,3 @@ -import fs from "fs"; -import path from "path"; import { ActionCacheEntry } from "@/types"; interface CreateScriptFromActionCacheParams { @@ -10,13 +8,7 @@ interface CreateScriptFromActionCacheParams { export function createScriptFromActionCache( params: CreateScriptFromActionCacheParams ): string { - const { taskId, steps } = params; - const id = - taskId && taskId.length > 0 - ? taskId - : new Date().toISOString().replace(/[:.]/g, "-"); - const dir = path.join(process.cwd(), "action-cache-scripts", id); - fs.mkdirSync(dir, { recursive: true }); + const { steps } = params; const METHOD_TO_CALL: Record< string, @@ -130,7 +122,5 @@ main().catch((err) => { }); `; - const outPath = path.join(dir, "run-cached-actions.ts"); - fs.writeFileSync(outPath, script); - return outPath; + return script; } diff --git a/src/agent/shared/run-cached-action.ts b/src/agent/shared/run-cached-action.ts index 6a7ef9b..17bae9d 100644 --- a/src/agent/shared/run-cached-action.ts +++ b/src/agent/shared/run-cached-action.ts @@ -169,6 +169,17 @@ export async function runCachedStep( // All cached attempts failed; optionally fall back to LLM perform if (params.performFallback) { const fb = await params.performFallback(instruction); + const cachedXPath = cachedAction.xpath || "N/A"; + const resolvedXPath = fb.replayStepMeta?.fallbackXPath || "N/A"; + // eslint-disable-next-line no-console + console.log( + ` +⚠️ [runCachedStep] Cached action failed. Falling back to LLM... + Instruction: "${instruction}" + ❌ Cached XPath Failed: "${cachedXPath}" + ✅ LLM Resolved New XPath: "${resolvedXPath}" +` + ); return { ...fb, replayStepMeta: { From 0801bdab03a5f565ebf668b6dec3453b443c9345 Mon Sep 17 00:00:00 2001 From: Devin Date: Mon, 8 Dec 2025 12:20:05 -0800 Subject: [PATCH 11/13] better typings --- src/agent/index.ts | 17 ++++++++++------- src/agent/tools/agent.ts | 21 +++++++++++---------- src/types/agent/types.ts | 5 ++++- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index f1d033c..8e8062e 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -45,6 +45,7 @@ import type { HyperPage, HyperVariable, ActionCacheEntry, + AgentTaskOutput, } from "../types/agent/types"; import { z } from "zod"; import { ErrorEmitter } from "../utils"; @@ -455,9 +456,7 @@ export class HyperAgent { mergedParams ) .then((result) => { - if (result.actionCache) { - this.actionCacheByTaskId[taskId] = result.actionCache; - } + this.actionCacheByTaskId[taskId] = result.actionCache; cleanup(); }) .catch((error: Error) => { @@ -490,7 +489,7 @@ export class HyperAgent { task: string, params?: TaskParams, initPage?: Page - ): Promise { + ): Promise { const taskId = uuidv4(); let activeTaskPage = initPage || (await this.getCurrentPage()); @@ -537,9 +536,7 @@ export class HyperAgent { mergedParams ); this.context?.off("page", onPage); - if (result.actionCache) { - this.actionCacheByTaskId[taskId] = result.actionCache; - } + this.actionCacheByTaskId[taskId] = result.actionCache; return result; } catch (error) { this.context?.off("page", onPage); @@ -1124,6 +1121,12 @@ export class HyperAgent { status: TaskStatus.COMPLETED, steps: [], output: `Successfully executed: ${instruction}`, + actionCache: { + taskId, + createdAt: startTime, + status: TaskStatus.COMPLETED, + steps: [], + }, replayStepMeta: { usedCachedAction: false, fallbackUsed: false, diff --git a/src/agent/tools/agent.ts b/src/agent/tools/agent.ts index f5109ac..3bd9d47 100644 --- a/src/agent/tools/agent.ts +++ b/src/agent/tools/agent.ts @@ -1,4 +1,8 @@ -import { ActionCacheOutput, AgentStep } from "@/types/agent/types"; +import { + ActionCacheOutput, + AgentStep, + AgentTaskOutput, +} from "@/types/agent/types"; import fs from "fs"; import { performance } from "perf_hooks"; @@ -22,12 +26,7 @@ import { captureDOMState } from "../shared/dom-capture"; import { initializeRuntimeContext } from "../shared/runtime-context"; import { AgentOutputFn, endTaskStatuses } from "@hyperbrowser/agent/types"; -import { - TaskParams, - TaskOutput, - TaskState, - TaskStatus, -} from "@hyperbrowser/agent/types"; +import { TaskParams, TaskState, TaskStatus } from "@hyperbrowser/agent/types"; import { HyperagentError } from "../error"; import { buildAgentStepMessages } from "../messages/builder"; @@ -210,7 +209,7 @@ export const runAgentTask = async ( ctx: AgentCtx, taskState: TaskState, params?: TaskParams -): Promise => { +): Promise => { const taskStart = performance.now(); const taskId = taskState.id; const debugDir = params?.debugDir || `debug/${taskId}`; @@ -280,7 +279,9 @@ export const runAgentTask = async ( const newPage = await ctx.activePage(); if (newPage && newPage !== page) { if (ctx.debug) { - console.log(`[Agent] Switching active page context to ${newPage.url()}`); + console.log( + `[Agent] Switching active page context to ${newPage.url()}` + ); } cleanupDomListeners(page); page = newPage; @@ -681,7 +682,7 @@ export const runAgentTask = async ( JSON.stringify(actionCache, null, 2) ); - const taskOutput: TaskOutput = { + const taskOutput: AgentTaskOutput = { taskId, status: taskState.status, steps: taskState.steps, diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index 0bc71b2..bab52fb 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -114,6 +114,9 @@ export interface TaskOutput { replayStepMeta?: ReplayStepMeta; } +// Returned by full agent runs (e.g., page.ai()) where actionCache is always populated. +export type AgentTaskOutput = TaskOutput & { actionCache: ActionCacheOutput }; + export interface Task { id: string; getStatus: () => TaskStatus; @@ -269,7 +272,7 @@ export interface HyperPage extends Page { * Best for: Complex workflows, multi-step tasks, exploratory automation * Mode: Always visual (screenshots with overlays) */ - ai: (task: string, params?: TaskParams) => Promise; + ai: (task: string, params?: TaskParams) => Promise; /** * Execute a single granular action using a11y mode From 1185b505426574e8785bed6ee71e4374e0a104d0 Mon Sep 17 00:00:00 2001 From: Devin Date: Mon, 8 Dec 2025 15:29:20 -0800 Subject: [PATCH 12/13] bump package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ee120e2..0ba39b2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@hyperbrowser/agent", - "version": "1.0.8", + "version": "1.0.9", "description": "Hyperbrowsers Web Agent", "author": "", "main": "dist/index.js", From caa1e940d47057135e789b1fe6851071b3a40798 Mon Sep 17 00:00:00 2001 From: Devin Date: Mon, 8 Dec 2025 17:27:22 -0800 Subject: [PATCH 13/13] fix bugs --- src/agent/index.ts | 214 +++++++++++++++++++----- src/agent/shared/action-cache-script.ts | 21 ++- src/agent/shared/action-cache.ts | 11 +- src/agent/tools/agent.ts | 12 +- src/types/agent/types.ts | 99 +++-------- src/types/index.ts | 4 +- 6 files changed, 231 insertions(+), 130 deletions(-) diff --git a/src/agent/index.ts b/src/agent/index.ts index 8e8062e..ee8f821 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -46,6 +46,7 @@ import type { HyperVariable, ActionCacheEntry, AgentTaskOutput, + PerformOptions, } from "../types/agent/types"; import { z } from "zod"; import { ErrorEmitter } from "../utils"; @@ -560,21 +561,66 @@ export class HyperAgent { let replayStatus: TaskStatus.COMPLETED | TaskStatus.FAILED = TaskStatus.COMPLETED; - const helperMap: Record = { - click: "performClick", - fill: "performFill", - type: "performType", - press: "performPress", - selectOptionFromDropdown: "performSelectOption", - check: "performCheck", - uncheck: "performUncheck", - hover: "performHover", - scrollToElement: "performScrollToElement", - scrollToPercentage: "performScrollToPercentage", - nextChunk: "performNextChunk", - prevChunk: "performPrevChunk", + /** + * Type-safe dispatch for HyperPage perform* methods. + * Explicitly routes to the correct method with proper typing. + * + * Methods that require a value argument (second param): type, fill, press, selectOptionFromDropdown, scrollToPercentage + * Methods with only xpath and options: click, hover, check, uncheck, scrollToElement, nextChunk, prevChunk + */ + const dispatchPerformHelper = ( + hp: HyperPage, + method: string, + xpath: string, + value: string | undefined, + options: PerformOptions + ): Promise => { + switch (method) { + case "click": + return hp.performClick(xpath, options); + case "hover": + return hp.performHover(xpath, options); + case "type": + return hp.performType(xpath, value ?? "", options); + case "fill": + return hp.performFill(xpath, value ?? "", options); + case "press": + return hp.performPress(xpath, value ?? "", options); + case "selectOptionFromDropdown": + return hp.performSelectOption(xpath, value ?? "", options); + case "check": + return hp.performCheck(xpath, options); + case "uncheck": + return hp.performUncheck(xpath, options); + case "scrollToElement": + return hp.performScrollToElement(xpath, options); + case "scrollToPercentage": + return hp.performScrollToPercentage(xpath, value ?? "", options); + case "nextChunk": + return hp.performNextChunk(xpath, options); + case "prevChunk": + return hp.performPrevChunk(xpath, options); + default: + throw new Error(`Unknown perform helper method: ${method}`); + } }; + /** Set of valid method names that can be dispatched */ + const validHelperMethods = new Set([ + "click", + "fill", + "type", + "press", + "selectOptionFromDropdown", + "check", + "uncheck", + "hover", + "scrollToElement", + "scrollToPercentage", + "nextChunk", + "prevChunk", + ]); + for (const step of [...cache.steps].sort( (a, b) => a.stepIndex - b.stepIndex )) { @@ -628,14 +674,115 @@ export class HyperAgent { fallbackElementId: null, }, }; + } else if (step.actionType === "refreshPage") { + await hyperPage.reload({ waitUntil: "domcontentloaded" }); + await waitForSettledDOM(hyperPage); + markDomSnapshotDirty(hyperPage); + result = { + taskId: cache.taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: "Page refreshed", + actionCache: { + taskId: cache.taskId, + createdAt: cache.createdAt, + status: TaskStatus.COMPLETED, + steps: [], + }, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } else if (step.actionType === "wait") { + const durationRaw = + (step.arguments && step.arguments[0]) || + (step.actionParams as any)?.duration; + const durationMs = + typeof durationRaw === "number" + ? durationRaw + : Number.parseInt(String(durationRaw ?? ""), 10); + const waitMs = Number.isFinite(durationMs) ? durationMs : 1000; + await hyperPage.waitForTimeout(waitMs); + result = { + taskId: cache.taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: `Waited ${waitMs}ms`, + actionCache: { + taskId: cache.taskId, + createdAt: cache.createdAt, + status: TaskStatus.COMPLETED, + steps: [], + }, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } else if (step.actionType === "extract") { + try { + const extractResult = await hyperPage.extract(step.instruction); + result = { + taskId: cache.taskId, + status: TaskStatus.COMPLETED, + steps: [], + output: + typeof extractResult === "string" + ? extractResult + : JSON.stringify(extractResult), + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } catch (err: any) { + result = { + taskId: cache.taskId, + status: TaskStatus.FAILED, + steps: [], + output: `Extract failed: ${err?.message || String(err)}`, + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; + } + } else if (step.actionType === "analyzePdf") { + result = { + taskId: cache.taskId, + status: TaskStatus.FAILED, + steps: [], + output: "analyzePdf replay is not supported in runFromActionCache.", + replayStepMeta: { + usedCachedAction: true, + fallbackUsed: false, + retries: 0, + cachedXPath: null, + fallbackXPath: null, + fallbackElementId: null, + }, + }; } else { - const helperName = - step.method && helperMap[step.method] ? helperMap[step.method] : null; - if ( - helperName && - typeof (hyperPage as any)[helperName] === "function" - ) { - const options: any = { + const method = step.method; + if (method && validHelperMethods.has(method)) { + const options: PerformOptions = { performInstruction: step.instruction, maxSteps: maxXPathRetries, }; @@ -643,26 +790,13 @@ export class HyperAgent { options.frameIndex = step.frameIndex; } const valueArg = step.arguments?.[0]; - if ( - [ - "type", - "fill", - "press", - "selectOptionFromDropdown", - "scrollToPercentage", - ].includes(step.method ?? "") - ) { - result = await (hyperPage as any)[helperName]( - step.xpath ?? "", - valueArg, - options - ); - } else { - result = await (hyperPage as any)[helperName]( - step.xpath ?? "", - options - ); - } + result = await dispatchPerformHelper( + hyperPage, + method, + step.xpath ?? "", + valueArg, + options + ); } else { result = await hyperPage.perform(step.instruction); } diff --git a/src/agent/shared/action-cache-script.ts b/src/agent/shared/action-cache-script.ts index a48786c..61406aa 100644 --- a/src/agent/shared/action-cache-script.ts +++ b/src/agent/shared/action-cache-script.ts @@ -54,6 +54,25 @@ ${argIndent}{ waitUntil: "domcontentloaded" } ${indent});`; } + if (step.actionType === "refreshPage") { + return `${indent}// Step ${step.stepIndex} +${indent}await page.reload({ waitUntil: "domcontentloaded" });`; + } + + if (step.actionType === "wait") { + const waitMs = + (step.arguments && Number(step.arguments[0])) || + (step.actionParams as any)?.duration || + 1000; + return `${indent}// Step ${step.stepIndex} +${indent}await page.waitForTimeout(${waitMs});`; + } + + if (step.actionType === "extract") { + return `${indent}// Step ${step.stepIndex} +${indent}await page.extract("${step.instruction}");`; + } + const call = step.method ? METHOD_TO_CALL[step.method] : undefined; if (call) { const args: string[] = []; @@ -98,7 +117,7 @@ ${callArgs} ${indent});`; } - throw new Error(`Unknown method: ${step.method}`); + return `${indent}// Step ${step.stepIndex} (unsupported actionType=${step.actionType}, method=${step.method ?? "N/A"})`; }; const stepSnippets = steps.map((step) => formatCall(step)).join("\n\n"); diff --git a/src/agent/shared/action-cache.ts b/src/agent/shared/action-cache.ts index 5b4731d..11b7b5b 100644 --- a/src/agent/shared/action-cache.ts +++ b/src/agent/shared/action-cache.ts @@ -10,8 +10,11 @@ const TEXT_NODE_SUFFIX = /\/text\(\)(\[\d+\])?$/iu; const isString = (value: unknown): value is string => typeof value === "string"; -const isStringArray = (value: unknown): value is string[] => - Array.isArray(value) && value.every((item) => typeof item === "string"); +const isStringOrNumberArray = ( + value: unknown +): value is Array => + Array.isArray(value) && + value.every((item) => typeof item === "string" || typeof item === "number"); const normalizeXPath = (raw?: string | null): string | null => { if (!raw) { @@ -46,8 +49,8 @@ const extractMethod = (action: ActionType): string | null => { const extractArguments = (action: ActionType): string[] => { const params = action.params as Record; - if (isStringArray(params.arguments)) { - return params.arguments; + if (isStringOrNumberArray(params.arguments)) { + return params.arguments.map((item) => item.toString()); } return []; }; diff --git a/src/agent/tools/agent.ts b/src/agent/tools/agent.ts index 3bd9d47..ff38c35 100644 --- a/src/agent/tools/agent.ts +++ b/src/agent/tools/agent.ts @@ -676,11 +676,13 @@ export const runAgentTask = async ( status: taskState.status, steps: actionCacheSteps, }; - fs.mkdirSync(debugDir, { recursive: true }); - fs.writeFileSync( - `${debugDir}/action-cache.json`, - JSON.stringify(actionCache, null, 2) - ); + if (ctx.debug) { + fs.mkdirSync(debugDir, { recursive: true }); + fs.writeFileSync( + `${debugDir}/action-cache.json`, + JSON.stringify(actionCache, null, 2) + ); + } const taskOutput: AgentTaskOutput = { taskId, diff --git a/src/types/agent/types.ts b/src/types/agent/types.ts index bab52fb..638ead3 100644 --- a/src/types/agent/types.ts +++ b/src/types/agent/types.ts @@ -165,108 +165,51 @@ export interface HyperVariable { description: string; } +/** + * Common options for all perform* helper methods on HyperPage. + */ +export interface PerformOptions { + frameIndex?: number | null; + performInstruction?: string | null; + maxSteps?: number; +} + export interface HyperPage extends Page { - performClick: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; - performHover: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; + performClick: (xpath: string, options?: PerformOptions) => Promise; + performHover: (xpath: string, options?: PerformOptions) => Promise; performType: ( xpath: string, text: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; performFill: ( xpath: string, text: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; performPress: ( xpath: string, key: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; performSelectOption: ( xpath: string, option: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; - performCheck: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; - performUncheck: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; + performCheck: (xpath: string, options?: PerformOptions) => Promise; + performUncheck: (xpath: string, options?: PerformOptions) => Promise; performScrollToElement: ( xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; performScrollToPercentage: ( xpath: string, position: string | number, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; - performNextChunk: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } - ) => Promise; - performPrevChunk: ( - xpath: string, - options?: { - frameIndex?: number | null; - performInstruction?: string | null; - maxSteps?: number; - } + options?: PerformOptions ) => Promise; + performNextChunk: (xpath: string, options?: PerformOptions) => Promise; + performPrevChunk: (xpath: string, options?: PerformOptions) => Promise; /** * Execute a complex multi-step task using visual mode * Best for: Complex workflows, multi-step tasks, exploratory automation diff --git a/src/types/index.ts b/src/types/index.ts index 0907dd8..9177fa7 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -16,8 +16,6 @@ import { ActionCacheOutput, ActionCacheReplayResult, ActionCacheReplayStepResult, - ReplayStepMeta, - CachedActionHint, RunFromActionCacheParams, TaskParams, TaskOutput, @@ -25,6 +23,7 @@ import { TaskStatus, TaskState, endTaskStatuses, + PerformOptions, } from "./agent/types"; // Config Types @@ -56,6 +55,7 @@ export { Task, TaskStatus, TaskState, + PerformOptions, // Config Types MCPServerConfig,