From 63ae37d2099d69328224e8c3601e2781434655fd Mon Sep 17 00:00:00 2001 From: Forge Date: Fri, 13 Mar 2026 15:19:37 -0700 Subject: [PATCH 1/2] fix: Playwright 1.58+ Chromium CDP compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six fixes, led by breaking changes in the newer Chrome DOMSnapshot API: 1. strings array moved to top level - Old: result.documents[0].strings - New: result.strings (top-level, shared across docs) - Fix: snapshot.strings ?? doc.strings ?? [] 2. childNodeIndexes removed, replaced by parentIndex - Old: nodes.childNodeIndexes[nodeIndex] gave children directly - New: nodes.parentIndex[i] gives parent of node i - Fix: build childrenMap by inverting parentIndex on load 3. paintOrder renamed to paintOrders (plural) - Fix: layout.paintOrder ?? layout.paintOrders 4. inputValue entries can contain -1 (no value) sentinel - Fix: skip valueIdx < 0 5. querySelectorAll in content-extractor returns non-iterable object via Turndown DOM shim - Fix: wrap all querySelectorAll calls with Array.from() 6. CLI --max-steps option parsed as maxSteps but code read stepLimit → NaN → agent quit at step 0 - Fix: options.maxSteps ?? options.stepLimit ?? '25' Tested against https://news.ycombinator.com - agent now successfully extracts page content in step 1. 
--- bun.lock | 35 +++++----- packages/cli/src/commands/run.ts | 3 +- packages/cli/src/index.ts | 0 packages/core/package.json | 2 +- packages/core/src/agent/agent.ts | 16 +++-- packages/core/src/page/content-extractor.ts | 6 +- packages/core/src/page/snapshot-builder.ts | 39 +++++++++-- packages/core/src/page/types.ts | 10 +-- packages/core/test-extract-debug.ts | 41 ++++++++++++ packages/core/test-snapshot-diag.ts | 71 +++++++++++++++++++++ packages/core/test-snapshot.ts | 21 ++++++ packages/core/test-snapshot2.ts | 20 ++++++ packages/core/test-snapshot3.ts | 22 +++++++ 13 files changed, 252 insertions(+), 34 deletions(-) mode change 100644 => 100755 packages/cli/src/index.ts create mode 100644 packages/core/test-extract-debug.ts create mode 100644 packages/core/test-snapshot-diag.ts create mode 100644 packages/core/test-snapshot.ts create mode 100644 packages/core/test-snapshot2.ts create mode 100644 packages/core/test-snapshot3.ts diff --git a/bun.lock b/bun.lock index 3909f16..9aa29b8 100644 --- a/bun.lock +++ b/bun.lock @@ -1,40 +1,41 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "open-browser-monorepo", "devDependencies": { - "@biomejs/biome": "^1.9.0", - "@types/bun": "^1.1.0", - "typescript": "^5.6.0", + "@biomejs/biome": "^1.9.4", + "@types/bun": "^1.2.0", + "typescript": "^5.8.0", }, }, "packages/cli": { "name": "@open-browser/cli", - "version": "0.1.0", + "version": "1.1.0", "bin": { "open-browser": "src/index.ts", }, "dependencies": { - "chalk": "^5.3.0", - "commander": "^12.0.0", + "chalk": "^5.4.0", + "commander": "^12.1.0", "open-browser": "workspace:*", }, }, "packages/core": { "name": "open-browser", - "version": "0.1.0", + "version": "1.1.0", "dependencies": { - "@ai-sdk/anthropic": "^1.0.0", - "@ai-sdk/google": "^1.0.0", - "@ai-sdk/openai": "^1.0.0", - "ai": "^4.0.0", - "dotenv": "^16.4.0", + "@ai-sdk/anthropic": "^1.1.0", + "@ai-sdk/google": "^1.1.0", + "@ai-sdk/openai": "^1.1.0", + "ai": "^4.2.0", + "dotenv": 
"^16.5.0", "mitt": "^3.0.1", - "nanoid": "^5.0.0", - "playwright": "^1.48.0", - "turndown": "^7.2.0", - "zod": "^3.23.0", + "nanoid": "^5.1.0", + "playwright": "^1.51.0", + "turndown": "^7.2.1", + "zod": "^3.24.0", }, "devDependencies": { "@types/turndown": "^5.0.5", @@ -48,7 +49,7 @@ }, "packages/sandbox": { "name": "@open-browser/sandbox", - "version": "0.1.0", + "version": "1.1.0", "dependencies": { "open-browser": "workspace:*", }, diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index b680e0c..e4aa0e3 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -23,6 +23,7 @@ interface RunOptions { provider: string; headless: boolean; stepLimit: number; + maxSteps: string; verbose: boolean; noCost: boolean; } @@ -76,7 +77,7 @@ export function registerRunCommand(program: Command): void { .option('-v, --verbose', 'Show detailed step information', false) .option('--no-cost', 'Hide cost tracking information') .action(async (task: string, options: RunOptions) => { - const stepLimit = Number.parseInt(String(options.stepLimit), 10); + const stepLimit = Number.parseInt(String(options.maxSteps ?? options.stepLimit ?? 
'25'), 10); displayHeader(`Agent Task: ${task}`); console.log( diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts old mode 100644 new mode 100755 diff --git a/packages/core/package.json b/packages/core/package.json index d8d5f35..91b4f82 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -20,7 +20,7 @@ "@ai-sdk/google": "^1.1.0", "zod": "^3.24.0", "playwright": "^1.51.0", - "mitt": "^3.0.2", + "mitt": "^3.0.1", "nanoid": "^5.1.0", "turndown": "^7.2.1", "dotenv": "^16.5.0" diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 8b98b71..50977dd 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -685,6 +685,10 @@ export class Agent { * Normalize the various output schema shapes into the standard AgentDecision. */ private normalizeOutput(output: Record): AgentDecision { + // Ensure actions is always an array (LLMs may return {} instead of []) + const ensureArray = (val: unknown): Record[] => + Array.isArray(val) ? val : []; + // Flash schema: { goal, actions } if ('goal' in output && !('currentState' in output)) { return { @@ -693,7 +697,7 @@ export class Agent { memory: '', nextGoal: String(output.goal ?? ''), }, - actions: (output.actions ?? []) as Record[], + actions: ensureArray(output.actions), }; } @@ -705,12 +709,16 @@ export class Agent { memory: '', nextGoal: '', }, - actions: (output.actions ?? 
[]) as Record[], + actions: ensureArray(output.actions), }; } - // Standard schema passthrough - return output as AgentDecision; + // Standard schema passthrough — still guard actions + const decision = output as AgentDecision; + if (!Array.isArray(decision.actions)) { + decision.actions = []; + } + return decision; } // ──────────────────────────────────────── diff --git a/packages/core/src/page/content-extractor.ts b/packages/core/src/page/content-extractor.ts index 5c0f7f2..de10587 100644 --- a/packages/core/src/page/content-extractor.ts +++ b/packages/core/src/page/content-extractor.ts @@ -46,11 +46,13 @@ function getTurndown(): TurndownService { function htmlTableToMarkdown(table: HTMLTableElement): string { const rows: string[][] = []; - const tableRows = table.querySelectorAll('tr'); + // Turndown's DOM shim may return a non-iterable object from querySelectorAll, + // so wrap with Array.from to ensure iterability. + const tableRows = Array.from(table.querySelectorAll('tr') ?? []); for (const row of tableRows) { const cells: string[] = []; - for (const cell of row.querySelectorAll('th, td')) { + for (const cell of Array.from(row.querySelectorAll('th, td') ?? [])) { cells.push((cell.textContent ?? '').trim().replace(/\|/g, '\\|')); } if (cells.length > 0) { diff --git a/packages/core/src/page/snapshot-builder.ts b/packages/core/src/page/snapshot-builder.ts index 52ecb5f..4a1e927 100644 --- a/packages/core/src/page/snapshot-builder.ts +++ b/packages/core/src/page/snapshot-builder.ts @@ -63,20 +63,25 @@ export class SnapshotBuilder { }; } - const { nodes, layout, strings } = doc; + const { nodes, layout } = doc; + // In newer Chromium, `strings` is at the top level of the snapshot result, + // not nested inside each document. Fall back to doc.strings for older versions. + const strings: string[] = snapshot.strings ?? doc.strings ?? 
[]; // Build backend node ID → AX node map const axNodeMap = new Map(); this.buildAXMap(axTree, axNodeMap); // Build layout index map + // In newer Chromium, `paintOrder` was renamed to `paintOrders` (plural). + const paintOrders = layout.paintOrder ?? (layout as unknown as { paintOrders?: number[] }).paintOrders; const layoutMap = new Map(); for (let i = 0; i < layout.nodeIndex.length; i++) { const nodeIdx = layout.nodeIndex[i]; layoutMap.set(nodeIdx, { bounds: layout.bounds[i], text: layout.text[i] !== -1 ? strings[layout.text[i]] : undefined, - paintOrder: layout.paintOrder?.[i], + paintOrder: paintOrders?.[i], }); } @@ -94,7 +99,28 @@ export class SnapshotBuilder { for (let i = 0; i < nodes.inputValue.index.length; i++) { const nodeIdx = nodes.inputValue.index[i]; const valueIdx = nodes.inputValue.value[i]; - inputValueMap.set(nodeIdx, strings[valueIdx]); + // Skip -1 values (no string) + if (valueIdx >= 0) { + inputValueMap.set(nodeIdx, strings[valueIdx]); + } + } + } + + // Build children map from parentIndex. + // In newer Chromium, `childNodeIndexes` no longer exists; instead, each node + // has a `parentIndex` entry pointing to its parent. We invert that to get children. 
+ const childrenMap = new Map(); + if (nodes.parentIndex) { + for (let i = 0; i < nodes.parentIndex.length; i++) { + const parentIdx = nodes.parentIndex[i]; + if (parentIdx >= 0) { + let children = childrenMap.get(parentIdx); + if (!children) { + children = []; + childrenMap.set(parentIdx, children); + } + children.push(i); + } } } @@ -107,6 +133,7 @@ export class SnapshotBuilder { axNodeMap, clickableSet, inputValueMap, + childrenMap, viewportSize, capturedAttributes, ); @@ -122,6 +149,7 @@ export class SnapshotBuilder { axNodeMap: Map, clickableSet: Set, inputValueMap: Map, + childrenMap: Map, viewportSize: { width: number; height: number }, capturedAttributes: string[], ): PageTreeNode { @@ -199,8 +227,8 @@ export class SnapshotBuilder { node.highlightIndex = elementIndex(this.indexCounter++); } - // Build children - const childIndexes: number[] = nodes.childNodeIndexes?.[nodeIndex] ?? []; + // Build children using the pre-built childrenMap (derived from parentIndex) + const childIndexes = childrenMap.get(nodeIndex) ?? 
[]; for (const childIdx of childIndexes) { const child = this.buildNodeTree( childIdx, @@ -210,6 +238,7 @@ export class SnapshotBuilder { axNodeMap, clickableSet, inputValueMap, + childrenMap, viewportSize, capturedAttributes, ); diff --git a/packages/core/src/page/types.ts b/packages/core/src/page/types.ts index 558d475..457b8f7 100644 --- a/packages/core/src/page/types.ts +++ b/packages/core/src/page/types.ts @@ -151,16 +151,17 @@ export interface CDPLayoutNode { } export interface CDPSnapshotResult { + // In newer Chromium (Playwright 1.58+), `strings` is at the top level + strings?: string[]; documents: Array<{ nodes: { nodeType: number[]; nodeName: number[]; nodeValue: number[]; backendNodeId: number[]; - childNodeIndexes?: number[][]; - attributes: Array; parentIndex: number[]; - contentDocumentIndex?: { index: number[] }; + attributes: Array; + contentDocumentIndex?: { index: number[]; value: number[] }; shadowRootType?: { index: number[]; value: number[] }; isClickable?: { index: number[] }; inputValue?: { index: number[]; value: number[] }; @@ -178,7 +179,8 @@ export interface CDPSnapshotResult { layoutIndex: number[]; bounds: number[][]; }; - strings: string[]; + // In older Chromium, `strings` was nested inside each document + strings?: string[]; }>; } diff --git a/packages/core/test-extract-debug.ts b/packages/core/test-extract-debug.ts new file mode 100644 index 0000000..ae507c4 --- /dev/null +++ b/packages/core/test-extract-debug.ts @@ -0,0 +1,41 @@ +import { chromium } from 'playwright'; +import { extractMarkdown } from './src/page/content-extractor.js'; + +async function main() { + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ viewport: { width: 1280, height: 720 } }); + const page = await context.newPage(); + + await page.goto('https://news.ycombinator.com', { waitUntil: 'networkidle' }); + + try { + console.log('Extracting markdown...'); + const markdown = await extractMarkdown(page); + 
console.log('Markdown length:', markdown.length); + console.log('First 500 chars:', markdown.slice(0, 500)); + } catch (error) { + console.error('extractMarkdown error:', error); + } + + // Now try invoking the model via the ContentExtractor + try { + const { createOpenAI } = await import('@ai-sdk/openai'); + const { VercelModelAdapter } = await import('./src/model/adapters/vercel.js'); + const { ContentExtractor } = await import('./src/commands/extraction/extractor.js'); + + const openai = createOpenAI({}); + const model = new VercelModelAdapter({ model: openai('gpt-4o') }); + const extractor = new ContentExtractor(model); + + console.log('\nExtracting with LLM...'); + const result = await extractor.extract(page, 'List the top 5 story titles'); + console.log('Result:', result); + } catch (error: any) { + console.error('ContentExtractor error:', error?.message ?? error); + console.error('Stack:', error?.stack); + } + + await browser.close(); +} + +main().catch(console.error); diff --git a/packages/core/test-snapshot-diag.ts b/packages/core/test-snapshot-diag.ts new file mode 100644 index 0000000..de9f584 --- /dev/null +++ b/packages/core/test-snapshot-diag.ts @@ -0,0 +1,71 @@ +import { chromium } from 'playwright'; +import { SnapshotBuilder } from './src/page/snapshot-builder.js'; + +async function main() { + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ viewport: { width: 1280, height: 720 } }); + const page = await context.newPage(); + const cdpSession = await context.newCDPSession(page); + + await page.goto('https://news.ycombinator.com', { waitUntil: 'networkidle' }); + + const builder = new SnapshotBuilder(); + const { domSnapshot, axTree } = await builder.captureSnapshot(cdpSession); + + console.log('=== Snapshot captured ==='); + console.log('Documents:', domSnapshot.documents.length); + console.log('Top-level strings:', domSnapshot.strings?.length ?? 
'N/A'); + + const { root, indexCounter } = builder.buildTree( + domSnapshot, + axTree, + { width: 1280, height: 720 }, + ['title', 'type', 'name', 'role', 'tabindex', 'aria-label', 'placeholder', 'value', 'alt', 'aria-expanded'], + ); + + console.log('\n=== Tree built ==='); + console.log('Index counter (interactive elements):', indexCounter); + console.log('Root tag:', root.tagName); + console.log('Root children:', root.children.length); + + // Count total nodes and interactive nodes + let totalNodes = 0; + let interactiveNodes = 0; + let visibleNodes = 0; + const interactiveExamples: { index: number; tag: string; text?: string; ariaLabel?: string }[] = []; + + function walk(node: typeof root) { + totalNodes++; + if (node.isVisible) visibleNodes++; + if (node.highlightIndex !== undefined) { + interactiveNodes++; + if (interactiveExamples.length < 10) { + interactiveExamples.push({ + index: node.highlightIndex as number, + tag: node.tagName, + text: node.text?.slice(0, 60), + ariaLabel: node.ariaLabel?.slice(0, 60), + }); + } + } + for (const child of node.children) { + walk(child); + } + } + + walk(root); + + console.log('Total nodes in tree:', totalNodes); + console.log('Visible nodes:', visibleNodes); + console.log('Interactive nodes:', interactiveNodes); + + console.log('\n=== First 10 interactive elements ==='); + for (const ex of interactiveExamples) { + console.log(` [${ex.index}] <${ex.tag}> text="${ex.text ?? ''}" aria="${ex.ariaLabel ?? 
''}" `); + } + + await browser.close(); + console.log('\nDone!'); +} + +main().catch(console.error); diff --git a/packages/core/test-snapshot.ts b/packages/core/test-snapshot.ts new file mode 100644 index 0000000..76fed66 --- /dev/null +++ b/packages/core/test-snapshot.ts @@ -0,0 +1,21 @@ +import { chromium } from 'playwright'; + +const browser = await chromium.launch({ headless: true }); +const page = await browser.newPage(); +await page.goto('https://news.ycombinator.com'); + +const cdp = await page.context().newCDPSession(page); +const result = await cdp.send('DOMSnapshot.captureSnapshot', { + computedStyles: ['display', 'visibility'], + includeDOMRects: true, + includePaintOrder: true, +}) as any; + +const doc = result.documents?.[0]; +console.log('Has documents:', !!result.documents, result.documents?.length); +console.log('Doc keys:', doc ? Object.keys(doc) : 'no doc'); +console.log('Has strings:', !!doc?.strings, 'length:', doc?.strings?.length); +console.log('Has nodes:', !!doc?.nodes); +console.log('nodes keys:', doc?.nodes ? 
Object.keys(doc.nodes) : 'none'); + +await browser.close(); diff --git a/packages/core/test-snapshot2.ts b/packages/core/test-snapshot2.ts new file mode 100644 index 0000000..aaa9fd6 --- /dev/null +++ b/packages/core/test-snapshot2.ts @@ -0,0 +1,20 @@ +import { chromium } from 'playwright'; + +const browser = await chromium.launch({ headless: true }); +const page = await browser.newPage(); +await page.goto('https://news.ycombinator.com'); + +const cdp = await page.context().newCDPSession(page); +const result = await cdp.send('DOMSnapshot.captureSnapshot', { + computedStyles: ['display', 'visibility'], + includeDOMRects: true, +}) as any; + +const doc = result.documents?.[0]; +// Check types of nodeName values +console.log('nodeName[0]:', doc.nodes.nodeName[0], typeof doc.nodes.nodeName[0]); +console.log('nodeName[1]:', doc.nodes.nodeName[1], typeof doc.nodes.nodeName[1]); +console.log('nodeValue[0]:', doc.nodes.nodeValue[0], typeof doc.nodes.nodeValue[0]); +console.log('layout.text[0]:', doc.layout.text[0], typeof doc.layout.text[0]); + +await browser.close(); diff --git a/packages/core/test-snapshot3.ts b/packages/core/test-snapshot3.ts new file mode 100644 index 0000000..d513483 --- /dev/null +++ b/packages/core/test-snapshot3.ts @@ -0,0 +1,22 @@ +import { chromium } from 'playwright'; + +const browser = await chromium.launch({ headless: true }); +const page = await browser.newPage(); +await page.goto('https://news.ycombinator.com'); + +const cdp = await page.context().newCDPSession(page); +const result = await cdp.send('DOMSnapshot.captureSnapshot', { + computedStyles: ['display', 'visibility'], + includeDOMRects: true, +}) as any; + +// Check top-level keys of result +console.log('Top-level keys:', Object.keys(result)); +// Maybe strings is at top level, not inside documents +if (result.strings) { + console.log('strings at top level, length:', result.strings.length); + console.log('strings[4]:', result.strings[4]); + console.log('strings[5]:', 
result.strings[5]); +} + +await browser.close(); From 44276fa355bd0059813d3d1e62964ed8277e2fd9 Mon Sep 17 00:00:00 2001 From: Forge Date: Fri, 13 Mar 2026 15:50:54 -0700 Subject: [PATCH 2/2] feat: add ollama + openrouter providers, fix agent done behavior - Add ollama provider (local, free, OLLAMA_BASE_URL configurable) - Add openrouter provider (OPENROUTER_API_KEY, access free models) - Install ollama-ai-provider@1.2.0 - Patch instructions.md: agent now calls done immediately on read/extract tasks once it has the answer, no more wandering - qwen2.5:7b pull initiated for local use --- bun.lock | 5 ++++ packages/cli/package.json | 5 ++-- packages/cli/src/commands/run.ts | 24 ++++++++++++++++++- .../src/agent/instructions/instructions.md | 2 ++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/bun.lock b/bun.lock index 9aa29b8..30caa79 100644 --- a/bun.lock +++ b/bun.lock @@ -19,6 +19,7 @@ "dependencies": { "chalk": "^5.4.0", "commander": "^12.1.0", + "ollama-ai-provider": "^1.2.0", "open-browser": "workspace:*", }, }, @@ -131,8 +132,12 @@ "nanoid": ["nanoid@5.1.6", "", { "bin": { "nanoid": "bin/nanoid.js" } }, "sha512-c7+7RQ+dMB5dPwwCp4ee1/iV/q2P6aK1mTZcfr1BTuVlyW9hJYiMPybJCcnBlQtuSmTIWNeazm/zqNoZSSElBg=="], + "ollama-ai-provider": ["ollama-ai-provider@1.2.0", "", { "dependencies": { "@ai-sdk/provider": "^1.0.0", "@ai-sdk/provider-utils": "^2.0.0", "partial-json": "0.1.7" }, "peerDependencies": { "zod": "^3.0.0" }, "optionalPeers": ["zod"] }, "sha512-jTNFruwe3O/ruJeppI/quoOUxG7NA6blG3ZyQj3lei4+NnJo7bi3eIRWqlVpRlu/mbzbFXeJSBuYQWF6pzGKww=="], + "open-browser": ["open-browser@workspace:packages/core"], + "partial-json": ["partial-json@0.1.7", "", {}, "sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA=="], + "playwright": ["playwright@1.58.2", "", { "dependencies": { "playwright-core": "1.58.2" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, 
"sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A=="], "playwright-core": ["playwright-core@1.58.2", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg=="], diff --git a/packages/cli/package.json b/packages/cli/package.json index 4f3404a..351a19e 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -13,9 +13,10 @@ "start": "bun run src/index.ts" }, "dependencies": { - "open-browser": "workspace:*", + "chalk": "^5.4.0", "commander": "^12.1.0", - "chalk": "^5.4.0" + "ollama-ai-provider": "^1.2.0", + "open-browser": "workspace:*" }, "license": "MIT" } diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index e4aa0e3..2a7105e 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -54,10 +54,32 @@ async function createModel(provider: string, modelId: string): Promise