diff --git a/.changeset/browser-commander-integration.md b/.changeset/browser-commander-integration.md new file mode 100644 index 0000000..0d693c2 --- /dev/null +++ b/.changeset/browser-commander-integration.md @@ -0,0 +1,10 @@ +--- +'@link-assistant/web-capture': minor +--- + +Fully integrate browser-commander library for all browser operations + +- Use browser-commander's launchBrowser for both Puppeteer and Playwright +- Pass server-specific args (--no-sandbox, etc.) via the args option +- Configure headless mode and unique userDataDir for server environments +- Add browser-commander ^0.4.0 as a runtime dependency diff --git a/package-lock.json b/package-lock.json index 108a50d..531d8d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.1.3", "license": "UNLICENSED", "dependencies": { + "browser-commander": "^0.4.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", @@ -3907,6 +3908,30 @@ "node": ">=8" } }, + "node_modules/browser-commander": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/browser-commander/-/browser-commander-0.4.0.tgz", + "integrity": "sha512-pEC3LqCySJqqxlwobPIyvrjlWFvkO6HEadhvgD1ZM0VVLuO0uYomICRibpZ3VT9RT85XG/suohO6pNrzMs3d5w==", + "license": "UNLICENSE", + "dependencies": { + "log-lazy": "^1.0.4" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "playwright": ">=1.40.0", + "puppeteer": ">=21.0.0" + }, + "peerDependenciesMeta": { + "playwright": { + "optional": true + }, + "puppeteer": { + "optional": true + } + } + }, "node_modules/browserslist": { "version": "4.24.5", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.5.tgz", @@ -7551,6 +7576,17 @@ "dev": true, "license": "MIT" }, + "node_modules/log-lazy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/log-lazy/-/log-lazy-1.0.4.tgz", + "integrity": "sha512-UHTdwIjZymnYIaNA5l6mW7YEQpJYXGhWsRwpblG+BpA8oThnsCj69JPy+CDRo6T2oi39u5TChlcqCfk+Qh6mHg==", 
"license": "Unlicense", + "engines": { + "bun": ">=1.0.0", + "deno": ">=2.0.0", + "node": ">=20.0.0" + } + }, "node_modules/log-update": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/log-update/-/log-update-6.1.0.tgz", diff --git a/package.json b/package.json index 14077dd..5e438e8 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "changeset:status": "changeset status --since=origin/main" }, "dependencies": { + "browser-commander": "^0.4.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", diff --git a/src/browser.js b/src/browser.js index fecbaea..3d0cb8d 100644 --- a/src/browser.js +++ b/src/browser.js @@ -1,6 +1,19 @@ -// Browser abstraction layer to support both Puppeteer and Playwright -import puppeteer from 'puppeteer'; -import playwright from 'playwright'; +// Browser abstraction layer using browser-commander for all browser operations +// See: https://github.com/link-foundation/browser-commander + +import { launchBrowser } from 'browser-commander'; +import os from 'os'; +import path from 'path'; + +/** + * Additional Chrome args needed for headless server environments + * These are appended to browser-commander's default CHROME_ARGS + */ +const SERVER_CHROME_ARGS = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', +]; /** * Unified browser interface that works with both Puppeteer and Playwright @@ -26,78 +39,52 @@ import playwright from 'playwright'; /** * Create a browser instance using the specified engine + * Uses browser-commander's launchBrowser for both Puppeteer and Playwright * @param {string} engine - 'puppeteer' or 'playwright' (defaults to puppeteer) * @param {Object} options - Browser launch options * @returns {Promise} */ export async function createBrowser(engine = 'puppeteer', options = {}) { const normalizedEngine = engine.toLowerCase(); + const engineType = + normalizedEngine === 'playwright' || normalizedEngine === 'pw' + ? 
'playwright' + : 'puppeteer'; - if (normalizedEngine === 'playwright') { - return createPlaywrightBrowser(options); - } else { - return createPuppeteerBrowser(options); - } -} + // Generate unique userDataDir for this session to avoid conflicts + const userDataDir = path.join( + os.tmpdir(), + `web-capture-${engineType}-${Date.now()}` + ); -/** - * Create a Puppeteer browser instance - * @param {Object} options - Puppeteer launch options - * @returns {Promise} - */ -async function createPuppeteerBrowser(options = {}) { - const defaultOptions = { - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - ], - }; - - const browser = await puppeteer.launch({ ...defaultOptions, ...options }); - - return { - async newPage() { - const page = await browser.newPage(); - return createPuppeteerPageAdapter(page); - }, - async close() { - await browser.close(); - }, - type: 'puppeteer', - _browser: browser, - }; -} - -/** - * Create a Playwright browser instance - * @param {Object} options - Playwright launch options - * @returns {Promise} - */ -async function createPlaywrightBrowser(options = {}) { - const defaultOptions = { - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - ], - }; - - // Playwright uses chromium by default - const browser = await playwright.chromium.launch({ - ...defaultOptions, + // Use browser-commander's launchBrowser with server-specific args + // Default to headless for server environments + const { browser, page } = await launchBrowser({ + engine: engineType, + args: SERVER_CHROME_ARGS, + headless: true, + userDataDir, + slowMo: 0, // Disable slowMo for server operations ...options, }); + // Close the initial page since we'll create new ones via newPage() + await page.close(); + + const pageAdapter = + engineType === 'playwright' + ? 
createPlaywrightPageAdapter + : createPuppeteerPageAdapter; + return { async newPage() { - const page = await browser.newPage(); - return createPlaywrightPageAdapter(page); + const newPage = await browser.newPage(); + return pageAdapter(newPage); }, async close() { await browser.close(); }, - type: 'playwright', + type: engineType, _browser: browser, }; } diff --git a/yarn.lock b/yarn.lock index 4690eec..59201a7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2022,6 +2022,13 @@ braces@^3.0.3: dependencies: fill-range "^7.1.1" +browser-commander@^0.4.0: + version "0.4.0" + resolved "https://registry.npmjs.org/browser-commander/-/browser-commander-0.4.0.tgz" + integrity sha512-pEC3LqCySJqqxlwobPIyvrjlWFvkO6HEadhvgD1ZM0VVLuO0uYomICRibpZ3VT9RT85XG/suohO6pNrzMs3d5w== + dependencies: + log-lazy "^1.0.4" + browserslist@^4.24.0, browserslist@^4.24.4, "browserslist@>= 4.21.0": version "4.24.5" resolved "https://registry.npmjs.org/browserslist/-/browserslist-4.24.5.tgz" @@ -4078,6 +4085,11 @@ lodash.startcase@^4.4.0: resolved "https://registry.npmjs.org/lodash.startcase/-/lodash.startcase-4.4.0.tgz" integrity sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg== +log-lazy@^1.0.4: + version "1.0.4" + resolved "https://registry.npmjs.org/log-lazy/-/log-lazy-1.0.4.tgz" + integrity sha512-UHTdwIjZymnYIaNA5l6mW7YEQpJYXGhWsRwpblG+BpA8oThnsCj69JPy+CDRo6T2oi39u5TChlcqCfk+Qh6mHg== + log-update@^6.1.0: version "6.1.0" resolved "https://registry.npmjs.org/log-update/-/log-update-6.1.0.tgz" @@ -4537,7 +4549,7 @@ playwright-core@1.56.1: resolved "https://registry.npmjs.org/playwright-core/-/playwright-core-1.56.1.tgz" integrity sha512-hutraynyn31F+Bifme+Ps9Vq59hKuUCz7H1kDOcBs+2oGguKkWTU50bBWrtz34OUWmIwpBTWDxaRPXrIXkgvmQ== -playwright@^1.49.0: +playwright@^1.49.0, playwright@>=1.40.0: version "1.56.1" resolved "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz" integrity 
sha512-aFi5B0WovBHTEvpM3DzXTUaeN6eN0qWnTkKx4NQaH4Wvcmc153PdaY2UBdSYKaGYw+UyWXSVyxDUg5DoPEttjw== @@ -4757,7 +4769,7 @@ puppeteer-core@24.8.2: typed-query-selector "^2.12.0" ws "^8.18.2" -puppeteer@^24.8.2: +puppeteer@^24.8.2, puppeteer@>=21.0.0: version "24.8.2" resolved "https://registry.npmjs.org/puppeteer/-/puppeteer-24.8.2.tgz" integrity sha512-Sn6SBPwJ6ASFvQ7knQkR+yG7pcmr4LfXzmoVp3NR0xXyBbPhJa8a8ybtb6fnw1g/DD/2t34//yirubVczko37w==