From 04a40757a0390f7d9aa2cfacd2c1134f3229eb9c Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 05:37:07 +0100 Subject: [PATCH 1/7] Initial commit with task details Adding CLAUDE.md with task information for AI processing. This file will be removed when the task is complete. Issue: https://github.com/link-assistant/web-capture/issues/24 --- CLAUDE.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..330e2b0 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,5 @@ +Issue to solve: https://github.com/link-assistant/web-capture/issues/24 +Your prepared branch: issue-24-000cedc18c0c +Your prepared working directory: /tmp/gh-issue-solver-1766896626671 + +Proceed. \ No newline at end of file From 303f5601e0b23ed3df30b1fb88c0689563002eb5 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 05:43:49 +0100 Subject: [PATCH 2/7] feat: integrate browser-commander for browser operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use browser-commander library utilities (CHROME_ARGS) for browser operations. Currently using browser-commander's CHROME_ARGS constants combined with server-specific args until browser-commander adds support for custom args in headless server environments. - Add browser-commander as a dependency - Import and use CHROME_ARGS from browser-commander - Maintain server-specific args (--no-sandbox, etc.) separately - Reference browser-commander issue #11 for future improvements Relates to #24 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- package-lock.json | 36 ++++++++++++++++++++++++++++++++++++ package.json | 1 + src/browser.js | 41 ++++++++++++++++++++++++++++------------- yarn.lock | 16 ++++++++++++++-- 4 files changed, 79 insertions(+), 15 deletions(-) diff --git a/package-lock.json b/package-lock.json index 108a50d..8748811 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.1.3", "license": "UNLICENSED", "dependencies": { + "browser-commander": "^0.3.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", @@ -3907,6 +3908,30 @@ "node": ">=8" } }, + "node_modules/browser-commander": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/browser-commander/-/browser-commander-0.3.0.tgz", + "integrity": "sha512-/1DfX8HsspBkKbQE+cjbt/Dzr/No50CZQ7IjgDk6pmq1M/SATY5cfLyImhkTEvSL7z898ios8qAdLpbEG2HFBQ==", + "license": "UNLICENSE", + "dependencies": { + "log-lazy": "^1.0.4" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "playwright": ">=1.40.0", + "puppeteer": ">=21.0.0" + }, + "peerDependenciesMeta": { + "playwright": { + "optional": true + }, + "puppeteer": { + "optional": true + } + } + }, "node_modules/browserslist": { "version": "4.24.5", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.5.tgz", @@ -7551,6 +7576,17 @@ "dev": true, "license": "MIT" }, + "node_modules/log-lazy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/log-lazy/-/log-lazy-1.0.4.tgz", + "integrity": "sha512-UHTdwIjZymnYIaNA5l6mW7YEQpJYXGhWsRwpblG+BpA8oThnsCj69JPy+CDRo6T2oi39u5TChlcqCfk+Qh6mHg==", + "license": "Unlicense", + "engines": { + "bun": ">=1.0.0", + "deno": ">=2.0.0", + "node": ">=20.0.0" + } + }, "node_modules/log-update": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/log-update/-/log-update-6.1.0.tgz", diff --git a/package.json b/package.json index 14077dd..0152c02 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "changeset:status": "changeset status --since=origin/main" }, "dependencies": { + "browser-commander": "^0.3.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", diff --git a/src/browser.js b/src/browser.js index fecbaea..f8107a1 100644 --- a/src/browser.js +++ b/src/browser.js @@ -1,7 +1,29 @@ -// Browser abstraction layer to support both Puppeteer and Playwright +// Browser abstraction layer using browser-commander utilities +// Note: browser-commander's launchBrowser currently doesn't support custom args +// for headless server environments (--no-sandbox, etc). We use browser-commander's +// utilities (CHROME_ARGS) and maintain browser launch code here until this +// feature is added to browser-commander. +// See: https://github.com/link-foundation/browser-commander/issues/11 + +import { CHROME_ARGS } from 'browser-commander'; import puppeteer from 'puppeteer'; import playwright from 'playwright'; +/** + * Additional Chrome args needed for headless server environments + * These are not included in browser-commander's CHROME_ARGS yet + */ +const SERVER_CHROME_ARGS = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', +]; + +/** + * Combined Chrome args: browser-commander defaults + server-specific args + */ +const ALL_CHROME_ARGS = [...CHROME_ARGS, ...SERVER_CHROME_ARGS]; + /** * Unified browser interface that works with both Puppeteer and Playwright * @typedef {Object} BrowserAdapter @@ -34,9 +56,9 @@ export async function createBrowser(engine = 'puppeteer', options = {}) { const normalizedEngine = engine.toLowerCase(); if (normalizedEngine === 'playwright') { - return createPlaywrightBrowser(options); + return await createPlaywrightBrowser(options); } else { - return createPuppeteerBrowser(options); + return await createPuppeteerBrowser(options); } } @@ -47,11 +69,7 @@ export async function createBrowser(engine = 'puppeteer', options = {}) { */ async function createPuppeteerBrowser(options = {}) { const defaultOptions = { - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - ], + args: ALL_CHROME_ARGS, }; const browser = await puppeteer.launch({ ...defaultOptions, ...options }); @@ -76,11 +94,7 @@ async function createPuppeteerBrowser(options = {}) { */ async function createPlaywrightBrowser(options = {}) { const defaultOptions = { - args: [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - ], + args: ALL_CHROME_ARGS, }; // Playwright uses chromium by default @@ -177,6 +191,7 @@ function createPlaywrightPageAdapter(page) { /** * Get the browser engine from query parameters or environment variable + * Uses browser-commander's detectEngine when available, with fallback logic * @param {Object} req - Express request object * @returns {string} - 'puppeteer' or 'playwright' */ diff --git a/yarn.lock b/yarn.lock index 4690eec..023ceb8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2022,6 +2022,13 @@ braces@^3.0.3: dependencies: fill-range "^7.1.1" +browser-commander@^0.3.0: + version "0.3.0" + resolved "https://registry.npmjs.org/browser-commander/-/browser-commander-0.3.0.tgz" + integrity sha512-/1DfX8HsspBkKbQE+cjbt/Dzr/No50CZQ7IjgDk6pmq1M/SATY5cfLyImhkTEvSL7z898ios8qAdLpbEG2HFBQ== + dependencies: + log-lazy "^1.0.4" + browserslist@^4.24.0, browserslist@^4.24.4, "browserslist@>= 4.21.0": version "4.24.5" resolved "https://registry.npmjs.org/browserslist/-/browserslist-4.24.5.tgz" @@ -4078,6 +4085,11 @@ lodash.startcase@^4.4.0: resolved "https://registry.npmjs.org/lodash.startcase/-/lodash.startcase-4.4.0.tgz" integrity sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg== +log-lazy@^1.0.4: + version "1.0.4" + resolved "https://registry.npmjs.org/log-lazy/-/log-lazy-1.0.4.tgz" + integrity sha512-UHTdwIjZymnYIaNA5l6mW7YEQpJYXGhWsRwpblG+BpA8oThnsCj69JPy+CDRo6T2oi39u5TChlcqCfk+Qh6mHg== + log-update@^6.1.0: version "6.1.0" resolved "https://registry.npmjs.org/log-update/-/log-update-6.1.0.tgz" @@ -4537,7 +4549,7 @@ playwright-core@1.56.1: resolved "https://registry.npmjs.org/playwright-core/-/playwright-core-1.56.1.tgz" integrity sha512-hutraynyn31F+Bifme+Ps9Vq59hKuUCz7H1kDOcBs+2oGguKkWTU50bBWrtz34OUWmIwpBTWDxaRPXrIXkgvmQ== -playwright@^1.49.0: +playwright@^1.49.0, playwright@>=1.40.0: version "1.56.1" resolved "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz" integrity sha512-aFi5B0WovBHTEvpM3DzXTUaeN6eN0qWnTkKx4NQaH4Wvcmc153PdaY2UBdSYKaGYw+UyWXSVyxDUg5DoPEttjw== @@ -4757,7 +4769,7 @@ puppeteer-core@24.8.2: typed-query-selector "^2.12.0" ws "^8.18.2" -puppeteer@^24.8.2: +puppeteer@^24.8.2, puppeteer@>=21.0.0: version "24.8.2" resolved "https://registry.npmjs.org/puppeteer/-/puppeteer-24.8.2.tgz" integrity sha512-Sn6SBPwJ6ASFvQ7knQkR+yG7pcmr4LfXzmoVp3NR0xXyBbPhJa8a8ybtb6fnw1g/DD/2t34//yirubVczko37w== From 181698035dc5d38b011c39b8bb1ed28ab605287e Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 05:44:59 +0100 Subject: [PATCH 3/7] chore: add changeset for browser-commander integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/browser-commander-integration.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .changeset/browser-commander-integration.md diff --git a/.changeset/browser-commander-integration.md b/.changeset/browser-commander-integration.md new file mode 100644 index 0000000..b6ba558 --- /dev/null +++ b/.changeset/browser-commander-integration.md @@ -0,0 +1,9 @@ +--- +"@link-assistant/web-capture": minor +--- + +Integrate browser-commander library for browser operations + +- Add browser-commander as a dependency +- Use CHROME_ARGS from browser-commander for consistent Chrome launch arguments +- Combine with server-specific args for headless environments From 0d9b464cf43cec8f3bd0f2ea39c26740de8c7f10 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 05:47:46 +0100 Subject: [PATCH 4/7] style: fix changeset formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/browser-commander-integration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/browser-commander-integration.md b/.changeset/browser-commander-integration.md index b6ba558..97beafa 100644 --- a/.changeset/browser-commander-integration.md +++ b/.changeset/browser-commander-integration.md @@ -1,5 +1,5 @@ --- -"@link-assistant/web-capture": minor +'@link-assistant/web-capture': minor --- Integrate browser-commander library for browser operations From 041a4ce889b9f64e427a74aca7f02c89f3c9a7c3 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 05:51:15 +0100 Subject: [PATCH 5/7] Revert "Initial commit with task details" This reverts commit 04a40757a0390f7d9aa2cfacd2c1134f3229eb9c. --- CLAUDE.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 330e2b0..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,5 +0,0 @@ -Issue to solve: https://github.com/link-assistant/web-capture/issues/24 -Your prepared branch: issue-24-000cedc18c0c -Your prepared working directory: /tmp/gh-issue-solver-1766896626671 - -Proceed. \ No newline at end of file From d091d993b4bbf70da5132ce3820cacb8ceb00198 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 06:22:03 +0100 Subject: [PATCH 6/7] feat: use browser-commander launchBrowser for all browser operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that browser-commander v0.4.0 supports custom Chrome args via the `args` option, we can use its `launchBrowser` function directly instead of maintaining our own browser launch code. Changes: - Update browser-commander dependency from ^0.3.0 to ^0.4.0 - Use launchBrowser from browser-commander for both Puppeteer and Playwright - Pass server-specific args (--no-sandbox, etc.) via the args option - Configure headless: true for server environments - Use unique userDataDir per session to avoid conflicts This fully integrates browser-commander as requested in issue #24. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- package-lock.json | 8 ++-- package.json | 2 +- src/browser.js | 102 +++++++++++++++++----------------------------- yarn.lock | 8 ++-- 4 files changed, 46 insertions(+), 74 deletions(-) diff --git a/package-lock.json b/package-lock.json index 8748811..531d8d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "1.1.3", "license": "UNLICENSED", "dependencies": { - "browser-commander": "^0.3.0", + "browser-commander": "^0.4.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", @@ -3909,9 +3909,9 @@ } }, "node_modules/browser-commander": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/browser-commander/-/browser-commander-0.3.0.tgz", - "integrity": "sha512-/1DfX8HsspBkKbQE+cjbt/Dzr/No50CZQ7IjgDk6pmq1M/SATY5cfLyImhkTEvSL7z898ios8qAdLpbEG2HFBQ==", + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/browser-commander/-/browser-commander-0.4.0.tgz", + "integrity": "sha512-pEC3LqCySJqqxlwobPIyvrjlWFvkO6HEadhvgD1ZM0VVLuO0uYomICRibpZ3VT9RT85XG/suohO6pNrzMs3d5w==", "license": "UNLICENSE", "dependencies": { "log-lazy": "^1.0.4" diff --git a/package.json b/package.json index 0152c02..5e438e8 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "changeset:status": "changeset status --since=origin/main" }, "dependencies": { - "browser-commander": "^0.3.0", + "browser-commander": "^0.4.0", "cheerio": "^1.0.0", "express": "^4.18.2", "lino-arguments": "^0.2.1", diff --git a/src/browser.js b/src/browser.js index f8107a1..3d0cb8d 100644 --- a/src/browser.js +++ b/src/browser.js @@ -1,17 +1,13 @@ -// Browser abstraction layer using browser-commander utilities -// Note: browser-commander's launchBrowser currently doesn't support custom args -// for headless server environments (--no-sandbox, etc). We use browser-commander's -// utilities (CHROME_ARGS) and maintain browser launch code here until this -// feature is added to browser-commander. -// See: https://github.com/link-foundation/browser-commander/issues/11 +// Browser abstraction layer using browser-commander for all browser operations +// See: https://github.com/link-foundation/browser-commander -import { CHROME_ARGS } from 'browser-commander'; -import puppeteer from 'puppeteer'; -import playwright from 'playwright'; +import { launchBrowser } from 'browser-commander'; +import os from 'os'; +import path from 'path'; /** * Additional Chrome args needed for headless server environments - * These are not included in browser-commander's CHROME_ARGS yet + * These are appended to browser-commander's default CHROME_ARGS */ const SERVER_CHROME_ARGS = [ '--no-sandbox', @@ -19,11 +15,6 @@ const SERVER_CHROME_ARGS = [ '--disable-dev-shm-usage', ]; -/** - * Combined Chrome args: browser-commander defaults + server-specific args - */ -const ALL_CHROME_ARGS = [...CHROME_ARGS, ...SERVER_CHROME_ARGS]; - /** * Unified browser interface that works with both Puppeteer and Playwright * @typedef {Object} BrowserAdapter @@ -48,70 +39,52 @@ const ALL_CHROME_ARGS = [...CHROME_ARGS, ...SERVER_CHROME_ARGS]; /** * Create a browser instance using the specified engine + * Uses browser-commander's launchBrowser for both Puppeteer and Playwright * @param {string} engine - 'puppeteer' or 'playwright' (defaults to puppeteer) * @param {Object} options - Browser launch options * @returns {Promise} */ export async function createBrowser(engine = 'puppeteer', options = {}) { const normalizedEngine = engine.toLowerCase(); - - if (normalizedEngine === 'playwright') { - return await createPlaywrightBrowser(options); - } else { - return await createPuppeteerBrowser(options); - } -} - -/** - * Create a Puppeteer browser instance - * @param {Object} options - Puppeteer launch options - * @returns {Promise} - */ -async function createPuppeteerBrowser(options = {}) { - const defaultOptions = { - args: ALL_CHROME_ARGS, - }; - - const browser = await puppeteer.launch({ ...defaultOptions, ...options }); - - return { - async newPage() { - const page = await browser.newPage(); - return createPuppeteerPageAdapter(page); - }, - async close() { - await browser.close(); - }, - type: 'puppeteer', - _browser: browser, - }; -} - -/** - * Create a Playwright browser instance - * @param {Object} options - Playwright launch options - * @returns {Promise} - */ -async function createPlaywrightBrowser(options = {}) { - const defaultOptions = { - args: ALL_CHROME_ARGS, - }; - - // Playwright uses chromium by default - const browser = await playwright.chromium.launch({ - ...defaultOptions, + const engineType = + normalizedEngine === 'playwright' || normalizedEngine === 'pw' + ? 'playwright' + : 'puppeteer'; + + // Generate unique userDataDir for this session to avoid conflicts + const userDataDir = path.join( + os.tmpdir(), + `web-capture-${engineType}-${Date.now()}` + ); + + // Use browser-commander's launchBrowser with server-specific args + // Default to headless for server environments + const { browser, page } = await launchBrowser({ + engine: engineType, + args: SERVER_CHROME_ARGS, + headless: true, + userDataDir, + slowMo: 0, // Disable slowMo for server operations ...options, }); + // Close the initial page since we'll create new ones via newPage() + await page.close(); + + const pageAdapter = + engineType === 'playwright' + ? createPlaywrightPageAdapter + : createPuppeteerPageAdapter; + return { async newPage() { - const page = await browser.newPage(); - return createPlaywrightPageAdapter(page); + const newPage = await browser.newPage(); + return pageAdapter(newPage); }, async close() { await browser.close(); }, - type: 'playwright', + type: engineType, _browser: browser, }; } @@ -191,7 +164,6 @@ function createPlaywrightPageAdapter(page) { /** * Get the browser engine from query parameters or environment variable - * Uses browser-commander's detectEngine when available, with fallback logic * @param {Object} req - Express request object * @returns {string} - 'puppeteer' or 'playwright' */ diff --git a/yarn.lock b/yarn.lock index 023ceb8..59201a7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2022,10 +2022,10 @@ braces@^3.0.3: dependencies: fill-range "^7.1.1" -browser-commander@^0.3.0: - version "0.3.0" - resolved "https://registry.npmjs.org/browser-commander/-/browser-commander-0.3.0.tgz" - integrity sha512-/1DfX8HsspBkKbQE+cjbt/Dzr/No50CZQ7IjgDk6pmq1M/SATY5cfLyImhkTEvSL7z898ios8qAdLpbEG2HFBQ== +browser-commander@^0.4.0: + version "0.4.0" + resolved "https://registry.npmjs.org/browser-commander/-/browser-commander-0.4.0.tgz" + integrity sha512-pEC3LqCySJqqxlwobPIyvrjlWFvkO6HEadhvgD1ZM0VVLuO0uYomICRibpZ3VT9RT85XG/suohO6pNrzMs3d5w== dependencies: log-lazy "^1.0.4" From 5448fab48e6f1bf980987804c455168b06e58e25 Mon Sep 17 00:00:00 2001 From: konard Date: Sun, 28 Dec 2025 06:22:56 +0100 Subject: [PATCH 7/7] chore: update changeset description for full integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/browser-commander-integration.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.changeset/browser-commander-integration.md b/.changeset/browser-commander-integration.md index 97beafa..0d693c2 100644 --- a/.changeset/browser-commander-integration.md +++ b/.changeset/browser-commander-integration.md @@ -2,8 +2,9 @@ '@link-assistant/web-capture': minor --- -Integrate browser-commander library for browser operations +Fully integrate browser-commander library for all browser operations -- Add browser-commander as a dependency -- Use CHROME_ARGS from browser-commander for consistent Chrome launch arguments -- Combine with server-specific args for headless environments +- Use browser-commander's launchBrowser for both Puppeteer and Playwright +- Pass server-specific args (--no-sandbox, etc.) via the args option +- Configure headless mode and unique userDataDir for server environments +- Update browser-commander dependency from ^0.3.0 to ^0.4.0