From 0c986f4fe21e3b726c3c2702d0b7da7db5ec8881 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Thu, 22 Jan 2026 19:24:31 +0000 Subject: [PATCH 1/6] add skill for typescript --- .claude-plugin/marketplace.json | 8 ++ README.md | 32 ++++---- .../kernel-sdks/.claude-plugin/plugin.json | 11 +++ .../skills/typescript-sdk/SKILL.md | 75 +++++++++++++++++++ 4 files changed, 111 insertions(+), 15 deletions(-) create mode 100644 plugins/kernel-sdks/.claude-plugin/plugin.json create mode 100644 plugins/kernel-sdks/skills/typescript-sdk/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b7fde54..8bd3506 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -14,6 +14,14 @@ }, "description": "CLI commands for managing browsers, deploying apps, and controlling browser instances. Use when working with the kernel command-line tool.", "source": "./plugins/kernel-cli" + }, + { + "name": "kernel-sdks", + "author": { + "name": "Kernel" + }, + "description": "TypeScript and Python SDK skills for building browser automation with Kernel's Typescript and Python SDKs. Use when writing code to control browsers programmatically.", + "source": "./plugins/kernel-sdks" } ] } diff --git a/README.md b/README.md index c4fad8f..8ec44c0 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,9 @@ Official AI agent skills from the Kernel for installing useful skills for our CL # Install the CLI skill /plugin install kernel-cli + +# Install the SDK skills (TypeScript & Python) +/plugin install kernel-sdks ``` ### Manual Installation @@ -19,6 +22,7 @@ Official AI agent skills from the Kernel for installing useful skills for our CL ```bash git clone https://github.com/kernel/skills.git cp -r skills/plugins/kernel-cli ~/.claude/skills/ +cp -r skills/plugins/kernel-sdks ~/.claude/skills/ ``` ## Usage Examples @@ -34,26 +38,15 @@ Before using these skills, ensure you have: 2. 
**Authenticated with Kernel**: ```bash + export KERNEL_API_KEY=<your-api-key> or kernel login ``` -Once installed, your coding agent will automatically know how to use Kernel. Try prompts like: - -### CLI Usage - -> "Spin up a browser and take a screenshot of kernel.sh" - -Your agent will respond with: - -```bash -kernel browsers create -o json -# Extract session_id from output -kernel browsers computer screenshot --to screenshot.png -``` +## Available Skills -## Skill Structure +### kernel-cli -The kernel-cli skill is organized into focused sub-skills: +Command-line interface skills for using Kernel CLI commands. | Skill | Description | |-------|-------------| @@ -74,6 +67,15 @@ The kernel-cli skill is organized into focused sub-skills: Each sub-skill is loaded contextually based on your prompts, minimizing token usage while providing comprehensive Kernel knowledge. +### kernel-sdks + +SDK skills for building browser automation with TypeScript and Python. + +| Skill | Description | +|-------|-------------| +| **typescript-sdk** | Build automation with Kernel's TypeScript SDK | +| **python-sdk** | Build automation with Kernel's Python SDK | + ## Documentation - [Kernel Documentation](https://www.kernel.sh/docs) diff --git a/plugins/kernel-sdks/.claude-plugin/plugin.json b/plugins/kernel-sdks/.claude-plugin/plugin.json new file mode 100644 index 0000000..78fba7d --- /dev/null +++ b/plugins/kernel-sdks/.claude-plugin/plugin.json @@ -0,0 +1,11 @@ +{ + "name": "kernel-sdks", + "version": "1.0.0", + "description": "TypeScript and Python SDK skills for building browser automation with Kernel's Typescript and Python SDKs", + "author": { + "name": "Kernel", + "url": "https://www.kernel.sh" + }, + "repository": "https://github.com/kernel/skills", + "license": "MIT" +} diff --git a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md new file mode 100644 index 0000000..a883632 --- /dev/null +++ b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md 
@@ -0,0 +1,75 @@ +--- +name: kernel-typescript-sdk +description: Build browser automation scripts using the Kernel TypeScript SDK with Playwright, CDP, and remote browser management. +--- + +## When to Use This Skill + +Use the Kernel TypeScript SDK when you need to: + +- **Build browser automation scripts** - Create TypeScript programs that control remote browsers +- **Execute server-side automation** - Run Playwright code directly in the browser VM without local dependencies +- **Manage browser sessions programmatically** - Create, configure, and control browsers from code +- **Build scalable scraping/testing tools** - Use browser pools and profiles for high-volume automation +- **Deploy automation as actions** - Package scripts as Kernel actions for invocation via API + +**When NOT to use:** +- For CLI commands (i.e. kernel browsers create), use the `kernel-cli` skill instead +- For quick one-off tasks, the CLI may be simpler than writing code + +## Core Concepts + +### SDK Architecture + +The SDK is organized into resource-based modules: + +- `kernel.browsers` - Browser session management (create, list, delete) +- `kernel.browsers.playwright` - Server-side Playwright execution +- `kernel.browsers.computer` - OS-level controls (mouse, keyboard, screenshots) +- `kernel.browserPools` - Pre-warmed browser pool management +- `kernel.profiles` - Persistent browser profiles (auth state) +- `kernel.proxies` - Proxy configuration +- `kernel.extensions` - Chrome extension management +- `kernel.deployments` - App deployment +- `kernel.invocations` - Action invocation + +### Two Automation Approaches + +**1. 
Server-side Execution (RECOMMENDED)** +- Execute Playwright code directly in browser VM using `await kernel.browsers.playwright.execute(browser.session_id, { code: ``}` + - Response form the playwright execute is accessed via `response.result as string` +- Code and browser run together in cloud +- No local Playwright installation needed +- Lower latency, higher throughput +- Best for: Most use cases, production automation, parallel execution, actions + +**2. CDP Connection (Client-side)** +- Connect Playwright/Puppeteer to browser via CDP WebSocket URL +- Code runs locally, browser runs remotely +- Requires local Playwright installation +- Full Playwright API available +- Best for: Complex debugging, specific local development needs + +#### Common Issues +- Use `snake_case` when accessing attributes (i.e. browser.session_id) +- Avoid using depcrecated functions +- Creating a browser: use these parameters and modify them as needed + ``` + // Create a new remote browser session + const browser = await kernel.browsers.create({ + stealth: true, + headless: true // or false for headful + }); + ``` + - create the browser before the try/catch scope +- Deleting a browser: `await kernel.browsers.deleteByID(browser.session_id);` +- Accessing the CDP URL: `browser.cdp_ws_url` +- No need to create `package.json`, provide instructions to the user on how to run the script + +## References + +- **API Reference**: https://www.kernel.sh/docs/api-reference/ +- **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates +- **TypeScript Types**: Available in `@onkernel/sdk` package +- **Kernel Documentation**: https://www.kernel.sh/docs +- **Quickstart Guide**: https://www.kernel.sh/docs/quickstart From 533e3ae6ebfb40d3dbe5f67ec68047e0ac3bd7e2 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 26 Jan 2026 12:16:42 +0000 Subject: [PATCH 2/6] feat: add python sdk skill --- .../kernel-sdks/skills/python-sdk/SKILL.md | 275 ++++++++++++++++++ 
.../skills/typescript-sdk/SKILL.md | 39 ++- 2 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 plugins/kernel-sdks/skills/python-sdk/SKILL.md diff --git a/plugins/kernel-sdks/skills/python-sdk/SKILL.md b/plugins/kernel-sdks/skills/python-sdk/SKILL.md new file mode 100644 index 0000000..c3fcd6b --- /dev/null +++ b/plugins/kernel-sdks/skills/python-sdk/SKILL.md @@ -0,0 +1,275 @@ +--- +name: kernel-python-sdk +description: Build browser automation scripts using the Kernel Python SDK with Playwright and remote browser management. +context: fork +--- + +## When to Use This Skill + +Use the Kernel Python SDK when you need to: + +- **Build browser automation scripts** - Create Python programs that control remote browsers +- **Execute server-side automation** - Run Playwright code directly in the browser VM without local dependencies +- **Manage browser sessions programmatically** - Create, configure, and control browsers from code +- **Build scalable scraping/testing tools** - Use browser pools and profiles for high-volume automation +- **Deploy automation as actions** - Package scripts as Kernel actions for invocation via API + +**When NOT to use:** +- For CLI commands (i.e. 
kernel browsers create), use the `kernel-cli` skill instead +- For quick one-off tasks, the CLI may be simpler than writing code + +## Core Concepts + +### SDK Architecture + +The SDK is organized into resource-based modules: + +- `kernel.browsers` - Browser session management (create, list, delete) +- `kernel.browsers.playwright` - Server-side Playwright execution +- `kernel.browsers.computer` - OS-level controls (mouse, keyboard, screenshots) +- `kernel.browser_pools` - Pre-warmed browser pool management +- `kernel.profiles` - Persistent browser profiles (auth state) +- `kernel.proxies` - Proxy configuration +- `kernel.extensions` - Chrome extension management +- `kernel.deployments` - App deployment +- `kernel.invocations` - Action invocation + +### Two Automation Approaches + +**1. Server-side Execution (RECOMMENDED)** +- Execute Playwright code directly in browser VM using `kernel.browsers.playwright.execute(session_id, code="...")` + - `session_id` must be passed as a positional argument (first parameter), not as `id=` keyword argument + - Response from the playwright execute is accessed via `response.result` + - **CRITICAL: You MUST use `return` in your code to get data back** - without it, `response.result` is `None` +- Code and browser run together in cloud +- No local Playwright installation needed +- Lower latency, higher throughput +- Best for: Most use cases, production automation, parallel execution, actions + +**2. 
CDP Connection (Client-side)** +- Connect Playwright to browser via CDP WebSocket URL +- Code runs locally, browser runs remotely +- Requires local Playwright installation +- Full Playwright API available +- Best for: Complex debugging, specific local development needs + +## Best Practices & Common Patterns + +### Server-Side Execution Pattern + +When using server-side Playwright execution, ALWAYS follow this pattern to avoid common errors: + +```python +import asyncio +import time +from kernel import Kernel + +async def main(): + client = Kernel() + + # 1. Create browser OUTSIDE try block + kernel_browser = client.browsers.create( + stealth=True, + timeout_seconds=300 + ) + + try: + # 2. ALWAYS wait after browser creation + time.sleep(3) # Browser may not be immediately ready + + # 3. Use retry logic for Playwright execution + max_retries = 3 + for attempt in range(max_retries): + try: + response = client.browsers.playwright.execute( + kernel_browser.session_id, # MUST be positional argument + code=""" + // Your Playwright code here + await page.goto('https://example.com', { waitUntil: 'networkidle' }); + return await page.evaluate(() => document.title); + """ + ) + break # Success, exit retry loop + except Exception as e: + if attempt < max_retries - 1: + time.sleep(2) # Wait before retry + else: + raise # Re-raise on final attempt + + # 4. ALWAYS check response.success before accessing result + if response.success and response.result: + print(f"Result: {response.result}") + else: + print(f"Error: {response.error}") + if response.stderr: + print(f"Stderr: {response.stderr}") + + finally: + # 5. ALWAYS cleanup browser in finally block + client.browsers.delete_by_id(kernel_browser.session_id) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +### Critical Rules for Server-Side Execution + +1. 
**Browser Readiness**: `client.browsers.create()` may return before the browser is fully ready + - ALWAYS add `time.sleep(3)` after browser creation + - ALWAYS use retry logic (3 attempts with 2-second delays) for Playwright execution + - Error `400 - browser not found` means browser wasn't ready yet + +2. **Return Values**: MUST use `return` in Playwright code to get data back + - Without `return`, `response.result` will be `None` + - Applies to ALL data: strings, objects, arrays, binary data (screenshots, PDFs) + +3. **Browser Cleanup**: ALWAYS delete browser in finally block + - Use `client.browsers.delete_by_id(kernel_browser.session_id)` + - Put in finally block so it runs even if errors occur + +4. **Error Handling**: ALWAYS check `response.success` before accessing `response.result` + - Check `response.error` and `response.stderr` for debugging + +### Common Issues & Solutions +- Use type safe python code +- **`400 - browser not found` error**: Browser not ready yet → Add `time.sleep(3)` and retry logic +- **`response.result` is `None`**: Missing `return` statement in Playwright code +- **`TypeError: 'NoneType' object is not subscriptable`**: Forgot to check `response.success` or missing `return` +- **Browser not cleaned up**: Always use try/finally pattern with deletion in finally block +**Import Patterns** +- Standard import: `from kernel import Kernel` +- For app actions: `import kernel` and `from kernel import Kernel, KernelContext` +- For typed payloads: `from typing import TypedDict` +- For CDP: `from playwright.async_api import async_playwright` + +**SDK Initialization** +- Always use `kernel = Kernel()` (reads `KERNEL_API_KEY` from environment automatically) +- Create client at module level: `client = Kernel()` +- Create app at module level: `app = kernel.App("app-name")` + +**CDP Connection Pattern (Client-side only)** +```python +async with async_playwright() as playwright: + browser = await 
playwright.chromium.connect_over_cdp(kernel_browser.cdp_ws_url) + context = browser.contexts[0] if browser.contexts else await browser.new_context() + page = context.pages[0] if context.pages else await context.new_page() +``` + +**Action Handler Pattern** +```python +class TaskInput(TypedDict): + task: str + +@app.action("action-name") +async def my_action(ctx: kernel.KernelContext, payload: TaskInput): + # Access payload with dict syntax: payload["task"] or payload.get("task") + ... +``` + +**Type Hints and Typing** +- Always use type hints for better IDE support, code clarity, and error detection +- For action handlers, use `TypedDict` for input/output types: +```python +from typing import TypedDict, Optional + +class TaskInput(TypedDict): + task: str + url: Optional[str] # Optional fields + +class TaskOutput(TypedDict): + result: str + success: bool + +@app.action("task") +async def my_action(ctx: kernel.KernelContext, payload: TaskInput) -> TaskOutput: + ... +``` +- For regular functions, use type annotations: +```python +from kernel import Kernel + +def process_browser(kernel_browser) -> str: + return kernel_browser.session_id + +async def create_and_configure_browser(client: Kernel, stealth: bool = True): + return client.browsers.create(stealth=stealth) +``` +- Common imports: `from typing import TypedDict, Optional, Dict, List, Any` +- Always include return type annotations for functions and async functions + +**Resource Cleanup Patterns** +- Always wrap browser usage in try/finally (see Best Practices section above) +- Delete browser in finally block: `client.browsers.delete_by_id(kernel_browser.session_id)` +- For CDP connections: Close Playwright browser before deleting Kernel browser: `await browser.close()` + +**Response Handling** +- Always check `response.success` before accessing `response.result` +- Access error info: `response.error`, `response.stderr` +- Common Playwright errors: + - `Identifier 'page' has already been declared` - tried to 
declare `const page` when it's already available in the execution context + +**Handling Binary Data (Screenshots, PDFs, etc.)** + +Follow the server-side execution pattern (see Best Practices above) with these additional considerations: + +- Binary data (screenshots, PDFs, files) from Playwright returns as a Node.js Buffer object +- The Buffer comes through as: `{'data': [byte_array], 'type': 'Buffer'}` +- Convert to Python bytes: `data = bytes(response.result['data'])` + +**Example - Taking a screenshot:** +```python +import time +from kernel import Kernel + +client = Kernel() +kernel_browser = client.browsers.create(stealth=True) + +try: + time.sleep(3) # Wait for browser readiness + + # Retry logic for reliability + max_retries = 3 + for attempt in range(max_retries): + try: + response = client.browsers.playwright.execute( + kernel_browser.session_id, + code=""" + await page.goto('https://example.com', { waitUntil: 'networkidle' }); + return await page.screenshot({ fullPage: true }); + """ # MUST use 'return' to get data back + ) + break + except Exception as e: + if attempt < max_retries - 1: + time.sleep(2) + else: + raise + + # Check success and convert buffer to bytes + if response.success and response.result: + screenshot_data = bytes(response.result['data']) + with open("screenshot.png", "wb") as f: + f.write(screenshot_data) + else: + print(f"Error: {response.error}") + +finally: + client.browsers.delete_by_id(kernel_browser.session_id) +``` + +**Common mistakes:** +- Missing `return` → `response.result` will be `None` +- Missing `time.sleep(3)` after browser creation → `400 - browser not found` +- Not checking `response.success` → accessing `None` causes crashes + +**No requirements.txt Needed** +- Provide instructions: `uv pip install kernel` or `pip install kernel` +- For Playwright: `uv pip install playwright` + + +## References + +- **Kernel Documentation**: https://www.kernel.sh/docs +- **API Reference**: https://www.kernel.sh/docs/api-reference/ +- 
**Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates +- **Quickstart Guide**: https://www.kernel.sh/docs/quickstart diff --git a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md index a883632..31f2a56 100644 --- a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md +++ b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md @@ -1,6 +1,7 @@ --- name: kernel-typescript-sdk description: Build browser automation scripts using the Kernel TypeScript SDK with Playwright, CDP, and remote browser management. +context: fork --- ## When to Use This Skill @@ -58,17 +59,51 @@ The SDK is organized into resource-based modules: // Create a new remote browser session const browser = await kernel.browsers.create({ stealth: true, - headless: true // or false for headful + headless: false }); ``` - create the browser before the try/catch scope - Deleting a browser: `await kernel.browsers.deleteByID(browser.session_id);` - Accessing the CDP URL: `browser.cdp_ws_url` - No need to create `package.json`, provide instructions to the user on how to run the script +- **Playwright execute context**: When using `playwright.execute`, the variables `page`, `context`, and `browser` are already available in the execution context. Do NOT redeclare them (e.g., avoid `const page = await context.newPage()`). Use them directly: + ``` + await kernel.browsers.playwright.execute(browser.session_id, { + code: `await page.goto('https://example.com'); return page.url();` + }); + ``` +- **Error handling**: Always check `response.success` before accessing `response.result` when using `playwright.execute`: + ``` + const response = await kernel.browsers.playwright.execute(browser.session_id, { code: '...' 
}); + if (!response.success) { + throw new Error(response.error || 'Playwright execution failed'); + } + const result = response.result as YourType; + ``` +- **Screenshots**: Use the dedicated screenshot API instead of trying to return binary data through `playwright.execute`. Binary data (like screenshots, file contents) does not serialize properly through the Playwright execute API and will result in `undefined` values. Use `kernel.browsers.computer.captureScreenshot(browser.session_id)` which returns a Response with a blob: + ``` + // First navigate using playwright.execute + await kernel.browsers.playwright.execute(browser.session_id, { + code: `await page.goto('https://example.com');` + }); + + // Then capture screenshot using dedicated API + const screenshotResponse = await kernel.browsers.computer.captureScreenshot(browser.session_id); + const blob = await screenshotResponse.blob(); + const arrayBuffer = await blob.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + ``` +- **Binary data handling**: Never try to return binary data (screenshots, file buffers, images) through `playwright.execute`. The API only serializes JSON-compatible values. Attempting to return binary data will result in `undefined`, causing errors like `Buffer.from(undefined)` or `TypeError [ERR_INVALID_ARG_TYPE]`. Always use dedicated APIs for binary operations: + - Screenshots: `kernel.browsers.computer.captureScreenshot()` + - File operations: Use the filesystem APIs, not `playwright.execute` return values +- **Screenshot timing and blank screenshots**: If screenshots are blank, the page may not be fully rendered despite `waitUntil: 'networkidle'`. 
Solutions: + - Add `await page.waitForTimeout(3000-5000)` after navigation in `playwright.execute` + - Add a delay between `playwright.execute` and `captureScreenshot`: `await new Promise(resolve => setTimeout(resolve, 2000))` + - Try `headless: false` for better rendering on pages + - Add debugging to verify page loaded: `const title = await page.title(); const bodyHTML = await page.evaluate(() => document.body.innerHTML);` ## References -- **API Reference**: https://www.kernel.sh/docs/api-reference/ - **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates - **TypeScript Types**: Available in `@onkernel/sdk` package - **Kernel Documentation**: https://www.kernel.sh/docs From 12fa55cd2f0902a5498a6fddb8b44a5a4de895ea Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 26 Jan 2026 20:00:09 +0000 Subject: [PATCH 3/6] feat: update skill.md files --- .../kernel-sdks/skills/python-sdk/SKILL.md | 230 +++--------------- .../skills/typescript-sdk/SKILL.md | 96 +++----- 2 files changed, 65 insertions(+), 261 deletions(-) diff --git a/plugins/kernel-sdks/skills/python-sdk/SKILL.md b/plugins/kernel-sdks/skills/python-sdk/SKILL.md index c3fcd6b..a5d3223 100644 --- a/plugins/kernel-sdks/skills/python-sdk/SKILL.md +++ b/plugins/kernel-sdks/skills/python-sdk/SKILL.md @@ -15,7 +15,7 @@ Use the Kernel Python SDK when you need to: - **Deploy automation as actions** - Package scripts as Kernel actions for invocation via API **When NOT to use:** -- For CLI commands (i.e. kernel browsers create), use the `kernel-cli` skill instead +- For CLI commands (e.g., `kernel browsers create`), use the `kernel-cli` skill instead - For quick one-off tasks, the CLI may be simpler than writing code ## Core Concepts @@ -38,234 +38,65 @@ The SDK is organized into resource-based modules: **1. 
Server-side Execution (RECOMMENDED)** - Execute Playwright code directly in browser VM using `kernel.browsers.playwright.execute(session_id, code="...")` - - `session_id` must be passed as a positional argument (first parameter), not as `id=` keyword argument - - Response from the playwright execute is accessed via `response.result` - - **CRITICAL: You MUST use `return` in your code to get data back** - without it, `response.result` is `None` -- Code and browser run together in cloud -- No local Playwright installation needed -- Lower latency, higher throughput +- `session_id` must be passed as a positional argument (first parameter), not as `id=` keyword +- Response accessed via `response.result` - **MUST use `return` in code to get data back** - Best for: Most use cases, production automation, parallel execution, actions **2. CDP Connection (Client-side)** - Connect Playwright to browser via CDP WebSocket URL -- Code runs locally, browser runs remotely -- Requires local Playwright installation -- Full Playwright API available +- Code runs locally, browser runs remotely; requires local Playwright installation - Best for: Complex debugging, specific local development needs -## Best Practices & Common Patterns +## Patterns Reference -### Server-Side Execution Pattern - -When using server-side Playwright execution, ALWAYS follow this pattern to avoid common errors: - -```python -import asyncio -import time -from kernel import Kernel - -async def main(): - client = Kernel() - - # 1. Create browser OUTSIDE try block - kernel_browser = client.browsers.create( - stealth=True, - timeout_seconds=300 - ) - - try: - # 2. ALWAYS wait after browser creation - time.sleep(3) # Browser may not be immediately ready - - # 3. 
Use retry logic for Playwright execution - max_retries = 3 - for attempt in range(max_retries): - try: - response = client.browsers.playwright.execute( - kernel_browser.session_id, # MUST be positional argument - code=""" - // Your Playwright code here - await page.goto('https://example.com', { waitUntil: 'networkidle' }); - return await page.evaluate(() => document.title); - """ - ) - break # Success, exit retry loop - except Exception as e: - if attempt < max_retries - 1: - time.sleep(2) # Wait before retry - else: - raise # Re-raise on final attempt - - # 4. ALWAYS check response.success before accessing result - if response.success and response.result: - print(f"Result: {response.result}") - else: - print(f"Error: {response.error}") - if response.stderr: - print(f"Stderr: {response.stderr}") - - finally: - # 5. ALWAYS cleanup browser in finally block - client.browsers.delete_by_id(kernel_browser.session_id) - -if __name__ == "__main__": - asyncio.run(main()) -``` - -### Critical Rules for Server-Side Execution - -1. **Browser Readiness**: `client.browsers.create()` may return before the browser is fully ready - - ALWAYS add `time.sleep(3)` after browser creation - - ALWAYS use retry logic (3 attempts with 2-second delays) for Playwright execution - - Error `400 - browser not found` means browser wasn't ready yet - -2. **Return Values**: MUST use `return` in Playwright code to get data back - - Without `return`, `response.result` will be `None` - - Applies to ALL data: strings, objects, arrays, binary data (screenshots, PDFs) - -3. **Browser Cleanup**: ALWAYS delete browser in finally block - - Use `client.browsers.delete_by_id(kernel_browser.session_id)` - - Put in finally block so it runs even if errors occur - -4. 
**Error Handling**: ALWAYS check `response.success` before accessing `response.result` - - Check `response.error` and `response.stderr` for debugging - -### Common Issues & Solutions -- Use type safe python code -- **`400 - browser not found` error**: Browser not ready yet → Add `time.sleep(3)` and retry logic -- **`response.result` is `None`**: Missing `return` statement in Playwright code -- **`TypeError: 'NoneType' object is not subscriptable`**: Forgot to check `response.success` or missing `return` -- **Browser not cleaned up**: Always use try/finally pattern with deletion in finally block **Import Patterns** -- Standard import: `from kernel import Kernel` -- For app actions: `import kernel` and `from kernel import Kernel, KernelContext` +- Standard: `from kernel import Kernel` +- For actions: `import kernel` and `from kernel import Kernel` - For typed payloads: `from typing import TypedDict` - For CDP: `from playwright.async_api import async_playwright` **SDK Initialization** -- Always use `kernel = Kernel()` (reads `KERNEL_API_KEY` from environment automatically) -- Create client at module level: `client = Kernel()` -- Create app at module level: `app = kernel.App("app-name")` - -**CDP Connection Pattern (Client-side only)** -```python -async with async_playwright() as playwright: - browser = await playwright.chromium.connect_over_cdp(kernel_browser.cdp_ws_url) - context = browser.contexts[0] if browser.contexts else await browser.new_context() - page = context.pages[0] if context.pages else await context.new_page() -``` +- `client = Kernel()` reads `KERNEL_API_KEY` from environment automatically **Action Handler Pattern** ```python -class TaskInput(TypedDict): - task: str - -@app.action("action-name") -async def my_action(ctx: kernel.KernelContext, payload: TaskInput): - # Access payload with dict syntax: payload["task"] or payload.get("task") - ... 
-``` +from typing import TypedDict +import kernel -**Type Hints and Typing** -- Always use type hints for better IDE support, code clarity, and error detection -- For action handlers, use `TypedDict` for input/output types: -```python -from typing import TypedDict, Optional +app = kernel.App("app-name") class TaskInput(TypedDict): task: str - url: Optional[str] # Optional fields - -class TaskOutput(TypedDict): - result: str - success: bool -@app.action("task") -async def my_action(ctx: kernel.KernelContext, payload: TaskInput) -> TaskOutput: +@app.action("action-name") +async def my_action(ctx: kernel.KernelContext, input_data: TaskInput): + # Access input: input_data["task"] or input_data.get("task") ... ``` -- For regular functions, use type annotations: -```python -from kernel import Kernel - -def process_browser(kernel_browser) -> str: - return kernel_browser.session_id -async def create_and_configure_browser(client: Kernel, stealth: bool = True): - return client.browsers.create(stealth=stealth) +**CDP Connection Pattern (Client-side)** +```python +async with async_playwright() as playwright: + browser = await playwright.chromium.connect_over_cdp(kernel_browser.cdp_ws_url) + context = browser.contexts[0] if browser.contexts else await browser.new_context() + page = context.pages[0] if context.pages else await context.new_page() ``` -- Common imports: `from typing import TypedDict, Optional, Dict, List, Any` -- Always include return type annotations for functions and async functions - -**Resource Cleanup Patterns** -- Always wrap browser usage in try/finally (see Best Practices section above) -- Delete browser in finally block: `client.browsers.delete_by_id(kernel_browser.session_id)` -- For CDP connections: Close Playwright browser before deleting Kernel browser: `await browser.close()` - -**Response Handling** -- Always check `response.success` before accessing `response.result` -- Access error info: `response.error`, `response.stderr` -- Common 
Playwright errors: - - `Identifier 'page' has already been declared` - tried to declare `const page` when it's already available in the execution context -**Handling Binary Data (Screenshots, PDFs, etc.)** +**Binary Data Handling** -Follow the server-side execution pattern (see Best Practices above) with these additional considerations: +Binary data (screenshots, PDFs) is returned as a serialized Node.js Buffer: `{'data': [byte_array], 'type': 'Buffer'}` -- Binary data (screenshots, PDFs, files) from Playwright returns as a Node.js Buffer object -- The Buffer comes through as: `{'data': [byte_array], 'type': 'Buffer'}` -- Convert to Python bytes: `data = bytes(response.result['data'])` - -**Example - Taking a screenshot:** ```python -import time -from kernel import Kernel - -client = Kernel() -kernel_browser = client.browsers.create(stealth=True) - -try: - time.sleep(3) # Wait for browser readiness - - # Retry logic for reliability - max_retries = 3 - for attempt in range(max_retries): - try: - response = client.browsers.playwright.execute( - kernel_browser.session_id, - code=""" - await page.goto('https://example.com', { waitUntil: 'networkidle' }); - return await page.screenshot({ fullPage: true }); - """ # MUST use 'return' to get data back - ) - break - except Exception as e: - if attempt < max_retries - 1: - time.sleep(2) - else: - raise - - # Check success and convert buffer to bytes - if response.success and response.result: - screenshot_data = bytes(response.result['data']) - with open("screenshot.png", "wb") as f: - f.write(screenshot_data) - else: - print(f"Error: {response.error}") - -finally: - client.browsers.delete_by_id(kernel_browser.session_id) +# `response` comes from a playwright.execute() call whose code returns a Buffer: +if response.success and response.result: + data = bytes(response.result['data']) + with open("output.png", "wb") as f: + f.write(data) ``` -**Common mistakes:** -- Missing `return` → `response.result` will be `None` -- Missing `time.sleep(3)` after browser creation → `400 - browser 
not found` -- Not checking `response.success` → accessing `None` causes crashes - -**No requirements.txt Needed** -- Provide instructions: `uv pip install kernel` or `pip install kernel` -- For Playwright: `uv pip install playwright` - +**Installation** +- `uv pip install kernel` or `pip install kernel` +- For CDP: `uv pip install playwright` ## References @@ -273,3 +104,4 @@ finally: - **API Reference**: https://www.kernel.sh/docs/api-reference/ - **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates - **Quickstart Guide**: https://www.kernel.sh/docs/quickstart +- **Examples**: ./examples/examples.md diff --git a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md index 31f2a56..f440f04 100644 --- a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md +++ b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md @@ -15,7 +15,7 @@ Use the Kernel TypeScript SDK when you need to: - **Deploy automation as actions** - Package scripts as Kernel actions for invocation via API **When NOT to use:** -- For CLI commands (i.e. kernel browsers create), use the `kernel-cli` skill instead +- For CLI commands (e.g., `kernel browsers create`), use the `kernel-cli` skill instead - For quick one-off tasks, the CLI may be simpler than writing code ## Core Concepts @@ -37,74 +37,46 @@ The SDK is organized into resource-based modules: ### Two Automation Approaches **1. 
Server-side Execution (RECOMMENDED)** -- Execute Playwright code directly in browser VM using `await kernel.browsers.playwright.execute(browser.session_id, { code: ``}` - - Response form the playwright execute is accessed via `response.result as string` -- Code and browser run together in cloud -- No local Playwright installation needed -- Lower latency, higher throughput +- Execute Playwright code directly in browser VM using `kernel.browsers.playwright.execute()` +- Response accessed via `response.result` - **MUST use `return` in code to get data back** - Best for: Most use cases, production automation, parallel execution, actions **2. CDP Connection (Client-side)** -- Connect Playwright/Puppeteer to browser via CDP WebSocket URL -- Code runs locally, browser runs remotely -- Requires local Playwright installation -- Full Playwright API available +- Connect Playwright/Puppeteer to browser via CDP WebSocket URL (`browser.cdp_ws_url`) +- Code runs locally, browser runs remotely; requires local Playwright installation - Best for: Complex debugging, specific local development needs -#### Common Issues -- Use `snake_case` when accessing attributes (i.e. browser.session_id) -- Avoid using depcrecated functions -- Creating a browser: use these parameters and modify them as needed - ``` - // Create a new remote browser session - const browser = await kernel.browsers.create({ - stealth: true, - headless: false - }); - ``` - - create the browser before the try/catch scope -- Deleting a browser: `await kernel.browsers.deleteByID(browser.session_id);` -- Accessing the CDP URL: `browser.cdp_ws_url` -- No need to create `package.json`, provide instructions to the user on how to run the script -- **Playwright execute context**: When using `playwright.execute`, the variables `page`, `context`, and `browser` are already available in the execution context. Do NOT redeclare them (e.g., avoid `const page = await context.newPage()`). 
Use them directly: - ``` - await kernel.browsers.playwright.execute(browser.session_id, { - code: `await page.goto('https://example.com'); return page.url();` - }); - ``` -- **Error handling**: Always check `response.success` before accessing `response.result` when using `playwright.execute`: - ``` - const response = await kernel.browsers.playwright.execute(browser.session_id, { code: '...' }); - if (!response.success) { - throw new Error(response.error || 'Playwright execution failed'); - } - const result = response.result as YourType; - ``` -- **Screenshots**: Use the dedicated screenshot API instead of trying to return binary data through `playwright.execute`. Binary data (like screenshots, file contents) does not serialize properly through the Playwright execute API and will result in `undefined` values. Use `kernel.browsers.computer.captureScreenshot(browser.session_id)` which returns a Response with a blob: - ``` - // First navigate using playwright.execute - await kernel.browsers.playwright.execute(browser.session_id, { - code: `await page.goto('https://example.com');` - }); - - // Then capture screenshot using dedicated API - const screenshotResponse = await kernel.browsers.computer.captureScreenshot(browser.session_id); - const blob = await screenshotResponse.blob(); - const arrayBuffer = await blob.arrayBuffer(); - const buffer = Buffer.from(arrayBuffer); - ``` -- **Binary data handling**: Never try to return binary data (screenshots, file buffers, images) through `playwright.execute`. The API only serializes JSON-compatible values. Attempting to return binary data will result in `undefined`, causing errors like `Buffer.from(undefined)` or `TypeError [ERR_INVALID_ARG_TYPE]`. 
Always use dedicated APIs for binary operations: - - Screenshots: `kernel.browsers.computer.captureScreenshot()` - - File operations: Use the filesystem APIs, not `playwright.execute` return values -- **Screenshot timing and blank screenshots**: If screenshots are blank, the page may not be fully rendered despite `waitUntil: 'networkidle'`. Solutions: - - Add `await page.waitForTimeout(3000-5000)` after navigation in `playwright.execute` - - Add a delay between `playwright.execute` and `captureScreenshot`: `await new Promise(resolve => setTimeout(resolve, 2000))` - - Try `headless: false` for better rendering on pages - - Add debugging to verify page loaded: `const title = await page.title(); const bodyHTML = await page.evaluate(() => document.body.innerHTML);` +## Patterns Reference + +**SDK Initialization** +```typescript +import Kernel from '@onkernel/sdk'; +const kernel = new Kernel(); // Reads KERNEL_API_KEY from environment +``` + +**Attribute Access**: Use `snake_case` (e.g., `browser.session_id`, `browser.cdp_ws_url`) + +**Binary Data Handling** + +Binary data does not serialize through `playwright.execute` (returns `undefined`). Use dedicated APIs: + +```typescript +// For screenshots: +const response = await kernel.browsers.computer.captureScreenshot(browser.session_id); +// For files: +const response = await kernel.browsers.filesystem.readFile(browser.session_id, { path: '/path/to/file' }); + +// Convert to buffer: +const blob = await response.blob(); +const buffer = Buffer.from(await blob.arrayBuffer()); +``` + +> **Note:** This differs from the Python SDK where binary data CAN be returned via `playwright.execute` as a Buffer object. In TypeScript, always use dedicated APIs. 
## References -- **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates -- **TypeScript Types**: Available in `@onkernel/sdk` package - **Kernel Documentation**: https://www.kernel.sh/docs - **Quickstart Guide**: https://www.kernel.sh/docs/quickstart +- **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates +- **TypeScript Types**: Available in `@onkernel/sdk` package +- **Examples**: ./examples/examples.md From e4c0dd261b1e16ad90615a81937a59fef7791b9f Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 26 Jan 2026 20:28:52 +0000 Subject: [PATCH 4/6] feat: add python sdk examples --- .../skills/python-sdk/examples/examples.md | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 plugins/kernel-sdks/skills/python-sdk/examples/examples.md diff --git a/plugins/kernel-sdks/skills/python-sdk/examples/examples.md b/plugins/kernel-sdks/skills/python-sdk/examples/examples.md new file mode 100644 index 0000000..5209415 --- /dev/null +++ b/plugins/kernel-sdks/skills/python-sdk/examples/examples.md @@ -0,0 +1,241 @@ +# Kernel Python SDK - Examples + +Concise patterns extracted from production templates showing how to integrate the Kernel SDK with popular libraries and frameworks. + +--- + +## Browser-Use Library Integration + +Connect the `browser-use` library to Kernel browsers for AI-powered browser automation. 
+ +```python +from browser_use import Agent, Browser +from browser_use.llm import ChatOpenAI +from kernel import Kernel + +client = Kernel() +llm = ChatOpenAI(model="gpt-4.1") + +kernel_browser = client.browsers.create(stealth=True) +try: + # Connect browser-use to Kernel via CDP + browser = Browser( + cdp_url=kernel_browser.cdp_ws_url, + headless=False, + window_size={"width": 1920, "height": 1080}, + viewport={"width": 1920, "height": 1080}, + ) + + agent = Agent(task="Your task here", llm=llm, browser_session=browser) + result = await agent.run() + + # Handle results + if result.final_result(): + output = result.final_result() + else: + output = result.errors() +finally: + client.browsers.delete_by_id(kernel_browser.session_id) +``` + +--- + +## Reusable Async Context Manager + +Create a reusable context manager for browser lifecycle with optional replay recording. + +```python +from dataclasses import dataclass, field +from typing import Optional +from kernel import Kernel + +@dataclass +class KernelBrowserSession: + stealth: bool = True + timeout_seconds: int = 300 + record_replay: bool = False + replay_grace_period: float = 5.0 + + session_id: Optional[str] = field(default=None, init=False) + live_view_url: Optional[str] = field(default=None, init=False) + replay_id: Optional[str] = field(default=None, init=False) + replay_view_url: Optional[str] = field(default=None, init=False) + _kernel: Optional[Kernel] = field(default=None, init=False) + + async def __aenter__(self) -> "KernelBrowserSession": + self._kernel = Kernel() + browser = self._kernel.browsers.create( + stealth=self.stealth, + timeout_seconds=self.timeout_seconds, + ) + self.session_id = browser.session_id + self.live_view_url = browser.browser_live_view_url + + # Start replay recording if enabled + if self.record_replay: + replay = self._kernel.browsers.replays.start(self.session_id) + self.replay_id = replay.replay_id + + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> 
None: + if self._kernel and self.session_id: + try: + if self.record_replay and self.replay_id: + await asyncio.sleep(self.replay_grace_period) + self._kernel.browsers.replays.stop( + replay_id=self.replay_id, + id=self.session_id, + ) + # Poll for replay URL + replays = self._kernel.browsers.replays.list(self.session_id) + for replay in replays: + if replay.replay_id == self.replay_id: + self.replay_view_url = replay.replay_view_url + finally: + self._kernel.browsers.delete_by_id(self.session_id) + + @property + def kernel(self) -> Kernel: + if self._kernel is None: + raise RuntimeError("Session not initialized") + return self._kernel +``` + +**Usage:** + +```python +async with KernelBrowserSession(record_replay=True) as session: + # Use session.session_id, session.kernel + result = session.kernel.browsers.playwright.execute( + session.session_id, + code="await page.goto('https://example.com'); return await page.title();" + ) +# Browser automatically cleaned up, replay available at session.replay_view_url +``` + +--- + +## Auto-CAPTCHA with CDP Connection + +Use CDP connection with stealth mode to leverage Kernel's automatic CAPTCHA solving. 
+ +```python +from playwright.async_api import async_playwright +from kernel import Kernel + +client = Kernel() + +# Create browser with stealth mode enabled +kernel_browser = client.browsers.create(stealth=True) + +try: + async with async_playwright() as playwright: + browser = await playwright.chromium.connect_over_cdp( + kernel_browser.cdp_ws_url + ) + + # IMPORTANT: Get existing context/page instead of creating new ones + context = ( + browser.contexts[0] if browser.contexts + else await browser.new_context() + ) + page = ( + context.pages[0] if context.pages + else await context.new_page() + ) + + # Navigate to page with CAPTCHA + await page.goto("https://www.google.com/recaptcha/api2/demo") + + # Kernel automatically solves CAPTCHAs in stealth mode + # Watch via live view: kernel_browser.browser_live_view_url + + await browser.close() +finally: + client.browsers.delete_by_id(kernel_browser.session_id) +``` + +--- + +## Server-Side Execution as LLM Tool + +Wrap `playwright.execute` as a callable tool for LLM agent frameworks. + +```python +import json +from kernel import Kernel + +client = Kernel() + +def create_playwright_tool(session_id: str): + """ + Create a Playwright execution tool for LLM agents. + Returns a callable that executes Playwright code and returns formatted results. + """ + def execute_playwright(code: str, timeout_sec: int = 60) -> dict: + """ + Execute Playwright code against the browser. 
+ + Args: + code: Playwright/JavaScript code to execute + timeout_sec: Execution timeout in seconds + + Returns: + Dict with 'content' (result text) and optional 'is_error' flag + """ + try: + result = client.browsers.playwright.execute( + session_id, + code=code, + timeout_sec=timeout_sec, + ) + + if result.success: + output = ( + json.dumps(result.result, indent=2) + if result.result is not None + else "Code executed successfully (no return value)" + ) + return {"content": [{"type": "text", "text": output}]} + else: + error_msg = f"Execution failed: {result.error or 'Unknown error'}\n{result.stderr or ''}" + return { + "content": [{"type": "text", "text": error_msg}], + "is_error": True, + } + except Exception as e: + return { + "content": [{"type": "text", "text": f"Failed to execute: {e}"}], + "is_error": True, + } + + return execute_playwright +``` + +**Usage with Claude Agent SDK:** + +```python +from claude_agent_sdk import tool, create_sdk_mcp_server + +# Create browser +browser = client.browsers.create(stealth=True) + +# Create tool +playwright_tool = create_playwright_tool(browser.session_id) + +# Register with agent framework +server = create_sdk_mcp_server( + name="kernel-playwright", + tools=[playwright_tool], +) +``` + +--- + +## Additional Patterns + +For more patterns including: +- Basic server-side execution with retry logic +- Action handler pattern for Kernel deployments +- Binary data handling (screenshots, PDFs) From 6d324450e5e39c478a325f86c83c64bb216e34b3 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 26 Jan 2026 20:29:58 +0000 Subject: [PATCH 5/6] feat: add ts sdk examples --- .../typescript-sdk/examples/examples.md | 351 ++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md diff --git a/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md b/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md new file mode 100644 index 
0000000..f89e5eb --- /dev/null +++ b/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md @@ -0,0 +1,351 @@ +# Kernel TypeScript SDK - Examples + +Concise patterns extracted from production templates showing how to integrate the Kernel SDK with popular libraries and frameworks. + +--- + +## Stagehand Library Integration + +Connect Stagehand to Kernel browsers for AI-powered browser automation with `act()` and `extract()`. + +```typescript +import { Stagehand } from "@browserbasehq/stagehand"; +import Kernel from "@onkernel/sdk"; +import { z } from "zod"; + +const kernel = new Kernel(); + +const kernelBrowser = await kernel.browsers.create({ stealth: true }); + +try { + const stagehand = new Stagehand({ + env: "LOCAL", + localBrowserLaunchOptions: { + cdpUrl: kernelBrowser.cdp_ws_url, + }, + model: "openai/gpt-4.1", + apiKey: process.env.OPENAI_API_KEY, + verbose: 1, + domSettleTimeout: 30_000, + }); + await stagehand.init(); + + const page = stagehand.context.pages()[0]; + await page.goto("https://example.com"); + + await stagehand.act("Click the login button"); + + const data = await stagehand.extract( + "Extract the main heading text", + z.object({ heading: z.string() }) + ); + + await stagehand.close(); +} finally { + await kernel.browsers.deleteByID(kernelBrowser.session_id); +} +``` + +--- + +## Reusable Browser Session Manager + +A class for managing browser lifecycle with optional replay recording. 
+ +```typescript +import type { Kernel } from "@onkernel/sdk"; + +interface SessionOptions { + stealth?: boolean; + timeoutSeconds?: number; + recordReplay?: boolean; +} + +class KernelBrowserSession { + private kernel: Kernel; + private options: SessionOptions; + private _sessionId: string | null = null; + private _replayId: string | null = null; + + liveViewUrl: string | null = null; + replayViewUrl: string | null = null; + + constructor(kernel: Kernel, options: SessionOptions = {}) { + this.kernel = kernel; + this.options = { stealth: true, timeoutSeconds: 300, ...options }; + } + + get sessionId(): string { + if (!this._sessionId) throw new Error("Session not started"); + return this._sessionId; + } + + async start(): Promise<void> { + const browser = await this.kernel.browsers.create({ + stealth: this.options.stealth, + timeout_seconds: this.options.timeoutSeconds, + }); + this._sessionId = browser.session_id; + this.liveViewUrl = browser.browser_live_view_url; + + if (this.options.recordReplay) { + const replay = await this.kernel.browsers.replays.start(this._sessionId); + this._replayId = replay.replay_id; + } + } + + async stop(): Promise<void> { + if (!this._sessionId) return; + + try { + if (this._replayId) { + await this.kernel.browsers.replays.stop(this._replayId, { + id: this._sessionId, + }); + // Poll for replay URL + const replays = await this.kernel.browsers.replays.list(this._sessionId); + const replay = replays.find((r) => r.replay_id === this._replayId); + this.replayViewUrl = replay?.replay_view_url ??
null; + } + } finally { + await this.kernel.browsers.deleteByID(this._sessionId); + this._sessionId = null; + } + } +} +``` + +**Usage:** + +```typescript +const session = new KernelBrowserSession(kernel, { recordReplay: true }); +await session.start(); +try { + // Use session.sessionId for automation +} finally { + await session.stop(); + console.log("Replay:", session.replayViewUrl); +} +``` + +--- + +## CDP Connection Pattern + +Connect local Playwright to a Kernel browser via CDP WebSocket. + +```typescript +import Kernel from "@onkernel/sdk"; +import { chromium } from "playwright-core"; + +const kernel = new Kernel(); + +const kernelBrowser = await kernel.browsers.create({ stealth: true }); + +try { + const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url); + + // IMPORTANT: Get existing context/page instead of creating new ones + const context = browser.contexts()[0] || (await browser.newContext()); + const page = context.pages()[0] || (await context.newPage()); + + await page.goto("https://example.com"); + const title = await page.title(); + + await browser.close(); +} finally { + await kernel.browsers.deleteByID(kernelBrowser.session_id); +} +``` + +--- + +## Auto-CAPTCHA with Stealth Mode + +Use stealth mode to leverage Kernel's automatic CAPTCHA solving. 
+ +```typescript +import Kernel from "@onkernel/sdk"; +import { chromium } from "playwright-core"; + +const kernel = new Kernel(); + +// Stealth mode enables auto-CAPTCHA solving +const kernelBrowser = await kernel.browsers.create({ stealth: true }); + +console.log("Live view:", kernelBrowser.browser_live_view_url); + +try { + const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url); + const context = browser.contexts()[0] || (await browser.newContext()); + const page = context.pages()[0] || (await context.newPage()); + + // Navigate to page with CAPTCHA - Kernel auto-solves it + await page.goto("https://www.google.com/recaptcha/api2/demo"); + + await browser.close(); +} finally { + await kernel.browsers.deleteByID(kernelBrowser.session_id); +} +``` + +--- + +## Server-Side Execution as LLM Tool + +Wrap `playwright.execute` as a callable tool for LLM agent frameworks. + +```typescript +import Kernel from "@onkernel/sdk"; + +const kernel = new Kernel(); + +function createPlaywrightTool(sessionId: string) { + return async (code: string, timeoutSec = 60) => { + try { + const result = await kernel.browsers.playwright.execute(sessionId, { + code, + timeout_sec: timeoutSec, + }); + + if (result.success) { + const output = + result.result !== undefined + ? 
JSON.stringify(result.result, null, 2) + : "Executed successfully (no return value)"; + return { content: [{ type: "text", text: output }] }; + } else { + return { + content: [{ type: "text", text: `Error: ${result.error}\n${result.stderr || ""}` }], + isError: true, + }; + } + } catch (error) { + return { + content: [{ type: "text", text: `Failed: ${error}` }], + isError: true, + }; + } + }; +} +``` + +**Usage with MCP server:** + +```typescript +import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk"; +import { z } from "zod"; + +const browser = await kernel.browsers.create({ stealth: true }); +const executeTool = createPlaywrightTool(browser.session_id); + +const server = createSdkMcpServer({ + name: "kernel-playwright", + tools: [ + tool( + "execute_playwright", + "Execute Playwright code. Use page, context, browser objects. Return values with 'return'.", + { code: z.string(), timeout_sec: z.number().optional() }, + async (args) => executeTool(args.code, args.timeout_sec) + ), + ], +}); +``` + +--- + +## Magnitude Library Integration + +Connect Magnitude's browser agent to Kernel for AI-driven automation. 
+ +```typescript +import Kernel from "@onkernel/sdk"; +import { startBrowserAgent } from "magnitude-core"; +import { z } from "zod"; + +const kernel = new Kernel(); + +const kernelBrowser = await kernel.browsers.create({ stealth: true }); + +console.log("Live view:", kernelBrowser.browser_live_view_url); + +const agent = await startBrowserAgent({ + url: "https://example.com", + llm: { + provider: "anthropic", + options: { + model: "claude-sonnet-4-20250514", + apiKey: process.env.ANTHROPIC_API_KEY!, + }, + }, + browser: { cdp: kernelBrowser.cdp_ws_url }, + narrate: true, +}); + +try { + await agent.act("Scroll down and explore the page"); + + const urls = await agent.extract( + "Extract up to 5 URLs from the page", + z.array(z.string().url()) + ); + + console.log("Found URLs:", urls); +} finally { + await agent.stop(); + await kernel.browsers.deleteByID(kernelBrowser.session_id); +} +``` + +--- + +## Action Handler Pattern + +Standard Kernel action pattern for deployable automation. + +```typescript +import { Kernel, type KernelContext } from "@onkernel/sdk"; +import { chromium } from "playwright-core"; + +const kernel = new Kernel(); +const app = kernel.app("my-app"); + +interface Input { + url: string; +} + +interface Output { + title: string; +} + +app.action<Input, Output>("get-title", async (ctx: KernelContext, payload?: Input): Promise<Output> => { + if (!payload?.url) throw new Error("URL required"); + + const kernelBrowser = await kernel.browsers.create({ + invocation_id: ctx.invocation_id, // Links browser to this invocation + stealth: true, + }); + + console.log("Live view:", kernelBrowser.browser_live_view_url); + + try { + const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url); + const context = browser.contexts()[0] || (await browser.newContext()); + const page = context.pages()[0] || (await context.newPage()); + + await page.goto(payload.url); + const title = await page.title(); + + return { title }; + } finally { + await
kernel.browsers.deleteByID(kernelBrowser.session_id); + } +}); + +// Deploy: kernel deploy index.ts +// Invoke: kernel invoke my-app get-title -p '{"url": "https://example.com"}' +``` + +--- + +See the full templates at: `kernel create --list` From f9920eb6100f7304e13840cacc0d3a3e288512a7 Mon Sep 17 00:00:00 2001 From: Archan Datta Date: Mon, 26 Jan 2026 20:40:37 +0000 Subject: [PATCH 6/6] review --- plugins/kernel-sdks/skills/python-sdk/SKILL.md | 2 +- plugins/kernel-sdks/skills/python-sdk/examples/examples.md | 7 ------- plugins/kernel-sdks/skills/typescript-sdk/SKILL.md | 2 +- .../kernel-sdks/skills/typescript-sdk/examples/examples.md | 2 -- 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/plugins/kernel-sdks/skills/python-sdk/SKILL.md b/plugins/kernel-sdks/skills/python-sdk/SKILL.md index a5d3223..f22bac2 100644 --- a/plugins/kernel-sdks/skills/python-sdk/SKILL.md +++ b/plugins/kernel-sdks/skills/python-sdk/SKILL.md @@ -104,4 +104,4 @@ if response.success and response.result: - **API Reference**: https://www.kernel.sh/docs/api-reference/ - **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates - **Quickstart Guide**: https://www.kernel.sh/docs/quickstart -- **Examples**: ./examples/examples.md +- **Examples**: [examples](./examples/examples.md) diff --git a/plugins/kernel-sdks/skills/python-sdk/examples/examples.md b/plugins/kernel-sdks/skills/python-sdk/examples/examples.md index 5209415..9548a84 100644 --- a/plugins/kernel-sdks/skills/python-sdk/examples/examples.md +++ b/plugins/kernel-sdks/skills/python-sdk/examples/examples.md @@ -232,10 +232,3 @@ server = create_sdk_mcp_server( ``` --- - -## Additional Patterns - -For more patterns including: -- Basic server-side execution with retry logic -- Action handler pattern for Kernel deployments -- Binary data handling (screenshots, PDFs) diff --git a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md index 
f440f04..3b126c9 100644 --- a/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md +++ b/plugins/kernel-sdks/skills/typescript-sdk/SKILL.md @@ -79,4 +79,4 @@ const buffer = Buffer.from(await blob.arrayBuffer()); - **Quickstart Guide**: https://www.kernel.sh/docs/quickstart - **Templates**: https://www.kernel.sh/docs/reference/cli/create#available-templates - **TypeScript Types**: Available in `@onkernel/sdk` package -- **Examples**: ./examples/examples.md +- **Examples**: [examples](./examples/examples.md) diff --git a/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md b/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md index f89e5eb..6be7988 100644 --- a/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md +++ b/plugins/kernel-sdks/skills/typescript-sdk/examples/examples.md @@ -347,5 +347,3 @@ app.action("get-title", async (ctx: KernelContext, payload?: Inpu ``` --- - -See the full templates at: `kernel create --list`