diff --git a/README.md b/README.md index 545c8746..b8d17de2 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ A collection of examples for building browser automations with [Intuned](https:/ | [EHR-integration](./python-examples/ehr-integration-python/) | Data Extraction from Openimis Website| | [playwright-basics](./python-examples/playwright-python/) | Playwright Basics | | [e-commerece-category](./python-examples/e-commerece-category/) | E-commerce category and product scraper | -| [hyprid-automation](./python-examples/hyprid-automation/) | Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extract_structured_data | +| [hybrid-automation](./python-examples/hybrid-automation/) | Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extract_structured_data | | [computer-use](./python-examples/computer-use/) | AI-powered browser automation with Anthropic, OpenAI, Gemini, and Browser Use | | [cdp-connection](./python-examples/cdp-connection/) | Basic example demonstrating Chrome DevTools Protocol (CDP) connection | | [setup-hooks](./python-examples/setup-hooks/) | Demonstrates setup hooks for preparing data before API execution | diff --git a/python-examples/README.md b/python-examples/README.md index 579fa15d..a4384bfe 100644 --- a/python-examples/README.md +++ b/python-examples/README.md @@ -14,7 +14,7 @@ Intuned sample projects in Python. 
| [e-commerce-auth-scrapingcourse](./e-commerce-auth-scrapingcourse/) | Authenticated e-commerce scraper with Auth Sessions | | [e-commerece-shopify](./e-commerece-shopify/) | Shopify store product scraper | | [e-commerece-category](./e-commerece-category/) | E-commerce category and product scraper | -| [hyprid-automation](./hyprid-automation/) | Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extract_structured_data | +| [hybrid-automation](./hybrid-automation/) | Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extract_structured_data | | [computer-use](./computer-use/) | AI-powered browser automation with Anthropic, OpenAI, and Gemini | | [cdp-connection](./cdp-connection/) | Basic example demonstrating Chrome DevTools Protocol (CDP) connection | | [setup-hooks](./setup-hooks/) | Demonstrates setup hooks for preparing data before API execution | diff --git a/python-examples/browser-sdk-showcase/api/ai/extract-structured-data.py b/python-examples/browser-sdk-showcase/api/ai/extract-structured-data.py index 106ed4ea..72d8be3b 100644 --- a/python-examples/browser-sdk-showcase/api/ai/extract-structured-data.py +++ b/python-examples/browser-sdk-showcase/api/ai/extract-structured-data.py @@ -26,7 +26,7 @@ class Book(BaseModel): # Extract from the Page directly using Pydantic model. # You can also extract from a specific locator or by passing TextContentItem. - # Check https://docs.intunedhq.com/automation-sdks/intuned-sdk/python/helpers/functions/extract_structured_data for more details. + # Check https://docs.intunedhq.com/automation-sdks/intuned-sdk/python/ai/functions/extract_structured_data for more details. 
product = await extract_structured_data( source=page, strategy="HTML", diff --git a/python-examples/hyprid-automation/.env.example b/python-examples/hybrid-automation/.env.example similarity index 100% rename from python-examples/hyprid-automation/.env.example rename to python-examples/hybrid-automation/.env.example diff --git a/python-examples/hyprid-automation/.gitignore b/python-examples/hybrid-automation/.gitignore similarity index 100% rename from python-examples/hyprid-automation/.gitignore rename to python-examples/hybrid-automation/.gitignore diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/default.json b/python-examples/hybrid-automation/.parameters/api/crawler/crawl/default.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/default.json rename to python-examples/hybrid-automation/.parameters/api/crawler/crawl/default.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/job-posting.json b/python-examples/hybrid-automation/.parameters/api/crawler/crawl/job-posting.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/job-posting.json rename to python-examples/hybrid-automation/.parameters/api/crawler/crawl/job-posting.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/not-lever.json b/python-examples/hybrid-automation/.parameters/api/crawler/crawl/not-lever.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-crawler/crawl/not-lever.json rename to python-examples/hybrid-automation/.parameters/api/crawler/crawl/not-lever.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-rpa/fill-form/default.json b/python-examples/hybrid-automation/.parameters/api/rpa/fill-form/default.json similarity index 100% rename from 
python-examples/hyprid-automation/.parameters/api/hyprid-rpa/fill-form/default.json rename to python-examples/hybrid-automation/.parameters/api/rpa/fill-form/default.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-scraper/details/default.json b/python-examples/hybrid-automation/.parameters/api/scraper/details/default.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-scraper/details/default.json rename to python-examples/hybrid-automation/.parameters/api/scraper/details/default.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-scraper/details/example2.json b/python-examples/hybrid-automation/.parameters/api/scraper/details/example2.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-scraper/details/example2.json rename to python-examples/hybrid-automation/.parameters/api/scraper/details/example2.json diff --git a/python-examples/hyprid-automation/.parameters/api/hyprid-scraper/list/default.json b/python-examples/hybrid-automation/.parameters/api/scraper/list/default.json similarity index 100% rename from python-examples/hyprid-automation/.parameters/api/hyprid-scraper/list/default.json rename to python-examples/hybrid-automation/.parameters/api/scraper/list/default.json diff --git a/python-examples/hybrid-automation/Intuned.jsonc b/python-examples/hybrid-automation/Intuned.jsonc new file mode 100644 index 00000000..81310cf4 --- /dev/null +++ b/python-examples/hybrid-automation/Intuned.jsonc @@ -0,0 +1,34 @@ +// For more information, see our Intuned settings reference +// https://docs.intunedhq.com/docs/05-references/intuned-json +{ + // "workspaceId": "your-workspace-id", // Add your workspace ID here for local development + // "projectName": "your-project-name", // Add your project name here for local development + "apiAccess": { + "enabled": false + }, + "authSessions": { + "enabled": false + }, + "replication": { + 
"maxConcurrentRequests": 1, + "size": "standard" + }, + "metadata": { + "template": { + "name": "hybrid-automation", + "description": "Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extract_structured_data", + "tags": ["hybrid", "ai", "scraping", "rpa", "crawler", "stagehand"] + }, + "defaultRunPlaygroundInput": { + "apiName": "rpa/fill-form", + "parameters": { + "name": "Sarah Williams", + "email": "sarah.w@startup.com", + "phone": "+1-555-7890", + "date": "2025-01-05", + "time": "11:47", + "topic": "api-integration" + } + } + } +} diff --git a/python-examples/hybrid-automation/README.md b/python-examples/hybrid-automation/README.md new file mode 100644 index 00000000..ed00c318 --- /dev/null +++ b/python-examples/hybrid-automation/README.md @@ -0,0 +1,156 @@ +# Hybrid Automation + +Flexible automation combining the [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) with AI-powered tools like [Stagehand](https://docs.stagehand.dev/) and `extract_structured_data` for speed, reliability, and adaptability. + +## Key Features + +- **Best of Both Worlds**: Combines fast, reliable SDK automation with AI adaptability +- **Smart Fallbacks**: Uses deterministic methods first, falls back to AI when needed +- **Three Use Cases**: RPA form filling, e-commerce scraping, and job board crawling +- **Production Ready**: Cost-effective primary path with AI safety net for edge cases + +## Why Hybrid? + +| Approach | Pros | Cons | +|----------|------|------| +| **Deterministic (Intuned Browser SDK)** | Fast, reliable, cost-effective | Breaks when site structure changes | +| **AI-Driven (Stagehand, extract_structured_data)** | Adapts to layout changes | Slower, less predictable | +| **Hybrid (This example)** | Best of both worlds | Slightly more complex | + +The hybrid pattern: Use Intuned Browser SDK first (fast path), fall back to AI tools when needed. 
+ +Learn more: [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) + + +## `intuned-browser`: Intuned Browser SDK + +This project uses the Intuned Browser SDK. For more information, check out the [Intuned Browser SDK documentation](https://docs.intunedhq.com/automation-sdks/overview). + + +## Run on Intuned + +[![Run on Intuned](https://cdn1.intuned.io/button.svg)](https://app.intuned.io?repo=https://github.com/Intuned/cookbook/tree/main/python-examples/hybrid-automation) + +## Getting Started + +To get started developing browser automation projects with Intuned, check out our [Quick Starts Guide](https://docs.intunedhq.com/docs/00-getting-started/quickstarts). + +## Development + +> **_NOTE:_** All commands support the `--help` flag to get more information about the command and its arguments and options. + +### Setup + +**Important:** This template uses Intuned's AI gateway for AI-powered features (Stagehand and `extract_structured_data`). The AI gateway requires the project to be saved before running any APIs. + + +To save the project to Intuned, you need to set up your Intuned workspace: + +1. **Create a workspace** - Follow the [workspace management guide](https://docs.intunedhq.com/docs/03-how-to/manage/manage-workspace) to create your Intuned workspace + +2. **Get your API key** - Generate an API key from the [API keys page](https://docs.intunedhq.com/docs/03-how-to/manage/manage-api-keys#how-to-manage-api-keys) in your Intuned dashboard + +3. **Configure workspace ID** - Add your workspace ID to `Intuned.jsonc`: + ```jsonc + { + "workspaceId": "your-workspace-id", + "projectName": "your-project-name", // Will be used as the name of this project. + // ... rest of config + } + ``` + +4. 
**Set environment variable** - Add your API key as an environment variable: + ```bash + export INTUNED_API_KEY=your-api-key + ``` + +### Install dependencies +```bash +uv sync +``` + +After installing dependencies, `intuned` command should be available in your environment. + +### Initialize project + +Run the save command to upload your project and set up the required `.env` file: + +```bash +uv run intuned save +``` + +This will configure your local environment and prepare the AI gateway for running. + +Reference for saving project [here](https://docs.intunedhq.com/docs/02-features/local-development-cli#use-runtime-sdk-and-browser-sdk-helpers) + +### Run an API + +Now you're ready to run the APIs: + +```bash +uv run intuned run api rpa/fill-form .parameters/api/rpa/fill-form/default.json +uv run intuned run api scraper/list .parameters/api/scraper/list/default.json +uv run intuned run api scraper/details .parameters/api/scraper/details/default.json +uv run intuned run api crawler/crawl .parameters/api/crawler/crawl/default.json +uv run intuned run api crawler/crawl .parameters/api/crawler/crawl/job-posting.json +uv run intuned run api crawler/crawl .parameters/api/crawler/crawl/not-lever.json +``` + +### Deploy project +```bash +uv run intuned deploy +``` + + + + +## Project Structure +``` +/ +├── .parameters/ # Test parameters for APIs +│ └── api/ +│ ├── rpa/ +│ │ └── fill-form/ +│ │ └── default.json +│ ├── scraper/ +│ │ ├── list/ +│ │ │ └── default.json +│ │ └── details/ +│ │ ├── default.json +│ │ └── example2.json +│ └── crawler/ +│ └── crawl/ +│ ├── default.json +│ ├── job-posting.json +│ └── not-lever.json +├── api/ # API endpoints +│ ├── rpa/ +│ │ └── fill-form.py # Form filling with Stagehand fallback +│ ├── scraper/ +│ │ ├── list.py # Product list with pagination +│ │ └── details.py # Product details with AI extraction +│ └── crawler/ +│ └── crawl.py # Job board crawler (hybrid extraction) +├── hooks/ +│ └── setup_context.py # CDP URL setup for Stagehand 
+├── utils/ +│ └── crawler/ # Crawler utilities +├── Intuned.jsonc # Intuned project configuration +└── pyproject.toml # Python project dependencies +``` + +## APIs + +| API | Description | +|-----|-------------| +| `rpa/fill-form` | RPA automation that fills consultation booking forms. Uses Playwright via Intuned Browser SDK for form fields, falls back to `stagehand.page.act()` if selectors fail. Verifies success with Playwright, falls back to `stagehand.page.extract()` | +| `scraper/list` | E-commerce product list scraping. Uses Intuned Browser SDK for pagination and link extraction with AI-powered adaptability | +| `scraper/details` | Product details extraction combining SDK methods with `extract_structured_data` for unstructured fields like descriptions and specifications | +| `crawler/crawl` | Job board crawler that extracts structured job postings. Uses static Playwright extraction for Lever (`jobs.lever.co`), AI extraction with `extract_structured_data` for other boards (Greenhouse, etc.). 
Extracts title, location, department, team, description, commitment, workplace type | + +## Learn More + +- [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) +- [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) +- [Extract Structured Data](https://docs.intunedhq.com/automation-sdks/intuned-sdk/python/ai/functions/extract_structured_data) +- [Stagehand act/extract/observe](https://docs.stagehand.dev/v2/basics/act) diff --git a/python-examples/hyprid-automation/api/hyprid-crawler/crawl.py b/python-examples/hybrid-automation/api/crawler/crawl.py similarity index 100% rename from python-examples/hyprid-automation/api/hyprid-crawler/crawl.py rename to python-examples/hybrid-automation/api/crawler/crawl.py diff --git a/python-examples/hyprid-automation/api/hyprid-rpa/fill-form.py b/python-examples/hybrid-automation/api/rpa/fill-form.py similarity index 86% rename from python-examples/hyprid-automation/api/hyprid-rpa/fill-form.py rename to python-examples/hybrid-automation/api/rpa/fill-form.py index f4aa26f8..ff603200 100644 --- a/python-examples/hyprid-automation/api/hyprid-rpa/fill-form.py +++ b/python-examples/hybrid-automation/api/rpa/fill-form.py @@ -85,7 +85,8 @@ async def automation( print("✓ Filled name with Playwright") except Exception as e: print(f"Playwright failed for name, using Stagehand act: {e}") - await stagehand.act(f'Type "{name}" in the name input field') + stagehand_page = stagehand.page + await stagehand_page.act(f'Type "{name}" in the name input field') print("✓ Filled name with Stagehand act") # Step 2: Fill email field @@ -94,7 +95,8 @@ async def automation( print("✓ Filled email with Playwright") except Exception as e: print(f"Playwright failed for email, using Stagehand act: {e}") - await stagehand.act(f'Type "{email}" in the email input field') + stagehand_page = stagehand.page + await stagehand_page.act(f'Type "{email}" in the email input field') print("✓ Filled email with 
Stagehand act") # Step 3: Fill phone field @@ -103,7 +105,8 @@ async def automation( print("✓ Filled phone with Playwright") except Exception as e: print(f"Playwright failed for phone, using Stagehand act: {e}") - await stagehand.act(f'Type "{phone}" in the phone input field') + stagehand_page = stagehand.page + await stagehand_page.act(f'Type "{phone}" in the phone input field') print("✓ Filled phone with Stagehand act") # Step 4: Fill date field @@ -112,7 +115,8 @@ async def automation( print("✓ Filled date with Playwright") except Exception as e: print(f"Playwright failed for date, using Stagehand act: {e}") - await stagehand.act(f'Type "{date}" in the date input field') + stagehand_page = stagehand.page + await stagehand_page.act(f'Type "{date}" in the date input field') print("✓ Filled date with Stagehand act") # Step 5: Fill time field @@ -121,7 +125,8 @@ async def automation( print("✓ Filled time with Playwright") except Exception as e: print(f"Playwright failed for time, using Stagehand act: {e}") - await stagehand.act(f'Type "{time}" in the time input field') + stagehand_page = stagehand.page + await stagehand_page.act(f'Type "{time}" in the time input field') print("✓ Filled time with Stagehand act") # Step 6: Select the consultation topic from dropdown @@ -130,7 +135,8 @@ async def automation( print("✓ Selected topic with Playwright") except Exception as e: print(f"Playwright failed for topic selection, using Stagehand act: {e}") - await stagehand.act(f'Select "{topic}" from the topic dropdown') + stagehand_page = stagehand.page + await stagehand_page.act(f'Select "{topic}" from the topic dropdown') print("✓ Selected topic with Stagehand act") # Step 7: Submit the booking form @@ -139,7 +145,8 @@ async def automation( print("✓ Submitted form with Playwright") except Exception as e: print(f"Playwright failed for submit, using Stagehand act: {e}") - await stagehand.act("Click the submit button to submit the booking form") + stagehand_page = stagehand.page 
+ await stagehand_page.act("Click the submit button to submit the booking form") print("✓ Submitted form with Stagehand act") # Step 8: Wait for and verify the success modal diff --git a/python-examples/hyprid-automation/api/hyprid-scraper/details.py b/python-examples/hybrid-automation/api/scraper/details.py similarity index 100% rename from python-examples/hyprid-automation/api/hyprid-scraper/details.py rename to python-examples/hybrid-automation/api/scraper/details.py diff --git a/python-examples/hyprid-automation/api/hyprid-scraper/list.py b/python-examples/hybrid-automation/api/scraper/list.py similarity index 99% rename from python-examples/hyprid-automation/api/hyprid-scraper/list.py rename to python-examples/hybrid-automation/api/scraper/list.py index 367ebd7c..eaeaee0f 100644 --- a/python-examples/hyprid-automation/api/hyprid-scraper/list.py +++ b/python-examples/hybrid-automation/api/scraper/list.py @@ -149,7 +149,7 @@ async def automation( for product in all_products: extend_payload( { - "api": "hyprid-scraper/details", + "api": "scraper/details", "parameters": dict(product), } ) diff --git a/python-examples/hyprid-automation/hooks/setup_context.py b/python-examples/hybrid-automation/hooks/setup_context.py similarity index 100% rename from python-examples/hyprid-automation/hooks/setup_context.py rename to python-examples/hybrid-automation/hooks/setup_context.py diff --git a/python-examples/hyprid-automation/pyproject.toml b/python-examples/hybrid-automation/pyproject.toml similarity index 100% rename from python-examples/hyprid-automation/pyproject.toml rename to python-examples/hybrid-automation/pyproject.toml diff --git a/python-examples/hyprid-automation/utils/crawler/__init__.py b/python-examples/hybrid-automation/utils/crawler/__init__.py similarity index 79% rename from python-examples/hyprid-automation/utils/crawler/__init__.py rename to python-examples/hybrid-automation/utils/crawler/__init__.py index ffa5d61b..b769cd2d 100644 --- 
a/python-examples/hyprid-automation/utils/crawler/__init__.py +++ b/python-examples/hybrid-automation/utils/crawler/__init__.py @@ -1,4 +1,3 @@ -from .content import extract_page_content from .helpers import get_job_run_id, sanitize_key from .links import extract_links, get_base_domain, is_file_url, normalize_url @@ -6,7 +5,6 @@ "extract_links", "normalize_url", "get_base_domain", - "extract_page_content", "is_file_url", "sanitize_key", "get_job_run_id", diff --git a/python-examples/hyprid-automation/utils/crawler/helpers.py b/python-examples/hybrid-automation/utils/crawler/helpers.py similarity index 100% rename from python-examples/hyprid-automation/utils/crawler/helpers.py rename to python-examples/hybrid-automation/utils/crawler/helpers.py diff --git a/python-examples/hyprid-automation/utils/crawler/links.py b/python-examples/hybrid-automation/utils/crawler/links.py similarity index 100% rename from python-examples/hyprid-automation/utils/crawler/links.py rename to python-examples/hybrid-automation/utils/crawler/links.py diff --git a/python-examples/hyprid-automation/Intuned.jsonc b/python-examples/hyprid-automation/Intuned.jsonc deleted file mode 100644 index 0afb019c..00000000 --- a/python-examples/hyprid-automation/Intuned.jsonc +++ /dev/null @@ -1,26 +0,0 @@ -// For more information, see our Intuned settings reference -// https://docs.intunedhq.com/docs/05-references/intuned-json -{ - "projectName": "hyprid-automation", - "apiAccess": { - "enabled": false - }, - "authSessions": { - "enabled": false - }, - "replication": { - "maxConcurrentRequests": 1, - "size": "standard" - }, - "metadata": { - "template": { - "name": "default-template", - "description": "Default Intuned project template for browser automation", - "tags": ["intuned-browser-sdk"] - }, - "defaultRunPlaygroundInput": { - "apiName": "hyprid-rpa/fill-form", - "parameters": {} - } - } -} diff --git a/python-examples/hyprid-automation/README.md b/python-examples/hyprid-automation/README.md deleted 
file mode 100644 index 302be843..00000000 --- a/python-examples/hyprid-automation/README.md +++ /dev/null @@ -1,90 +0,0 @@ -# Hybrid Automation - -This example demonstrates **hybrid automation** - a flexible approach that combines the [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) with AI-powered tools like [Stagehand](https://docs.stagehand.dev/) and `extract_structured_data`. This gives you the speed and reliability of traditional automation with the adaptability of AI when needed. - -## Run on Intuned - -Open this project in Intuned by clicking the button below. - -Run on Intuned - -## Why Hybrid? - -| Approach | Pros | Cons | -|----------|------|------| -| **Deterministic (Intuned Browser SDK)** | Fast, reliable, cost-effective | Breaks when site structure changes | -| **AI-Driven (Stagehand, extract_structured_data)** | Adapts to layout changes | Slower, less predictable | -| **Hybrid (This example)** | Best of both worlds | Slightly more complex | - -The hybrid pattern: Use Intuned Browser SDK first (fast path), fall back to AI tools when needed. - -Learn more: [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) - -## APIs - -### 1. Form Filling (`hyprid-rpa/fill-form`) -RPA automation that fills a consultation booking form: -- Uses Playwright via Intuned Browser SDK for form fields -- Falls back to `stagehand.page.act()` if selector fails -- Verifies success with Playwright, falls back to `stagehand.page.extract()` - -### 2. Product Scraper (`hyprid-scraper/list` & `details`) -E-commerce scraping with AI-powered data extraction: -- `list`: Uses Intuned Browser SDK for pagination and link extraction -- `details`: Combines SDK methods with `extract_structured_data` for unstructured fields - -### 3. 
Job Board Crawler (`hyprid-crawler/crawl`) -Crawls job boards and extracts structured job postings: -- **Lever** (`jobs.lever.co`): Static Playwright extraction via Intuned Browser SDK -- **Other boards** (Greenhouse, etc.): AI extraction with `extract_structured_data` -- Extracts: title, location, department, team, description, commitment, workplace type - -## Getting Started - -### Install -```bash -uv sync -``` - -### Run APIs - -```bash -# Form filling with AI fallback -uv run intuned run api hyprid-rpa/fill-form .parameters/api/hyprid-rpa/fill-form/default.json - -# Product scraping -uv run intuned run api hyprid-scraper/list .parameters/api/hyprid-scraper/list/default.json -uv run intuned run api hyprid-scraper/details .parameters/api/hyprid-scraper/details/default.json - -# Job board crawling (Lever - static extraction) -uv run intuned run api hyprid-crawler/crawl .parameters/api/hyprid-crawler/crawl/default.json - -# Job board crawling (Greenhouse - AI extraction) -uv run intuned run api hyprid-crawler/crawl .parameters/api/hyprid-crawler/crawl/not-lever.json -``` - -### Deploy -```bash -uv run intuned deploy -``` - -## Project Structure -``` -api/ -├── hyprid-rpa/fill-form.py # Form filling with Stagehand fallback -├── hyprid-scraper/ -│ ├── list.py # Product list with pagination -│ └── details.py # Product details with AI extraction -└── hyprid-crawler/crawl.py # Job board crawler (hybrid extraction) - -hooks/setup_context.py # CDP URL setup for Stagehand -utils/crawler/ # Crawler utilities -.parameters/api/ # Test parameters for each API -``` - -## Learn More - -- [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) -- [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) -- [Extract Structured Data](https://docs.intunedhq.com/automation-sdks/intuned-sdk/python/helpers/functions/extract_structured_data) -- [Stagehand act/extract/observe](https://docs.stagehand.dev/v2/basics/act) diff --git 
a/python-examples/hyprid-automation/utils/crawler/content.py b/python-examples/hyprid-automation/utils/crawler/content.py deleted file mode 100644 index ecbae7d6..00000000 --- a/python-examples/hyprid-automation/utils/crawler/content.py +++ /dev/null @@ -1,20 +0,0 @@ -from intuned_browser import extract_markdown -from intuned_browser.ai import extract_structured_data -from playwright.async_api import Page - - -async def extract_page_content( - page: Page, - schema: dict | None = None, -) -> dict: - if schema: - return await extract_structured_data(source=page, data_schema=schema, model="gpt-5-mini") - - title = await page.title() - markdown = await extract_markdown(page) - - return { - "title": title, - "markdown": markdown, - "markdown_length": len(markdown) if markdown else 0, - } diff --git a/typescript-examples/browser-sdk-showcase/api/ai/extract-structured-data.ts b/typescript-examples/browser-sdk-showcase/api/ai/extract-structured-data.ts index 2244bf72..583b5b2e 100644 --- a/typescript-examples/browser-sdk-showcase/api/ai/extract-structured-data.ts +++ b/typescript-examples/browser-sdk-showcase/api/ai/extract-structured-data.ts @@ -28,7 +28,6 @@ export default async function handler( // Extract from the Page directly using Zod schema. // You can also extract from a specific locator or by passing TextContentItem. - // Check https://docs.intunedhq.com/automation-sdks/intuned-sdk/typescript/helpers/functions/extractStructuredData for more details. 
const product = await extractStructuredData({ source: page, strategy: "HTML", diff --git a/typescript-examples/hybrid-automation/.parameters/api/hybrid-crawler/crawl/default.json b/typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/default.json similarity index 100% rename from typescript-examples/hybrid-automation/.parameters/api/hybrid-crawler/crawl/default.json rename to typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/default.json diff --git a/typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/job-posting.json b/typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/job-posting.json new file mode 100644 index 00000000..e2766901 --- /dev/null +++ b/typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/job-posting.json @@ -0,0 +1,5 @@ +{ + "url": "https://jobs.lever.co/joshstoysandgames/be458514-55c1-4dbf-b355-13f1fe7d1932", + "depth": 1, + "include_external": false +} \ No newline at end of file diff --git a/typescript-examples/hybrid-automation/.parameters/api/hybrid-crawler/crawl/not-lever.json b/typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/not-lever.json similarity index 100% rename from typescript-examples/hybrid-automation/.parameters/api/hybrid-crawler/crawl/not-lever.json rename to typescript-examples/hybrid-automation/.parameters/api/crawler/crawl/not-lever.json diff --git a/typescript-examples/hybrid-automation/.parameters/api/hybrid-rpa/fill-form/default.json b/typescript-examples/hybrid-automation/.parameters/api/rpa/fill-form/default.json similarity index 100% rename from typescript-examples/hybrid-automation/.parameters/api/hybrid-rpa/fill-form/default.json rename to typescript-examples/hybrid-automation/.parameters/api/rpa/fill-form/default.json diff --git a/typescript-examples/hybrid-automation/.parameters/api/hybrid-scraper/details/default.json b/typescript-examples/hybrid-automation/.parameters/api/scraper/details/default.json similarity index 
100% rename from typescript-examples/hybrid-automation/.parameters/api/hybrid-scraper/details/default.json rename to typescript-examples/hybrid-automation/.parameters/api/scraper/details/default.json diff --git a/typescript-examples/hybrid-automation/.parameters/api/hybrid-scraper/list/default.json b/typescript-examples/hybrid-automation/.parameters/api/scraper/list/default.json similarity index 100% rename from typescript-examples/hybrid-automation/.parameters/api/hybrid-scraper/list/default.json rename to typescript-examples/hybrid-automation/.parameters/api/scraper/list/default.json diff --git a/typescript-examples/hybrid-automation/Intuned.jsonc b/typescript-examples/hybrid-automation/Intuned.jsonc index d4b53580..5d49ba34 100644 --- a/typescript-examples/hybrid-automation/Intuned.jsonc +++ b/typescript-examples/hybrid-automation/Intuned.jsonc @@ -1,8 +1,8 @@ // For more information, see our Intuned settings reference // https://docs.intunedhq.com/docs/05-references/intuned-json { - - "projectName": "hybrid-automation-ts", + // "workspaceId": "your-workspace-id", // Add your workspace ID here for local development + // "projectName": "your-project-name", // Add your project name here for local development "apiAccess": { "enabled": false }, @@ -15,20 +15,21 @@ }, "metadata": { "template": { - "name": "hybrid-automation-ts", - "description": "Hybrid automation combining Playwright with Stagehand AI fallback for resilient browser automation", - "tags": ["stagehand", "ai", "rpa", "scraping", "flexible-automation"] + "name": "hybrid-automation", + "description": "Hybrid automation combining Intuned Browser SDK with AI-powered tools like Stagehand and extractStructuredData", + "tags": ["hybrid", "ai", "scraping", "rpa", "crawler", "stagehand"] }, "defaultRunPlaygroundInput": { - "apiName": "hybrid-rpa/fill-form", + "apiName": "rpa/fill-form", "parameters": { - "name": "John Doe", - "email": "john@example.com", - "phone": "+1-555-1234", - "date": "2025-01-15", - 
"time": "10:00", - "topic": "automation" + "name": "Sarah Williams", + "email": "sarah.w@startup.com", + "phone": "+1-555-7890", + "date": "2025-01-05", + "time": "11:30", + "topic": "api-integration" } + } } } diff --git a/typescript-examples/hybrid-automation/README.md b/typescript-examples/hybrid-automation/README.md index 86cb6599..188e7c98 100644 --- a/typescript-examples/hybrid-automation/README.md +++ b/typescript-examples/hybrid-automation/README.md @@ -1,12 +1,13 @@ # Hybrid Automation -This example demonstrates **hybrid automation** - a flexible approach that combines the [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) (`@intuned/browser`) with AI-powered tools like [Stagehand](https://docs.stagehand.dev/) and `extractStructuredData`. This gives you the speed and reliability of traditional automation with the adaptability of AI when needed. +Flexible automation combining the [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) with AI-powered tools like [Stagehand](https://docs.stagehand.dev/) and `extractStructuredData` for speed, reliability, and adaptability. -## Run on Intuned - -Open this project in Intuned by clicking the button below. +## Key Features -Run on Intuned +- **Best of Both Worlds**: Combines fast, reliable SDK automation with AI adaptability +- **Smart Fallbacks**: Uses deterministic methods first, falls back to AI when needed +- **Three Use Cases**: RPA form filling, e-commerce scraping, and job board crawling +- **Production Ready**: Cost-effective primary path with AI safety net for edge cases ## Why Hybrid? @@ -20,71 +21,137 @@ The hybrid pattern: Use Intuned Browser SDK first (fast path), fall back to AI t Learn more: [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) -## APIs -### 1. 
Form Filling (`hybrid-rpa/fill-form`) -RPA automation that fills a consultation booking form: -- Uses Playwright via Intuned Browser SDK for form fields -- Falls back to `stagehand.page.act()` if selector fails -- Verifies success with Playwright, falls back to `stagehand.page.extract()` +## `@intuned/browser`: Intuned Browser SDK -### 2. Product Scraper (`hybrid-scraper/list` & `details`) -E-commerce scraping with AI-powered data extraction: -- `list`: Uses Intuned Browser SDK for pagination and link extraction -- `details`: Combines SDK methods with `extractStructuredData` for unstructured fields +This project uses the Intuned Browser SDK. For more information, check out the [Intuned Browser SDK documentation](https://docs.intunedhq.com/automation-sdks/overview). -### 3. Job Board Crawler (`hybrid-crawler/crawl`) -Crawls job boards and extracts structured job postings: -- **Lever** (`jobs.lever.co`): Static Playwright extraction via Intuned Browser SDK -- **Other boards** (Greenhouse, etc.): AI extraction with `extractStructuredData` -- Extracts: title, location, department, team, description, commitment, workplace type + +## Run on Intuned + +[![Run on Intuned](https://cdn1.intuned.io/button.svg)](https://app.intuned.io?repo=https://github.com/Intuned/cookbook/tree/main/typescript-examples/hybrid-automation) ## Getting Started -### Install +To get started developing browser automation projects with Intuned, check out our [Quick Starts Guide](https://docs.intunedhq.com/docs/00-getting-started/quickstarts). + +## Development + +> **_NOTE:_** All commands support the `--help` flag to get more information about the command and its arguments and options. + +### Setup + +**Important:** This template uses Intuned's AI gateway for AI-powered features (Stagehand and `extractStructuredData`). The AI gateway requires the project to be saved before running any APIs. + + +To save the project to Intuned, you need to set up your Intuned workspace: + +1.
**Create a workspace** - Follow the [workspace management guide](https://docs.intunedhq.com/docs/03-how-to/manage/manage-workspace) to create your Intuned workspace + +2. **Get your API key** - Generate an API key from the [API keys page](https://docs.intunedhq.com/docs/03-how-to/manage/manage-api-keys#how-to-manage-api-keys) in your Intuned dashboard + +3. **Configure workspace ID** - Add your workspace ID and project name to `Intuned.jsonc`: + ```jsonc + { + "workspaceId": "your-workspace-id", + "projectName": "your-project-name" // Will be used as the name of this project. + // ... rest of config + } + ``` + +4. **Set environment variable** - Add your API key as an environment variable: + ```bash + export INTUNED_API_KEY=your-api-key + ``` + +### Install dependencies ```bash npm install ``` -### Run APIs +After installing dependencies, the `intuned` command should be available in your environment. + +### Initialize project + +Run the save command to upload your project and set up the required `.env` file: ```bash -# Form filling with AI fallback -npx intuned run api hybrid-rpa/fill-form .parameters/api/hybrid-rpa/fill-form/default.json +npx intuned save +``` + +See the reference for saving a project [here](https://docs.intunedhq.com/docs/02-features/local-development-cli#use-runtime-sdk-and-browser-sdk-helpers) + -# Product scraping -npx intuned run api hybrid-scraper/list .parameters/api/hybrid-scraper/list/default.json -npx intuned run api hybrid-scraper/details .parameters/api/hybrid-scraper/details/default.json + +This will configure your local environment and prepare the AI gateway for running.
-# Job board crawling (Lever - static extraction) -npx intuned run api hybrid-crawler/crawl .parameters/api/hybrid-crawler/crawl/default.json +### Run an API -# Job board crawling (Greenhouse - AI extraction) -npx intuned run api hybrid-crawler/crawl .parameters/api/hybrid-crawler/crawl/not-lever.json +Now you're ready to run the APIs: + +```bash +npx intuned run api rpa/fill-form .parameters/api/rpa/fill-form/default.json +npx intuned run api scraper/list .parameters/api/scraper/list/default.json +npx intuned run api scraper/details .parameters/api/scraper/details/default.json +npx intuned run api crawler/crawl .parameters/api/crawler/crawl/default.json +npx intuned run api crawler/crawl .parameters/api/crawler/crawl/job-posting.json +npx intuned run api crawler/crawl .parameters/api/crawler/crawl/not-lever.json ``` -### Deploy +### Deploy project ```bash npx intuned deploy ``` + + ## Project Structure + ``` -api/ -├── hybrid-rpa/fill-form.ts # Form filling with Stagehand fallback -├── hybrid-scraper/ -│ ├── list.ts # Product list with pagination -│ └── details.ts # Product details with AI extraction -└── hybrid-crawler/crawl.ts # Job board crawler (hybrid extraction) - -hooks/setupContext.ts # CDP URL setup for Stagehand -utils/crawler/ # Crawler utilities -.parameters/api/ # Test parameters for each API +/ +├── .parameters/ # Test parameters for APIs +│ └── api/ +│ ├── rpa/ +│ │ └── fill-form/ +│ │ └── default.json +│ ├── scraper/ +│ │ ├── list/ +│ │ │ └── default.json +│ │ └── details/ +│ │ └── default.json +│ └── crawler/ +│ └── crawl/ +│ ├── default.json +│ ├── job-posting.json +│ └── not-lever.json +├── api/ # API endpoints +│ ├── rpa/ +│ │ └── fill-form.ts # Form filling with Stagehand fallback +│ ├── scraper/ +│ │ ├── list.ts # Product list with pagination +│ │ └── details.ts # Product details with AI extraction +│ └── crawler/ +│ └── crawl.ts # Job board crawler (hybrid extraction) +├── hooks/ +│ └── setupContext.ts # CDP URL setup for Stagehand +├──
utils/ +│ └── crawler/ # Crawler utilities +├── Intuned.jsonc # Intuned project configuration +├── package.json # Node.js project dependencies +└── tsconfig.json # TypeScript configuration ``` +## APIs + +| API | Description | +|-----|-------------| +| `rpa/fill-form` | RPA automation that fills consultation booking forms. Uses Playwright via Intuned Browser SDK for form fields, falls back to `stagehand.page.act()` if selectors fail. Verifies success with Playwright, falls back to `stagehand.page.extract()` | +| `scraper/list` | E-commerce product list scraping. Uses Intuned Browser SDK for pagination and link extraction with AI-powered adaptability | +| `scraper/details` | Product details extraction combining SDK methods with `extractStructuredData` for unstructured fields like descriptions and specifications | +| `crawler/crawl` | Job board crawler that extracts structured job postings. Uses static Playwright extraction for Lever (`jobs.lever.co`), AI extraction with `extractStructuredData` for other boards (Greenhouse, etc.). 
Extracts title, location, department, team, description, commitment, workplace type | + ## Learn More - [Flexible Automations](https://docs.intunedhq.com/docs/02-features/flexible-automation) - [Intuned Browser SDK](https://docs.intunedhq.com/automation-sdks/overview) -- [Extract Structured Data](https://docs.intunedhq.com/automation-sdks/intuned-sdk/typescript/helpers/functions/extractStructuredData) +- [Extract Structured Data](https://docs.intunedhq.com/automation-sdks/intuned-sdk/typescript/ai/functions/extractStructuredData) - [Stagehand act/extract/observe](https://docs.stagehand.dev/v2/basics/act) diff --git a/typescript-examples/hybrid-automation/api/hybrid-crawler/crawl.ts b/typescript-examples/hybrid-automation/api/crawler/crawl.ts similarity index 100% rename from typescript-examples/hybrid-automation/api/hybrid-crawler/crawl.ts rename to typescript-examples/hybrid-automation/api/crawler/crawl.ts diff --git a/typescript-examples/hybrid-automation/api/hybrid-rpa/fill-form.ts b/typescript-examples/hybrid-automation/api/rpa/fill-form.ts similarity index 100% rename from typescript-examples/hybrid-automation/api/hybrid-rpa/fill-form.ts rename to typescript-examples/hybrid-automation/api/rpa/fill-form.ts diff --git a/typescript-examples/hybrid-automation/api/hybrid-scraper/details.ts b/typescript-examples/hybrid-automation/api/scraper/details.ts similarity index 100% rename from typescript-examples/hybrid-automation/api/hybrid-scraper/details.ts rename to typescript-examples/hybrid-automation/api/scraper/details.ts diff --git a/typescript-examples/hybrid-automation/api/hybrid-scraper/list.ts b/typescript-examples/hybrid-automation/api/scraper/list.ts similarity index 100% rename from typescript-examples/hybrid-automation/api/hybrid-scraper/list.ts rename to typescript-examples/hybrid-automation/api/scraper/list.ts diff --git a/typescript-examples/hybrid-automation/utils/crawler/content.ts b/typescript-examples/hybrid-automation/utils/crawler/content.ts 
deleted file mode 100644 index 46fc8cd5..00000000 --- a/typescript-examples/hybrid-automation/utils/crawler/content.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { Page } from "playwright"; -import { extractMarkdown } from "@intuned/browser"; -import { extractStructuredData } from "@intuned/browser/ai"; - -export async function extractPageContent( - page: Page, - schema?: any -): Promise { - if (schema) { - return await extractStructuredData({ - source: page, - dataSchema: schema, - model: "gpt-5-mini", - }); - } - - const title = await page.title(); - const markdown = await extractMarkdown({ source: page }); - - return { - title, - markdown, - markdown_length: markdown?.length || 0, - }; -} diff --git a/typescript-examples/hybrid-automation/utils/crawler/index.ts b/typescript-examples/hybrid-automation/utils/crawler/index.ts index 7f17eb9b..0ad7544e 100644 --- a/typescript-examples/hybrid-automation/utils/crawler/index.ts +++ b/typescript-examples/hybrid-automation/utils/crawler/index.ts @@ -1,3 +1,2 @@ export { extractLinks, normalizeUrl, getBaseDomain, isFileUrl } from "./links"; -export { extractPageContent } from "./content"; export { sanitizeKey, getJobRunId } from "./helpers";