From 6b77176bd111f93089e0431a44e57ffdb0e621a5 Mon Sep 17 00:00:00 2001 From: jmanhype Date: Thu, 6 Mar 2025 13:23:25 -0600 Subject: [PATCH] Add proxy-lite and instructor_ex integration with improved answer extraction --- .gitmodules | 1 + INTEGRATION_README.md | 189 ++++++++++++++++++++++++++++ example_usage.exs | 98 +++++++++++++++ proxy_client.ex | 91 ++++++++++++++ proxy_instructor.ex | 275 +++++++++++++++++++++++++++++++++++++++++ proxy_schemas.ex | 142 +++++++++++++++++++++ proxy_service.py | 214 ++++++++++++++++++++++++++++++++ setup.sh | 57 +++++++++ start_llama_server.sh | 21 ++++ start_proxy_service.sh | 13 ++ 10 files changed, 1101 insertions(+) create mode 100644 .gitmodules create mode 100644 INTEGRATION_README.md create mode 100755 example_usage.exs create mode 100644 proxy_client.ex create mode 100644 proxy_instructor.ex create mode 100644 proxy_schemas.ex create mode 100644 proxy_service.py create mode 100755 setup.sh create mode 100755 start_llama_server.sh create mode 100755 start_proxy_service.sh diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/.gitmodules @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/INTEGRATION_README.md b/INTEGRATION_README.md new file mode 100644 index 0000000..13f2a83 --- /dev/null +++ b/INTEGRATION_README.md @@ -0,0 +1,189 @@ +# Proxy-lite + Instructor_ex Integration + +This repository contains an integration between `proxy-lite` (a FastAPI-based web automation service) and `instructor_ex` (an Elixir port of the Instructor library for structured data extraction). + +## Overview + +The integration allows for: +1. Web automation tasks using Playwright via the proxy-lite service +2. Structured data extraction from web content using instructor_ex and a local LLM +3. 
Composable functions that combine these capabilities for various use cases + +## Components + +- **proxy_service.py**: A FastAPI service that provides web automation functionality +- **proxy_client.ex**: An Elixir client for interacting with the proxy-lite service +- **proxy_schemas.ex**: Ecto schemas for structured data extraction +- **proxy_instructor.ex**: Integration logic combining proxy-lite and instructor_ex + +## Setup + +### Prerequisites + +- Python 3.10+ with FastAPI and Playwright +- Elixir 1.14+ +- A running llama.cpp server with a compatible model + +### Starting the Services + +1. Start the llama.cpp server: + ``` + ./start_llama_server.sh /path/to/model.gguf + ``` + +2. Start the proxy-lite service: + ``` + ./start_proxy_service.sh + ``` + +## Usage + +Run the example script to see the integration in action: + +``` +./example_usage.exs +``` + +The script demonstrates searching for weather information and extracting structured data from the results. + +## Key Features + +- **Task Automation**: Perform complex web tasks like searching, navigation, and data extraction +- **Structured Data Extraction**: Extract typed data from web content using LLM capabilities +- **Error Handling**: Robust handling of timeouts and failures during web automation +- **Configurable Options**: Control browser visibility, timeout duration, and more + +## Implementation Details + +The integration follows these steps: +1. proxy_client.ex sends a task request to the proxy-lite service +2. proxy_service.py executes the task using Playwright +3. The results (including HTML and screenshots) are returned to the client +4. proxy_instructor.ex uses instructor_ex to extract structured data +5. 
The final results are presented with complete details and formatting + +## Recent Improvements + +- Enhanced error handling and timeout management +- Improved extraction of complete answers from automation results +- Better prompt engineering for the LLM to ensure complete information +- Fixed variable warnings and improved code organization + +## Architecture + +The integration uses a service-oriented architecture: + +- **Python Service**: A FastAPI server that exposes proxy-lite functionality +- **Elixir Client**: Modules for instructor_ex to communicate with the service +- **Ecto Schemas**: Well-defined structures for extracting data from web pages + +## Prerequisites + +- Python 3.10+ +- Elixir 1.14+ +- llama.cpp installed with a compatible model +- Playwright installed + +## Setup Instructions + +### 1. Set Up llama.cpp + +First, make sure you have llama.cpp installed and running: + +```bash +# Install llama.cpp +brew install llama.cpp + +# Start the server with a suitable model +llama-server --port 8090 -ngl 999 -hf Qwen/Qwen2.5-7B-Instruct-GGUF +``` + +### 2. Set Up the proxy-lite Service + +Make the service script executable and start it: + +```bash +chmod +x start_proxy_service.sh +./start_proxy_service.sh +``` + +This will: +- Install necessary Python dependencies +- Start the proxy-lite service on port 8001 + +### 3. 
Run the Example Script + +Make the example script executable and run it: + +```bash +chmod +x example_usage.exs +./example_usage.exs +``` + +## Using the Integration in Your Code + +### Simple Usage Example + +```elixir +# Configure your LLM adapter +config = [ + adapter: Instructor.Adapters.Llamacpp, + api_url: "http://localhost:8090" +] + +# Perform a web search and get structured results +{:ok, results} = ProxyInstructor.search_web("Best restaurants in San Francisco") + +# Extract entities from a specific webpage +{:ok, page_data} = ProxyInstructor.extract_page_info("https://en.wikipedia.org/wiki/San_Francisco") + +# Perform a more complex task with automated browsing +{:ok, summary} = ProxyInstructor.perform_task("Find the current weather in New York City") +``` + +## Integration Files + +- `proxy_service.py`: FastAPI service for proxy-lite +- `start_proxy_service.sh`: Script to start the service +- `proxy_client.ex`: Elixir client for the service API +- `proxy_schemas.ex`: Ecto schemas for structured data +- `proxy_instructor.ex`: Main integration module +- `example_usage.exs`: Example script + +## Best Practices for Production Use + +1. **Resource Management**: Properly manage browser instances +2. **Error Handling**: Implement retries and timeouts +3. **Security**: Be careful with user-provided inputs +4. **Performance**: Consider caching results for repeated queries +5. **Isolation**: Run the Python service in a container + +## Customization + +You can customize this integration by: + +1. Extending the schemas in `proxy_schemas.ex` +2. Adding new functions to `proxy_instructor.ex` +3. Configuring different models for llama.cpp +4. 
Modifying the proxy-lite configuration + +## Troubleshooting + +### Service Not Starting + +- Check if the required ports (8001 for the service, 8090 for llama.cpp) are available +- Ensure all dependencies are installed correctly + +### Browser Issues + +- If you encounter "browser executable not found" errors, run `playwright install` manually +- For headless mode issues on Mac, try running with `headless: false` + +### Model Performance + +- For better results, use larger models with llama.cpp (if your hardware supports it) +- Balance between model size and performance based on your needs + +## License + +This integration is provided under the MIT license. \ No newline at end of file diff --git a/example_usage.exs b/example_usage.exs new file mode 100755 index 0000000..43c66c0 --- /dev/null +++ b/example_usage.exs @@ -0,0 +1,98 @@ +#!/usr/bin/env elixir + +# First, ensure dependencies are available +Mix.install([ + {:instructor, path: "instructor_ex"}, + {:httpoison, "~> 2.0"}, + {:jason, "~> 1.4"}, + {:ecto, "~> 3.12"} +]) + +# Load our integration modules +Code.require_file("proxy_client.ex") +Code.require_file("proxy_schemas.ex") +Code.require_file("proxy_instructor.ex") + +IO.puts("========================================================") +IO.puts(" Proxy-lite + instructor_ex Integration Example") +IO.puts("========================================================") + +# Check if the llama.cpp server is running +llama_server_url = "http://localhost:8090" + +llama_server_running = try do + HTTPoison.get!(llama_server_url <> "/v1/models", [], [recv_timeout: 5000]) + true +rescue + _ -> false
end + +unless llama_server_running do + IO.puts("\nāŒ ERROR: llama.cpp server is not running at #{llama_server_url}") + IO.puts("Please start the server with:") + IO.puts("  llama-server --port 8090 -ngl 1 -m /Users/speed/Library/Caches/llama.cpp/Qwen_Qwen2.5-7B-Instruct-GGUF_qwen2.5-7b-instruct-q2_k.gguf") + System.halt(1) +end + +# Check if the proxy-lite service is 
running +proxy_service_running = ProxyClient.available?() + +unless proxy_service_running do + IO.puts("\nāŒ ERROR: proxy-lite service is not running") + IO.puts("Please start it with:") + IO.puts(" ./start_proxy_service.sh") + System.halt(1) +end + +IO.puts("\nāœ… Services are running!") + +# Define some example tasks +tasks = [ + "Find the current weather in San Francisco", + "Search for the top 3 Italian restaurants in New York City", + "Get the latest news about artificial intelligence" +] + +# Ask the user to select a task +IO.puts("\nPlease select a task to perform:") + +Enum.with_index(tasks, 1) +|> Enum.each(fn {task, index} -> + IO.puts(" #{index}. #{task}") +end) + +IO.puts(" #{length(tasks) + 1}. Custom task (enter your own)") + +selected_task_input = IO.gets("\nEnter task number: ") |> String.trim() +{selected_index, _} = Integer.parse(selected_task_input) + +task = if selected_index <= length(tasks) do + Enum.at(tasks, selected_index - 1) +else + IO.puts("\nEnter your custom task:") + IO.gets("") |> String.trim() +end + +# Configure options +headless = false +IO.puts("\nRunning with task: \"#{task}\"") +IO.puts(if headless, do: "Browser will run in headless mode", else: "Browser will be visible") + +# Execute the task +IO.puts("\nšŸš€ Executing task...") + +case ProxyInstructor.perform_task(task, [headless: headless]) do + {:ok, result} -> + IO.puts("\nāœ… Task completed!") + IO.puts("\nSummary: #{result.summary}") + + if result.answer && result.answer != "" do + IO.puts("\nAnswer: #{result.answer}") + end + + steps = result.steps_taken || 0 + IO.puts("\nSteps taken: #{steps}") + + {:error, reason} -> + IO.puts("\nāŒ ERROR: #{reason}") +end \ No newline at end of file diff --git a/proxy_client.ex b/proxy_client.ex new file mode 100644 index 0000000..77aaf8f --- /dev/null +++ b/proxy_client.ex @@ -0,0 +1,91 @@ +defmodule ProxyClient do + @moduledoc """ + Client for interacting with the proxy-lite service. 
+ + This module provides functions to send requests to the proxy-lite service + which handles web automation via Playwright. + """ + + @proxy_service_url "http://localhost:8001" + + @doc """ + Performs a web automation task using proxy-lite and returns the results. + + ## Parameters + + - `query`: The task to perform (e.g., "Find the weather in New York") + - `opts`: Additional options for the task + - `:homepage` - The starting URL (default: "https://www.google.com") + - `:headless` - Whether to run in headless mode (default: false) + - `:include_html` - Whether to include HTML content in response (default: true) + - `:annotate_image` - Whether to annotate screenshots (default: true) + - `:max_steps` - Maximum number of steps to take (default: 50) + + ## Returns + + {:ok, result} on success, {:error, reason} on failure + + ## Examples + + iex> ProxyClient.run_task("Find the top 3 Italian restaurants in San Francisco") + {:ok, %{ + "status" => "success", + "screenshots" => [...], + "html_content" => "...", + "results" => %{...} + }} + """ + def run_task(query, opts \\ []) do + # Get options with defaults + homepage = Keyword.get(opts, :homepage, "https://www.google.com") + headless = Keyword.get(opts, :headless, false) + include_html = Keyword.get(opts, :include_html, true) + annotate_image = Keyword.get(opts, :annotate_image, true) + max_steps = Keyword.get(opts, :max_steps, 50) + + # Prepare the request body + body = %{ + query: query, + homepage: homepage, + headless: headless, + include_html: include_html, + annotate_image: annotate_image, + max_steps: max_steps + } + + # Make the HTTP request + case HTTPoison.post( + "#{@proxy_service_url}/run", + Jason.encode!(body), + [{"Content-Type", "application/json"}], + recv_timeout: 300_000 # 5-minute timeout + ) do + {:ok, %HTTPoison.Response{status_code: 200, body: response_body}} -> + # Parse the response + case Jason.decode(response_body) do + {:ok, parsed} -> {:ok, parsed} + {:error, _} = error -> error + end + + 
{:ok, %HTTPoison.Response{status_code: code, body: body}} -> + {:error, "HTTP Error #{code}: #{body}"} + + {:error, %HTTPoison.Error{reason: reason}} -> + {:error, "HTTP Request Failed: #{inspect(reason)}"} + end + end + + @doc """ + Checks if the proxy-lite service is available. + + ## Returns + + true if the service is available, false otherwise + """ + def available? do + case HTTPoison.get("#{@proxy_service_url}/") do + {:ok, %HTTPoison.Response{status_code: 200}} -> true + _ -> false + end + end +end \ No newline at end of file diff --git a/proxy_instructor.ex b/proxy_instructor.ex new file mode 100644 index 0000000..c525be3 --- /dev/null +++ b/proxy_instructor.ex @@ -0,0 +1,275 @@ +defmodule ProxyInstructor do + @moduledoc """ + Integration between proxy-lite and instructor_ex. + + This module provides functions that combine proxy-lite web automation + with instructor_ex structured data extraction using llama.cpp. + """ + + alias ProxySchemas.WebPageInfo + alias ProxySchemas.WebSearchResult + alias ProxySchemas.DataExtraction + alias ProxySchemas.WebAutomationSummary + + @doc """ + Performs a web search and returns structured results. + + This function uses proxy-lite to perform web automation and then + processes the results using instructor_ex and llama.cpp. 
+ + ## Parameters + + - `query`: The search query to perform + - `opts`: Options for the task (see `ProxyClient.run_task/2`) + + ## Returns + + {:ok, result} on success, {:error, reason} on failure + """ + def search_web(query, opts \\ []) do + with {:ok, result} <- ProxyClient.run_task(query, opts), + true <- result["status"] == "success" do + + # Extract relevant data from the result + html_content = result["html_content"] || "" + _screenshots = result["screenshots"] || [] + + # Configure instructor to use llama.cpp + config = [ + adapter: Instructor.Adapters.Llamacpp, + api_url: "http://localhost:8090" # Updated from 8080 to 8090 + ] + + # Generate a summary of the search results + Instructor.chat_completion( + [ + response_model: WebSearchResult, + mode: :json_schema, + messages: [ + %{ + role: "user", + content: """ + I performed a web search for: #{query} + + The HTML content of the last page is: + + #{html_content |> String.slice(0, 8000)} + + Based on this content, extract structured search results. + """ + } + ] + ], + config + ) + else + {:error, _reason} = error -> error + _ -> {:error, "Failed to get successful response from proxy-lite"} + end + end + + @doc """ + Extracts entities and information from a web page. 
+ + ## Parameters + + - `url`: The URL to visit and extract information from + - `opts`: Options for the task (see `ProxyClient.run_task/2`) + + ## Returns + + {:ok, result} on success, {:error, reason} on failure + """ + def extract_page_info(url, opts \\ []) do + # Merge options with default homepage + opts = Keyword.merge([homepage: url], opts) + + with {:ok, result} <- ProxyClient.run_task("Extract information from this page", opts), + true <- result["status"] == "success" do + + # Extract relevant data from the result + html_content = result["html_content"] || "" + + # Configure instructor to use llama.cpp + config = [ + adapter: Instructor.Adapters.Llamacpp, + api_url: "http://localhost:8090" # Updated from 8080 to 8090 + ] + + # Extract information from the page + Instructor.chat_completion( + [ + response_model: WebPageInfo, + mode: :json_schema, + messages: [ + %{ + role: "user", + content: """ + Extract structured information from the following web page HTML: + + #{html_content |> String.slice(0, 8000)} + + Please identify the title, content, and key elements of this page. + """ + } + ] + ], + config + ) + else + {:error, _reason} = error -> error + _ -> {:error, "Failed to get successful response from proxy-lite"} + end + end + + @doc """ + Performs a task with browser automation and summarizes the results. 
+ + ## Parameters + + - `task`: The task to perform (e.g., "Find the weather forecast for San Francisco") + - `opts`: Options for the task (see `ProxyClient.run_task/2`) + + ## Returns + + {:ok, summary} on success, {:error, reason} on failure + """ + def perform_task(task, opts \\ []) do + with {:ok, result} <- ProxyClient.run_task(task, opts), + true <- result["status"] == "success" do + + # Extract relevant data from the result + html_content = result["html_content"] || "" + steps = result["results"]["steps"] || [] + steps_count = length(steps) + screenshots = result["screenshots"] || [] + has_screenshots = length(screenshots) > 0 + + # Extract the final answer if available + final_answer = cond do + # Try to get from the output field + is_map(result["results"]) && is_binary(result["results"]["output"]) -> + result["results"]["output"] + + # Try to get from the last step's output + is_list(steps) && length(steps) > 0 -> + last_step = List.last(steps) + if is_map(last_step) && is_binary(last_step["output"]), do: last_step["output"], else: nil + + # Try to get from the task_output field if it exists + is_map(result["results"]) && is_binary(result["results"]["task_output"]) -> + result["results"]["task_output"] + + # No final answer found + true -> nil + end + + # Prepare step descriptions for the summary + step_descriptions = steps + |> Enum.map(fn step -> + if Map.has_key?(step, "thinking"), do: step["thinking"], else: "" + end) + |> Enum.filter(fn s -> s != "" end) + |> Enum.join("\n") + + # Configure instructor to use llama.cpp + config = [ + adapter: Instructor.Adapters.Llamacpp, + api_url: "http://localhost:8090" # Updated from 8080 to 8090 + ] + + # Summarize the task execution + Instructor.chat_completion( + [ + response_model: WebAutomationSummary, + mode: :json_schema, + messages: [ + %{ + role: "user", + content: """ + I performed the following task using web automation: "#{task}" + + The automation performed #{steps_count} steps, with the following 
thinking: + + #{step_descriptions} + + The final page HTML content was: + + #{html_content |> String.slice(0, 6000)} + + #{if final_answer, do: "The final answer from the automation was: #{final_answer}", else: ""} + #{if has_screenshots, do: "The automation captured #{length(screenshots)} screenshots during the process.", else: ""} + + Please provide a detailed summary of what was accomplished. + In the 'answer' field, provide the COMPLETE answer to the original task. + If the task was to find information (like weather, news, etc.), include ALL the relevant details in the answer. + """ + } + ] + ], + config + ) + else + {:error, _reason} = error -> error + _ -> {:error, "Failed to get successful response from proxy-lite"} + end + end + + @doc """ + Extract specific types of entities from a web page. + + ## Parameters + + - `url`: The URL to visit + - `entity_types`: List of entity types to extract (e.g., ["people", "organizations"]) + - `opts`: Options for the task (see `ProxyClient.run_task/2`) + + ## Returns + + {:ok, entities} on success, {:error, reason} on failure + """ + def extract_entities(url, entity_types \\ [], opts \\ []) do + # Merge options with default homepage + opts = Keyword.merge([homepage: url], opts) + + entity_str = Enum.join(entity_types, ", ") + task = if entity_str == "", do: "Extract all entities from this page", else: "Extract #{entity_str} from this page" + + with {:ok, result} <- ProxyClient.run_task(task, opts), + true <- result["status"] == "success" do + + # Extract relevant data from the result + html_content = result["html_content"] || "" + + # Configure instructor to use llama.cpp + config = [ + adapter: Instructor.Adapters.Llamacpp, + api_url: "http://localhost:8090" # Updated from 8080 to 8090 + ] + + # Extract entities from the page + Instructor.chat_completion( + [ + response_model: DataExtraction, + mode: :json_schema, + messages: [ + %{ + role: "user", + content: """ + Extract entities from the following web page HTML: + + 
#{html_content |> String.slice(0, 8000)} + + Please identify people, organizations, locations, dates, prices, and any other important information. + """ + } + ] + ], + config + ) + else + {:error, _reason} = error -> error + _ -> {:error, "Failed to get successful response from proxy-lite"} + end + end +end \ No newline at end of file diff --git a/proxy_schemas.ex b/proxy_schemas.ex new file mode 100644 index 0000000..05b31f8 --- /dev/null +++ b/proxy_schemas.ex @@ -0,0 +1,142 @@ +defmodule ProxySchemas do + @moduledoc """ + Schemas for processing proxy-lite results with instructor_ex. + + This module defines Ecto schemas for structuring data from proxy-lite + for use with instructor_ex and the local llama.cpp model. + """ + + defmodule WebPageInfo do + @moduledoc """ + Schema for structured web page information. + """ + use Ecto.Schema + use Instructor + + @llm_doc """ + Schema for web page information extracted from HTML and screenshots. + + ## Field Descriptions: + - title: The title of the web page + - url: The URL of the web page + - main_content: The main content of the web page + - key_elements: A list of key UI elements found on the page + - identified_data: Structured data extracted from the page content + """ + @primary_key false + embedded_schema do + field(:title, :string) + field(:url, :string) + field(:main_content, :string) + field(:key_elements, {:array, :string}) + field(:identified_data, :map) + end + + @impl true + def validate_changeset(changeset) do + changeset + |> Ecto.Changeset.validate_required([:title, :url, :main_content]) + end + end + + defmodule WebSearchResult do + @moduledoc """ + Schema for search results from web automation. + """ + use Ecto.Schema + use Instructor + + @llm_doc """ + Schema for web search results extracted from proxy-lite automation. 
+ + ## Field Descriptions: + - query: The original search query + - results: A list of search result items with titles and descriptions + - summary: A concise summary of the search results + - top_result: The most relevant result from the search + """ + @primary_key false + embedded_schema do + field(:query, :string) + field(:results, {:array, :map}) + field(:summary, :string) + field(:top_result, :string) + end + + @impl true + def validate_changeset(changeset) do + changeset + |> Ecto.Changeset.validate_required([:query, :summary]) + end + end + + defmodule DataExtraction do + @moduledoc """ + Schema for structured data extraction from web pages. + """ + use Ecto.Schema + use Instructor + + @llm_doc """ + Schema for extracting specific data types from web page content. + + ## Field Descriptions: + - people: Names of people mentioned + - organizations: Names of organizations mentioned + - locations: Locations mentioned + - dates: Dates mentioned + - prices: Prices mentioned + - contact_info: Contact information found (email, phone, etc.) + - key_facts: Important facts extracted from the content + """ + @primary_key false + embedded_schema do + field(:people, {:array, :string}, default: []) + field(:organizations, {:array, :string}, default: []) + field(:locations, {:array, :string}, default: []) + field(:dates, {:array, :string}, default: []) + field(:prices, {:array, :string}, default: []) + field(:contact_info, :map, default: %{}) + field(:key_facts, {:array, :string}, default: []) + end + + @impl true + def validate_changeset(changeset) do + # All fields are optional for this schema + changeset + end + end + + defmodule WebAutomationSummary do + @moduledoc """ + Schema for summarizing the entire web automation session. + """ + use Ecto.Schema + use Instructor + + @llm_doc """ + Schema for summarizing the entire web automation session results. 
+ + ## Field Descriptions: + - task_completed: Whether the task was completed successfully + - steps_taken: Number of steps taken during automation + - summary: A concise summary of what was accomplished + - extracted_info: Structured information extracted from the pages visited + - answer: The direct answer to the original query, if applicable + """ + @primary_key false + embedded_schema do + field(:task_completed, :boolean) + field(:steps_taken, :integer) + field(:summary, :string) + field(:extracted_info, :map) + field(:answer, :string) + end + + @impl true + def validate_changeset(changeset) do + changeset + |> Ecto.Changeset.validate_required([:task_completed, :summary]) + end + end +end \ No newline at end of file diff --git a/proxy_service.py b/proxy_service.py new file mode 100644 index 0000000..0379529 --- /dev/null +++ b/proxy_service.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +""" +Proxy-lite service for instructor_ex integration +================================================ + +This service exposes proxy-lite functionality through a REST API, +allowing the Elixir instructor_ex application to use web automation. 
+""" + +import asyncio +import base64 +import json +import os +from typing import Any, Dict, List, Optional + +import uvicorn +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel + +from proxy_lite import Runner, RunnerConfig + + +class TaskRequest(BaseModel): + """Request model for web automation tasks.""" + query: str + homepage: Optional[str] = "https://www.google.com" + headless: Optional[bool] = False + include_html: Optional[bool] = True + annotate_image: Optional[bool] = True + max_steps: Optional[int] = 50 + + +class TaskResponse(BaseModel): + """Response model for web automation tasks.""" + status: str + message: Optional[str] = None + results: Optional[Dict[str, Any]] = None + screenshots: Optional[List[str]] = None + html_content: Optional[str] = None + + +app = FastAPI( + title="Proxy-lite API", + description="API for web automation using proxy-lite and Playwright", + version="0.1.0", +) + +# Add CORS middleware to allow requests from Elixir app +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # In production, restrict this to your Elixir app's domain + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/") +async def root(): + """Root endpoint to verify service is running.""" + return {"status": "ok", "message": "Proxy-lite service is running"} + + +@app.post("/run", response_model=TaskResponse) +async def run_task(task: TaskRequest): + """Run a web automation task using proxy-lite and Playwright.""" + try: + # Configure the runner based on the request + config = RunnerConfig.from_dict({ + "environment": { + "name": "webbrowser", + "headless": task.headless, + "homepage": task.homepage, + "annotate_image": task.annotate_image, + "include_html": task.include_html, + "viewport_width": 1280, + "viewport_height": 960, + "screenshot_delay": 1.5, + }, + "solver": { + "name": "simple", + "agent": { + "name": "proxy_lite", + "client": { 
+ "name": "convergence", + "model_id": "convergence-ai/proxy-lite-3b", + # Use local endpoint if available, otherwise use demo endpoint + "api_base": os.environ.get( + "PROXY_LITE_API_BASE", + "https://convergence-ai-demo-api.hf.space/v1" + ), + }, + }, + }, + "max_steps": task.max_steps, + "action_timeout": 300, + "environment_timeout": 120, # Increasing from 60 to 120 seconds to handle complex pages + "task_timeout": 1800, # 30 minutes + "verbose": True, + }) + + # Create and run the proxy-lite runner + runner = Runner(config=config) + result = await runner.run(task.query) + + # Extract screenshots as base64 strings + screenshots = [] + html_content = None + + # Prepare the result dictionary using model_dump() (Pydantic v2) + try: + # Try model_dump first (Pydantic v2) + result_dict = result.model_dump() + except AttributeError: + try: + # Fall back to dict() if model_dump is not available (Pydantic v1) + result_dict = result.dict() + except Exception as e: + # If neither work, create a basic dictionary with essential information + result_dict = { + "query": task.query, + "success": True, + "message": getattr(result, "message", "Task completed successfully"), + } + + # Initialize steps list - don't rely on result having this attribute + steps = [] + + # Process the actions from the result if available + if hasattr(result, 'actions') and result.actions: + for action in result.actions: + step_dict = { + "action": action.text if hasattr(action, "text") else "Unknown action", + "tool_calls": [] + } + + # Extract tool calls if they exist + if hasattr(action, "tool_calls") and action.tool_calls: + for tool_call in action.tool_calls: + if hasattr(tool_call, "function"): + step_dict["tool_calls"].append({ + "name": tool_call.function.get("name", "unknown"), + "arguments": tool_call.function.get("arguments", {}) + }) + + steps.append(step_dict) + + # Process observations from the result if available + if hasattr(result, 'observations') and result.observations: + for idx, 
observation in enumerate(result.observations): + if hasattr(observation, "state") and observation.state: + # Extract image if available + if hasattr(observation.state, "image") and observation.state.image: + # Add the screenshot with index to ensure uniqueness + screenshots.append(observation.state.image) + + # Extract HTML content if available + if hasattr(observation.state, "html") and observation.state.html and task.include_html: + # Use the last HTML state as the response HTML + html_content = observation.state.html + + # Add the extracted information to the result dictionary + result_dict['steps'] = steps + result_dict['steps_taken'] = len(steps) if steps else 0 + + # Add the final answer/response if available + if hasattr(result, 'answer') and result.answer: + result_dict['answer'] = result.answer + elif hasattr(result, 'response') and result.response: + result_dict['answer'] = result.response + + # Log debugging information + print(f"Task completed: {task.query}") + print(f"Steps taken: {len(steps) if steps else 0}") + print(f"Screenshots captured: {len(screenshots)}") + + # Return the response + return TaskResponse( + status="success", + results=result_dict, + screenshots=screenshots, + html_content=html_content, + ) + + except asyncio.TimeoutError: + # Specifically handle timeout errors with a clearer message + error_msg = f"Operation Environment response timed out after {config.environment_timeout} seconds" + print(f"Timeout error: {error_msg}") + return TaskResponse( + status="error", + message=error_msg, + ) + except Exception as e: + # Log the error for debugging + import traceback + print(f"Error running task: {str(e)}") + print(traceback.format_exc()) + # Return error response + return TaskResponse( + status="error", + message=str(e), + ) + + +if __name__ == "__main__": + # Get port from environment variable or use default + port = int(os.environ.get("PORT", 8001)) + # Add --debug flag to command line arguments to enable debug mode + import sys + 
debug_mode = "--debug" in sys.argv + + print(f"Starting server on port {port}" + (" in debug mode" if debug_mode else "")) + uvicorn.run(app, host="0.0.0.0", port=port, log_level="debug" if debug_mode else "info") \ No newline at end of file diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..9d2a2b8 --- /dev/null +++ b/setup.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Setup script for proxy-lite + instructor_ex integration + +echo "Setting up proxy-lite + instructor_ex integration..." + +# Check Python version +python_version=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))') +required_version="3.10" + +if [[ "$(printf '%s\n' "$required_version" "$python_version" | sort -V | head -n1)" != "$required_version" ]]; then + echo "Error: Python $required_version or higher is required (found $python_version)" + exit 1 +fi + +# Check if Elixir is installed +if ! command -v elixir &> /dev/null; then + echo "Error: Elixir is not installed" + echo "Please install Elixir: https://elixir-lang.org/install.html" + exit 1 +fi + +# Install Python dependencies +echo "Installing Python dependencies..." +pip install fastapi[all] uvicorn playwright + +# Setup Playwright +echo "Setting up Playwright..." +python -m playwright install + +# Clone instructor_ex if not present +if [ ! -d "instructor_ex" ]; then + echo "Cloning instructor_ex repository..." + git clone https://github.com/thmsmlr/instructor_ex.git +else + echo "instructor_ex directory already exists, updating..." + cd instructor_ex + git pull + cd .. +fi + +# Check if llama.cpp server is installed +if ! command -v llama-server &> /dev/null; then + echo "Warning: llama-server not found in PATH" + echo "Please install llama.cpp: https://github.com/ggerganov/llama.cpp" +fi + +# Make scripts executable +chmod +x start_llama_server.sh +chmod +x start_proxy_service.sh +chmod +x example_usage.exs + +echo "" +echo "Setup complete! You can now start the services:" +echo "1. 
Start the llama server: ./start_llama_server.sh /path/to/model.gguf" +echo "2. Start the proxy service: ./start_proxy_service.sh" +echo "3. Run the example: ./example_usage.exs" \ No newline at end of file diff --git a/start_llama_server.sh b/start_llama_server.sh new file mode 100755 index 0000000..3b2ece1 --- /dev/null +++ b/start_llama_server.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Script to start the llama.cpp server for instructor_ex integration + +# Get the model path from the user or use the default +if [ -z "$1" ]; then + # Try to find a model in the default location + model_path=$(find $HOME/.cache/huggingface/hub -name "*qwen2.5-7b-instruct-q*.gguf" -type f | head -n 1) + + if [ -z "$model_path" ]; then + echo "Error: No GGUF model found. Please provide a path to a GGUF model file." + echo "Usage: $0 /path/to/model.gguf" + exit 1 + fi +else + model_path="$1" +fi + +# Start the llama.cpp server +echo "Starting llama.cpp server on http://localhost:8090 with model: $model_path" +llama-server --port 8090 -ngl 1 -m "$model_path" \ No newline at end of file diff --git a/start_proxy_service.sh b/start_proxy_service.sh new file mode 100755 index 0000000..648096f --- /dev/null +++ b/start_proxy_service.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Script to start the proxy-lite service for instructor_ex integration + +# Activate the Python environment +source .venv/bin/activate + +# Install required dependencies if not already installed +pip install "fastapi[all]" uvicorn + +# Start the proxy-lite service +echo "Starting proxy-lite service on http://localhost:8001" +python proxy_service.py \ No newline at end of file