-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
84 lines (63 loc) · 2.22 KB
/
main.py
File metadata and controls
84 lines (63 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Minimal browser-use: open a URL and save one full-page screenshot.
Usage: uv run python main.py <URL>
"""
import asyncio
import sys
import base64
from datetime import datetime
from pathlib import Path
from pydantic import BaseModel
from dotenv import load_dotenv
from browser_use import Agent, Tools, Browser, BrowserSession
from browser_use.agent.views import ActionResult
from browser_use.llm import ChatOpenAI
# Load variables from a .env file into the process environment before any
# client is constructed (presumably where the OpenAI API key lives — confirm).
load_dotenv()
# Output directory for saved screenshots, relative to the current working
# directory. It is created at import time so later writes can assume it exists.
SCREENSHOTS_DIR = Path("./screenshots")
SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)
# Tool collection that custom actions are registered on (via
# `tools.registry.action`) and that is handed to the Agent in `main_async`.
tools: Tools = Tools()
# Parameter model for the `take_fullpage_screenshot` action. It declares no
# fields because that action does not read anything from its `params`.
class ScreenshotParams(BaseModel):
    pass
@tools.registry.action(description="Full-page screenshot via Chrome DevTools Protocol", param_model=ScreenshotParams)
async def take_fullpage_screenshot(params: ScreenshotParams, browser_session: BrowserSession) -> ActionResult:
    """Capture the current page as a PNG and save it under ``SCREENSHOTS_DIR``.

    The screenshot is requested through the CDP ``Page.captureScreenshot``
    command with ``captureBeyondViewport`` enabled, decoded from base64 and
    written to ``<YYYYmmdd_HHMMSS>.png``.

    Args:
        params: Empty parameter model; not read by this action.
        browser_session: Session used to look up the current page.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is ``"Saved <filename>"``
        on success, or one with ``error="No current page"`` when the session
        has no current page.

    Note:
        The file name has one-second resolution, so two captures within the
        same second write to the same path and the later one overwrites the
        earlier one.
    """
    page = await browser_session.get_current_page()
    if not page:
        return ActionResult(error="No current page", extracted_content="")

    # Build the timestamp outside the f-string: reusing double quotes inside an
    # f-string replacement field is a SyntaxError on Python versions before 3.12.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}.png"
    filepath = SCREENSHOTS_DIR / filename

    # NOTE(review): `_ensure_session` and `_client` are underscore-prefixed
    # (private) members of the page object and may change between browser-use
    # releases — confirm against the installed version when upgrading.
    session_id = await page._ensure_session()
    cdp_client = page._client
    result = await cdp_client.send.Page.captureScreenshot(
        params={"format": "png", "captureBeyondViewport": True},
        session_id=session_id,
    )

    # The image payload arrives base64-encoded under the "data" key.
    filepath.write_bytes(base64.b64decode(result["data"]))
    return ActionResult(extracted_content=f"Saved {filename}")
async def main_async(url: str) -> None:
    """Drive a browser-use agent that opens *url* and takes one screenshot.

    The agent is given a fixed instruction (open the URL, wait, scroll to the
    bottom, call ``take_fullpage_screenshot``, stop), uses the ``gpt-4o-mini``
    chat model with a headless browser, and is run for at most 5 steps.

    Args:
        url: Target URL, embedded verbatim at the end of the task text.
    """
    instructions = (
        "Open the target URL, wait for it to load, use scroll action to scroll to bottom, "
        "call 'take_fullpage_screenshot', then stop.\n\n"
    )
    screenshot_agent: Agent = Agent(
        task=f"{instructions}Target URL: {url}",
        llm=ChatOpenAI(model="gpt-4o-mini"),
        browser=Browser(headless=True),
        tools=tools,  # exposes the custom screenshot action to the agent
        use_vision=True,
        directly_open_url=True,
        max_actions_per_step=5,
    )
    await screenshot_agent.run(max_steps=5)
def main() -> None:
    """Command-line entry point: screenshot the URL given as the first argument.

    Reads the target URL from ``sys.argv[1]`` and runs ``main_async`` to
    completion on a new event loop. Arguments after the first are ignored.

    Raises:
        SystemExit: With status 1 when no URL argument is supplied; the usage
            line is written to standard error first.
    """
    if len(sys.argv) < 2:
        # A usage error is a diagnostic, so send it to stderr instead of
        # mixing it into the program's regular stdout output.
        print("Usage: uv run python main.py <URL>", file=sys.stderr)
        sys.exit(1)
    asyncio.run(main_async(sys.argv[1]))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()