diff --git a/README.md b/README.md index 53c5ed2b..1fd01a95 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,34 @@ flight.delay? # may not be present flight.price ``` +### Using Playwright with Docker Container + +First, run a Playwright browser server in Docker: + +```bash +docker run -it --rm -p 3000:3000 mcr.microsoft.com/playwright:v1.53.0-noble /bin/bash -c "cd /home/pwuser && npx playwright install && npx -y playwright@1.53.0 run-server --port=3000" +``` + +Then use it in your Python code: + +```python +from fast_flights import FlightData, Passengers, Result, get_flights, PlaywrightConfig + +# Configure remote Playwright connection +playwright_config = PlaywrightConfig(url="ws://localhost:3000") + +result: Result = get_flights( + flight_data=[ + FlightData(date="2025-01-01", from_airport="TPE", to_airport="MYJ") + ], + trip="one-way", + seat="economy", + passengers=Passengers(adults=2, children=1, infants_in_seat=0, infants_on_lap=0), + fetch_mode="local", # or "fallback" + playwright_config=playwright_config, +) +``` + **Useless enums**: Additionally, you can use the `Airport` enum to search for airports in code (as you type)! See `_generated_enum.py` in source. ```python diff --git a/fast_flights/core.py b/fast_flights/core.py index 6b11aafb..9ab9f6f0 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -1,5 +1,6 @@ import re import json +from dataclasses import dataclass from typing import List, Literal, Optional, Union, overload from selectolax.lexbor import LexborHTMLParser, LexborNode @@ -13,7 +14,19 @@ from .primp import Client, Response -DataSource = Literal['html', 'js'] +DataSource = Literal["html", "js"] + + +@dataclass +class PlaywrightConfig: + """Configuration for Playwright browser automation. + + Args: + url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. 
+ """ + + url: str + def fetch(params: dict) -> Response: client = Client(impersonate="chrome_126", verify=False) @@ -21,30 +34,42 @@ def fetch(params: dict) -> Response: assert res.status_code == 200, f"{res.status_code} Result: {res.text_markdown}" return res + @overload def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: Literal['js'] = ..., + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: Literal["js"] = ..., + playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[DecodedResult, None]: ... + @overload def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: Literal['html'], + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: Literal["html"], + playwright_config: Optional[PlaywrightConfig] = None, ) -> Result: ... 
+ def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: DataSource = 'html', + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: DataSource = "html", + playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: data = filter.as_b64() @@ -60,26 +85,31 @@ def get_flights_from_filter( res = fetch(params) except AssertionError as e: if mode == "fallback": - res = fallback_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = fallback_playwright_fetch(params, playwright_url) else: raise e elif mode == "local": from .local_playwright import local_playwright_fetch - res = local_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = local_playwright_fetch(params, playwright_url) elif mode == "bright-data": res = bright_data_fetch(params) else: - res = fallback_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = fallback_playwright_fetch(params, playwright_url) try: return parse_response(res, data_source) except RuntimeError as e: if mode == "fallback": - return get_flights_from_filter(filter, mode="force-fallback") + return get_flights_from_filter( + filter, mode="force-fallback", playwright_config=playwright_config  # NOTE(review): currency and data_source are not forwarded on this retry, so it uses the defaults ("" and "html") - confirm this is intended + ) raise e @@ -89,9 +119,12 @@ def get_flights( trip: Literal["round-trip", "one-way", "multi-city"], passengers: Passengers, seat: Literal["economy", "premium-economy", "business", "first"], - fetch_mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", + fetch_mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", max_stops: Optional[int] = None, - data_source: DataSource = 'html', + data_source: DataSource = "html", 
+ playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: return get_flights_from_filter( TFSData.from_interface( @@ -103,6 +136,7 @@ def get_flights( ), mode=fetch_mode, data_source=data_source, + playwright_config=playwright_config, ) @@ -126,11 +160,11 @@ def safe(n: Optional[LexborNode]): parser = LexborHTMLParser(r.text) - if data_source == 'js': - script = parser.css_first(r'script.ds\:1').text() + if data_source == "js": + script = parser.css_first(r"script.ds\:1").text() - match = re.search(r'^.*?\{.*?data:(\[.*\]).*\}', script) - assert match, 'Malformed js data, cannot find script data' + match = re.search(r"^.*?\{.*?data:(\[.*\]).*\}", script) + assert match, "Malformed js data, cannot find script data" data = json.loads(match.group(1)) return ResultDecoder.decode(data) if data is not None else None diff --git a/fast_flights/fallback_playwright.py b/fast_flights/fallback_playwright.py index a2a26bc9..480c27fc 100644 --- a/fast_flights/fallback_playwright.py +++ b/fast_flights/fallback_playwright.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional from .primp import Client @@ -26,7 +26,23 @@ async def main(): """ -def fallback_playwright_fetch(params: dict) -> Any: +def fallback_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any: + """ + Fetch Google Flights data using try.playwright.tech service or local playwright. + + Args: + params: Query parameters for the Google Flights URL + playwright_url: WebSocket endpoint for remote Playwright. 
If provided, delegates to local_playwright_fetch with this endpoint instead of using try.playwright.tech + + Returns: + DummyResponse object with fetched content + """ + if playwright_url: + # Delegate to local_playwright_fetch, passing the endpoint through + from .local_playwright import local_playwright_fetch + return local_playwright_fetch(params, playwright_url) + + # Original fallback behavior using try.playwright.tech client = Client(impersonate="chrome_100", verify=False) res = client.post( diff --git a/fast_flights/flights_impl.py b/fast_flights/flights_impl.py index 5bd49e0d..ff3181d9 100644 --- a/fast_flights/flights_impl.py +++ b/fast_flights/flights_impl.py @@ -12,6 +12,7 @@ AIRLINE_ALLIANCES = ["SKYTEAM", "STAR_ALLIANCE", "ONEWORLD"] + class FlightData: """Represents flight data. @@ -194,11 +195,14 @@ def from_interface( seat=seat_t, trip=trip_t, passengers=passengers, - max_stops=max_stops # Pass max_stops into TFSData + max_stops=max_stops, # Pass max_stops into TFSData ) def __repr__(self) -> str: - return f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})" + return ( + f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})" + ) + @dataclass class ItinerarySummary: @@ -207,7 +211,7 @@ class ItinerarySummary: currency: str @classmethod - def from_b64(cls, b64_string: str) -> 'ItinerarySummary': + def from_b64(cls, b64_string: str) -> "ItinerarySummary": raw = base64.b64decode(b64_string) pb = PB.ItinerarySummary() pb.ParseFromString(raw) diff --git a/fast_flights/local_playwright.py b/fast_flights/local_playwright.py index c6113656..58e60794 100644 --- a/fast_flights/local_playwright.py +++ b/fast_flights/local_playwright.py @@ -1,10 +1,27 @@ -from typing import Any +from typing import Any, Optional import asyncio from playwright.async_api import async_playwright -async def fetch_with_playwright(url: str) -> str: +async def fetch_with_playwright(url: str, playwright_url: Optional[str] = None) -> str: + """ + Fetch content from a URL using Playwright browser 
automation. + + Args: + url: Target URL to fetch + playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. + If None, launches local Chromium browser. + + Returns: + Inner HTML of the first element matching [role="main"] + """ async with async_playwright() as p: - browser = await p.chromium.launch() + if playwright_url: + # Connect to remote Playwright instance (e.g., Docker container) + browser = await p.chromium.connect(playwright_url) + else: + # Launch local Chromium instance + browser = await p.chromium.launch() + page = await browser.new_page() await page.goto(url) if page.url.startswith("https://consent.google.com"): @@ -14,12 +31,27 @@ async def fetch_with_playwright(url: str) -> str: body = await page.evaluate( "() => document.querySelector('[role=\"main\"]').innerHTML" ) - await browser.close() + + if not playwright_url: + # Only close the browser if we launched it locally + # NOTE(review): for a remote endpoint neither the page nor the connection is closed here - confirm the server cleans up on disconnect + await browser.close() return body -def local_playwright_fetch(params: dict) -> Any: +def local_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any: + """ + Fetch Google Flights data using Playwright. + + Args: + params: Query parameters for the Google Flights URL + playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. + If None, uses local Chromium browser. + + Returns: + DummyResponse object with fetched content + """ url = "https://www.google.com/travel/flights?" + "&".join(f"{k}={v}" for k, v in params.items()) - body = asyncio.run(fetch_with_playwright(url)) + body = asyncio.run(fetch_with_playwright(url, playwright_url)) class DummyResponse: status_code = 200