From b543ad0a8ae6f7dc9a4f7e10d20f17deaacb879b Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Wed, 16 Jul 2025 01:16:38 -0400 Subject: [PATCH 1/6] - Add playwright_url parameter to enable remote Playwright connections - Support both local and remote (Docker/container) browser instances --- fast_flights/local_playwright.py | 44 +++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/fast_flights/local_playwright.py b/fast_flights/local_playwright.py index c6113656..58e60794 100644 --- a/fast_flights/local_playwright.py +++ b/fast_flights/local_playwright.py @@ -1,10 +1,27 @@ -from typing import Any +from typing import Any, Optional import asyncio from playwright.async_api import async_playwright -async def fetch_with_playwright(url: str) -> str: +async def fetch_with_playwright(url: str, playwright_url: Optional[str] = None) -> str: + """ + Fetch content from a URL using Playwright browser automation. + + Args: + url: Target URL to fetch + playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. + If None, launches local Chromium browser. 
+ + Returns: + Inner HTML of the page's [role="main"] element + """ async with async_playwright() as p: - browser = await p.chromium.launch() + if playwright_url: + # Connect to remote Playwright instance (e.g., Docker container) + browser = await p.chromium.connect(playwright_url) + else: + # Launch local Chromium instance + browser = await p.chromium.launch() + page = await browser.new_page() await page.goto(url) if page.url.startswith("https://consent.google.com"): @@ -14,12 +31,27 @@ async def fetch_with_playwright(url: str) -> str: body = await page.evaluate( "() => document.querySelector('[role=\"main\"]').innerHTML" ) - await browser.close() + + if not playwright_url: + # Only close browser if we launched it locally + # Remote browsers should be managed by their container + await browser.close() return body -def local_playwright_fetch(params: dict) -> Any: +def local_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any: + """ + Fetch Google Flights data using Playwright. + + Args: + params: Query parameters for the Google Flights URL + playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. + If None, uses local Chromium browser. + + Returns: + DummyResponse object with fetched content + """ url = "https://www.google.com/travel/flights?" 
+ "&".join(f"{k}={v}" for k, v in params.items()) - body = asyncio.run(fetch_with_playwright(url)) + body = asyncio.run(fetch_with_playwright(url, playwright_url)) class DummyResponse: status_code = 200 From 4407b132fd6fd38959553c7706e44a904770d900 Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Wed, 16 Jul 2025 18:12:13 -0400 Subject: [PATCH 2/6] add config for playwright url --- fast_flights/core.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fast_flights/core.py b/fast_flights/core.py index 6b11aafb..89dd54a1 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -1,5 +1,6 @@ import re import json +from dataclasses import dataclass from typing import List, Literal, Optional, Union, overload from selectolax.lexbor import LexborHTMLParser, LexborNode @@ -15,6 +16,16 @@ DataSource = Literal['html', 'js'] +@dataclass +class PlaywrightConfig: + """Configuration for Playwright browser automation. + + Args: + url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. + If None, launches local Chromium browser. + """ + url: Optional[str] = None + def fetch(params: dict) -> Response: client = Client(impersonate="chrome_126", verify=False) res = client.get("https://www.google.com/travel/flights", params=params) @@ -28,6 +39,7 @@ def get_flights_from_filter( *, mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", data_source: Literal['js'] = ..., + playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[DecodedResult, None]: ... @overload @@ -37,6 +49,7 @@ def get_flights_from_filter( *, mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", data_source: Literal['html'], + playwright_config: Optional[PlaywrightConfig] = None, ) -> Result: ... 
def get_flights_from_filter( @@ -45,6 +58,7 @@ def get_flights_from_filter( *, mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", data_source: DataSource = 'html', + playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: data = filter.as_b64() @@ -67,7 +81,8 @@ def get_flights_from_filter( elif mode == "local": from .local_playwright import local_playwright_fetch - res = local_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = local_playwright_fetch(params, playwright_url) elif mode == "bright-data": res = bright_data_fetch(params) @@ -92,6 +107,7 @@ def get_flights( fetch_mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", max_stops: Optional[int] = None, data_source: DataSource = 'html', + playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: return get_flights_from_filter( TFSData.from_interface( @@ -103,6 +119,7 @@ def get_flights( ), mode=fetch_mode, data_source=data_source, + playwright_config=playwright_config, ) From 66911169cff21d91ae329e186c68df5acd28ea4b Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Wed, 16 Jul 2025 19:51:00 -0400 Subject: [PATCH 3/6] update fallback --- fast_flights/core.py | 6 ++++-- fast_flights/fallback_playwright.py | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fast_flights/core.py b/fast_flights/core.py index 89dd54a1..c95ece55 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -74,7 +74,8 @@ def get_flights_from_filter( res = fetch(params) except AssertionError as e: if mode == "fallback": - res = fallback_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = fallback_playwright_fetch(params, playwright_url) else: raise e @@ -88,7 +89,8 @@ def get_flights_from_filter( res = bright_data_fetch(params) else: - res = 
fallback_playwright_fetch(params) + playwright_url = playwright_config.url if playwright_config else None + res = fallback_playwright_fetch(params, playwright_url) try: return parse_response(res, data_source) diff --git a/fast_flights/fallback_playwright.py b/fast_flights/fallback_playwright.py index a2a26bc9..480c27fc 100644 --- a/fast_flights/fallback_playwright.py +++ b/fast_flights/fallback_playwright.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional from .primp import Client @@ -26,7 +26,23 @@ async def main(): """ -def fallback_playwright_fetch(params: dict) -> Any: +def fallback_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any: + """ + Fetch Google Flights data using the try.playwright.tech service or the Playwright instance at playwright_url. + + Args: + params: Query parameters for the Google Flights URL + playwright_url: WebSocket endpoint for remote Playwright. If provided, delegates to local_playwright_fetch (which connects to that endpoint) instead of using try.playwright.tech + + Returns: + DummyResponse object with fetched content + """ + if playwright_url: + # Delegate to local_playwright_fetch, which connects to the specified URL + from .local_playwright import local_playwright_fetch + return local_playwright_fetch(params, playwright_url) + + # Original fallback behavior using try.playwright.tech client = Client(impersonate="chrome_100", verify=False) res = client.post( From 3d9e3d5dc96295bb9858c65a5d5d8a1125c02d38 Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Wed, 16 Jul 2025 23:21:49 -0400 Subject: [PATCH 4/6] make url required --- fast_flights/core.py | 45 +++++++++++++++++++++++------------- fast_flights/flights_impl.py | 10 +++++--- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/fast_flights/core.py b/fast_flights/core.py index c95ece55..5f3bb106 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -14,17 +14,19 @@ from .primp import Client, Response -DataSource = Literal['html', 'js'] +DataSource = Literal["html", "js"] + @dataclass class PlaywrightConfig: 
"""Configuration for Playwright browser automation. - + Args: url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance. - If None, launches local Chromium browser. """ - url: Optional[str] = None + + url: str + def fetch(params: dict) -> Response: client = Client(impersonate="chrome_126", verify=False) @@ -32,32 +34,41 @@ def fetch(params: dict) -> Response: assert res.status_code == 200, f"{res.status_code} Result: {res.text_markdown}" return res + @overload def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: Literal['js'] = ..., + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: Literal["js"] = ..., playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[DecodedResult, None]: ... + @overload def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: Literal['html'], + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: Literal["html"], playwright_config: Optional[PlaywrightConfig] = None, ) -> Result: ... 
+ def get_flights_from_filter( filter: TFSData, currency: str = "", *, - mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", - data_source: DataSource = 'html', + mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", + data_source: DataSource = "html", playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: data = filter.as_b64() @@ -106,9 +117,11 @@ def get_flights( trip: Literal["round-trip", "one-way", "multi-city"], passengers: Passengers, seat: Literal["economy", "premium-economy", "business", "first"], - fetch_mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common", + fetch_mode: Literal[ + "common", "fallback", "force-fallback", "local", "bright-data" + ] = "common", max_stops: Optional[int] = None, - data_source: DataSource = 'html', + data_source: DataSource = "html", playwright_config: Optional[PlaywrightConfig] = None, ) -> Union[Result, DecodedResult, None]: return get_flights_from_filter( @@ -145,11 +158,11 @@ def safe(n: Optional[LexborNode]): parser = LexborHTMLParser(r.text) - if data_source == 'js': - script = parser.css_first(r'script.ds\:1').text() + if data_source == "js": + script = parser.css_first(r"script.ds\:1").text() - match = re.search(r'^.*?\{.*?data:(\[.*\]).*\}', script) - assert match, 'Malformed js data, cannot find script data' + match = re.search(r"^.*?\{.*?data:(\[.*\]).*\}", script) + assert match, "Malformed js data, cannot find script data" data = json.loads(match.group(1)) return ResultDecoder.decode(data) if data is not None else None diff --git a/fast_flights/flights_impl.py b/fast_flights/flights_impl.py index 5bd49e0d..ff3181d9 100644 --- a/fast_flights/flights_impl.py +++ b/fast_flights/flights_impl.py @@ -12,6 +12,7 @@ AIRLINE_ALLIANCES = ["SKYTEAM", "STAR_ALLIANCE", "ONEWORLD"] + class FlightData: """Represents flight data. 
@@ -194,11 +195,14 @@ def from_interface( seat=seat_t, trip=trip_t, passengers=passengers, - max_stops=max_stops # Pass max_stops into TFSData + max_stops=max_stops, # Pass max_stops into TFSData ) def __repr__(self) -> str: - return f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})" + return ( + f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})" + ) + @dataclass class ItinerarySummary: @@ -207,7 +211,7 @@ class ItinerarySummary: currency: str @classmethod - def from_b64(cls, b64_string: str) -> 'ItinerarySummary': + def from_b64(cls, b64_string: str) -> "ItinerarySummary": raw = base64.b64decode(b64_string) pb = PB.ItinerarySummary() pb.ParseFromString(raw) From a8b2fa52d38dee5160abccfc4527c0491b09c2b4 Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Thu, 17 Jul 2025 01:29:12 -0400 Subject: [PATCH 5/6] update readme --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index 53c5ed2b..1fd01a95 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,34 @@ flight.delay? 
# may not be present flight.price ``` +### Using Playwright with a Docker container + +First, run a Playwright browser server in Docker (its version must match the major.minor version of your installed Python `playwright` package): + +```bash +docker run -it --rm -p 3000:3000 mcr.microsoft.com/playwright:v1.53.0-noble /bin/bash -c "cd /home/pwuser && npx playwright install && npx -y playwright@1.53.0 run-server --port=3000" +``` + +Then use it in your Python code: + +```python +from fast_flights import FlightData, Passengers, Result, get_flights, PlaywrightConfig + +# Configure the remote Playwright connection +playwright_config = PlaywrightConfig(url="ws://localhost:3000") + +result: Result = get_flights( + flight_data=[ + FlightData(date="2025-01-01", from_airport="TPE", to_airport="MYJ") + ], + trip="one-way", + seat="economy", + passengers=Passengers(adults=2, children=1, infants_in_seat=0, infants_on_lap=0), + fetch_mode="local", # or "fallback" + playwright_config=playwright_config, +) +``` + **Useless enums**: Additionally, you can use the `Airport` enum to search for airports in code (as you type)! See `_generated_enum.py` in source. ```python From 9c213e957b64712decb185bcbb742edf27df73c9 Mon Sep 17 00:00:00 2001 From: Evan Huang Date: Fri, 18 Jul 2025 01:38:31 -0400 Subject: [PATCH 6/6] fix bug in fallback mode --- fast_flights/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fast_flights/core.py b/fast_flights/core.py index 5f3bb106..9ab9f6f0 100644 --- a/fast_flights/core.py +++ b/fast_flights/core.py @@ -107,7 +107,9 @@ def get_flights_from_filter( return parse_response(res, data_source) except RuntimeError as e: if mode == "fallback": - return get_flights_from_filter(filter, mode="force-fallback") + return get_flights_from_filter( + filter, mode="force-fallback", playwright_config=playwright_config + ) raise e