Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,34 @@ flight.delay? # may not be present
flight.price
```

### Using Playwright with a Docker Container

First, run a Playwright browser server in Docker:

```bash
docker run -it --rm -p 3000:3000 mcr.microsoft.com/playwright:v1.53.0-noble /bin/bash -c "cd /home/pwuser && npx playwright install && npx -y playwright@1.53.0 run-server --port=3000"
```

Then use it in your Python code:

```python
from fast_flights import FlightData, Passengers, Result, get_flights, PlaywrightConfig

# Configure remote Playwright connection
playwright_config = PlaywrightConfig(url="ws://localhost:3000")

result: Result = get_flights(
flight_data=[
FlightData(date="2025-01-01", from_airport="TPE", to_airport="MYJ")
],
trip="one-way",
seat="economy",
passengers=Passengers(adults=2, children=1, infants_in_seat=0, infants_on_lap=0),
fetch_mode="local", # or "fallback"
playwright_config=playwright_config,
)
```

**Useless enums**: Additionally, you can use the `Airport` enum to search for airports in code (as you type)! See `_generated_enum.py` in source.

```python
Expand Down
68 changes: 51 additions & 17 deletions fast_flights/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import json
from dataclasses import dataclass
from typing import List, Literal, Optional, Union, overload

from selectolax.lexbor import LexborHTMLParser, LexborNode
Expand All @@ -13,38 +14,62 @@
from .primp import Client, Response


DataSource = Literal['html', 'js']
DataSource = Literal["html", "js"]


@dataclass
class PlaywrightConfig:
    """Settings for Playwright browser automation.

    Attributes:
        url: WebSocket endpoint (ws:// or wss://) of a remote Playwright
            instance.
    """

    # Endpoint of the remote Playwright instance to connect to.
    url: str


def fetch(params: dict) -> Response:
    """Request the Google Flights page directly and return the response.

    Args:
        params: Query parameters sent with the GET request.

    Returns:
        The response object; its status code is always 200 on return.

    Raises:
        AssertionError: If the response status code is not 200. The message
            contains the status code and the response's ``text_markdown``.
    """
    # NOTE(review): verify=False presumably disables TLS certificate
    # verification in the client -- confirm this is intended.
    client = Client(impersonate="chrome_126", verify=False)
    res = client.get("https://www.google.com/travel/flights", params=params)
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would let non-200 responses
    # through silently. The exception type stays AssertionError so code that
    # catches it keeps working.
    if res.status_code != 200:
        raise AssertionError(f"{res.status_code} Result: {res.text_markdown}")
    return res


@overload
def get_flights_from_filter(
filter: TFSData,
currency: str = "",
*,
mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common",
data_source: Literal['js'] = ...,
mode: Literal[
"common", "fallback", "force-fallback", "local", "bright-data"
] = "common",
data_source: Literal["js"] = ...,
playwright_config: Optional[PlaywrightConfig] = None,
) -> Union[DecodedResult, None]: ...


@overload
def get_flights_from_filter(
filter: TFSData,
currency: str = "",
*,
mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common",
data_source: Literal['html'],
mode: Literal[
"common", "fallback", "force-fallback", "local", "bright-data"
] = "common",
data_source: Literal["html"],
playwright_config: Optional[PlaywrightConfig] = None,
) -> Result: ...


def get_flights_from_filter(
filter: TFSData,
currency: str = "",
*,
mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common",
data_source: DataSource = 'html',
mode: Literal[
"common", "fallback", "force-fallback", "local", "bright-data"
] = "common",
data_source: DataSource = "html",
playwright_config: Optional[PlaywrightConfig] = None,
) -> Union[Result, DecodedResult, None]:
data = filter.as_b64()

Expand All @@ -60,26 +85,31 @@ def get_flights_from_filter(
res = fetch(params)
except AssertionError as e:
if mode == "fallback":
res = fallback_playwright_fetch(params)
playwright_url = playwright_config.url if playwright_config else None
res = fallback_playwright_fetch(params, playwright_url)
else:
raise e

elif mode == "local":
from .local_playwright import local_playwright_fetch

res = local_playwright_fetch(params)
playwright_url = playwright_config.url if playwright_config else None
res = local_playwright_fetch(params, playwright_url)

elif mode == "bright-data":
res = bright_data_fetch(params)

else:
res = fallback_playwright_fetch(params)
playwright_url = playwright_config.url if playwright_config else None
res = fallback_playwright_fetch(params, playwright_url)

try:
return parse_response(res, data_source)
except RuntimeError as e:
if mode == "fallback":
return get_flights_from_filter(filter, mode="force-fallback")
return get_flights_from_filter(
filter, mode="force-fallback", playwright_config=playwright_config
)
raise e


Expand All @@ -89,9 +119,12 @@ def get_flights(
trip: Literal["round-trip", "one-way", "multi-city"],
passengers: Passengers,
seat: Literal["economy", "premium-economy", "business", "first"],
fetch_mode: Literal["common", "fallback", "force-fallback", "local", "bright-data"] = "common",
fetch_mode: Literal[
"common", "fallback", "force-fallback", "local", "bright-data"
] = "common",
max_stops: Optional[int] = None,
data_source: DataSource = 'html',
data_source: DataSource = "html",
playwright_config: Optional[PlaywrightConfig] = None,
) -> Union[Result, DecodedResult, None]:
return get_flights_from_filter(
TFSData.from_interface(
Expand All @@ -103,6 +136,7 @@ def get_flights(
),
mode=fetch_mode,
data_source=data_source,
playwright_config=playwright_config,
)


Expand All @@ -126,11 +160,11 @@ def safe(n: Optional[LexborNode]):

parser = LexborHTMLParser(r.text)

if data_source == 'js':
script = parser.css_first(r'script.ds\:1').text()
if data_source == "js":
script = parser.css_first(r"script.ds\:1").text()

match = re.search(r'^.*?\{.*?data:(\[.*\]).*\}', script)
assert match, 'Malformed js data, cannot find script data'
match = re.search(r"^.*?\{.*?data:(\[.*\]).*\}", script)
assert match, "Malformed js data, cannot find script data"
data = json.loads(match.group(1))
return ResultDecoder.decode(data) if data is not None else None

Expand Down
20 changes: 18 additions & 2 deletions fast_flights/fallback_playwright.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any
from typing import Any, Optional

from .primp import Client

Expand Down Expand Up @@ -26,7 +26,23 @@ async def main():
"""


def fallback_playwright_fetch(params: dict) -> Any:
def fallback_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any:
"""
Fetch Google Flights data using try.playwright.tech service or local playwright.

Args:
params: Query parameters for the Google Flights URL
playwright_url: Optional WebSocket endpoint of a remote Playwright instance. If provided, the fetch is delegated to local_playwright_fetch with this endpoint instead of using try.playwright.tech

Returns:
DummyResponse object with fetched content
"""
if playwright_url:
# Use local playwright with the specified URL
from .local_playwright import local_playwright_fetch
return local_playwright_fetch(params, playwright_url)

# Original fallback behavior using try.playwright.tech
client = Client(impersonate="chrome_100", verify=False)

res = client.post(
Expand Down
10 changes: 7 additions & 3 deletions fast_flights/flights_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

AIRLINE_ALLIANCES = ["SKYTEAM", "STAR_ALLIANCE", "ONEWORLD"]


class FlightData:
"""Represents flight data.

Expand Down Expand Up @@ -194,11 +195,14 @@ def from_interface(
seat=seat_t,
trip=trip_t,
passengers=passengers,
max_stops=max_stops # Pass max_stops into TFSData
max_stops=max_stops, # Pass max_stops into TFSData
)

def __repr__(self) -> str:
return f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})"
return (
f"TFSData(flight_data={self.flight_data!r}, max_stops={self.max_stops!r})"
)


@dataclass
class ItinerarySummary:
Expand All @@ -207,7 +211,7 @@ class ItinerarySummary:
currency: str

@classmethod
def from_b64(cls, b64_string: str) -> 'ItinerarySummary':
def from_b64(cls, b64_string: str) -> "ItinerarySummary":
raw = base64.b64decode(b64_string)
pb = PB.ItinerarySummary()
pb.ParseFromString(raw)
Expand Down
44 changes: 38 additions & 6 deletions fast_flights/local_playwright.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
from typing import Any
from typing import Any, Optional
import asyncio
from playwright.async_api import async_playwright

async def fetch_with_playwright(url: str) -> str:
async def fetch_with_playwright(url: str, playwright_url: Optional[str] = None) -> str:
"""
Fetch content from a URL using Playwright browser automation.

Args:
url: Target URL to fetch
playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance.
If None, launches local Chromium browser.

Returns:
Inner HTML of the page's element with role="main"
"""
async with async_playwright() as p:
browser = await p.chromium.launch()
if playwright_url:
# Connect to remote Playwright instance (e.g., Docker container)
browser = await p.chromium.connect(playwright_url)
else:
# Launch local Chromium instance
browser = await p.chromium.launch()

page = await browser.new_page()
await page.goto(url)
if page.url.startswith("https://consent.google.com"):
Expand All @@ -14,12 +31,27 @@ async def fetch_with_playwright(url: str) -> str:
body = await page.evaluate(
"() => document.querySelector('[role=\"main\"]').innerHTML"
)
await browser.close()

if not playwright_url:
# Only close browser if we launched it locally
# Remote browsers should be managed by their container
await browser.close()
return body

def local_playwright_fetch(params: dict) -> Any:
def local_playwright_fetch(params: dict, playwright_url: Optional[str] = None) -> Any:
"""
Fetch Google Flights data using Playwright.

Args:
params: Query parameters for the Google Flights URL
playwright_url: WebSocket endpoint (ws:// or wss://) for remote Playwright instance.
If None, uses local Chromium browser.

Returns:
DummyResponse object with fetched content
"""
url = "https://www.google.com/travel/flights?" + "&".join(f"{k}={v}" for k, v in params.items())
body = asyncio.run(fetch_with_playwright(url))
body = asyncio.run(fetch_with_playwright(url, playwright_url))

class DummyResponse:
status_code = 200
Expand Down