77from fastapi import APIRouter , Query , Depends
88from fastapi .requests import Request
99from pydantic import BaseModel
10- from playwright .async_api import Browser , Route
10+ from playwright .async_api import Browser
1111
1212from internal import cache
1313from internal .browser import (
2323 ProxyQueryParams ,
2424)
2525
26+
2627router = APIRouter (prefix = '/api/page' , tags = ['page' ])
2728
29+
2830class AnyPage (BaseModel ):
2931 id : Annotated [str , Query (description = 'unique result ID' )]
3032 url : Annotated [str , Query (description = 'page URL after redirects, may not match the query URL' )]
@@ -38,24 +40,25 @@ class AnyPage(BaseModel):
3840 title : Annotated [str | None , Query (description = "page's title" )] = None
3941 status_code : Annotated [int , Query (description = 'HTTP status code of the page' )]
4042
43+
4144@router .get ('' , summary = 'Get any page from the given URL' , response_model = AnyPage )
4245async def get_any_page (
4346 request : Request ,
4447 url : Annotated [URLParam , Depends ()],
4548 common_params : Annotated [CommonQueryParams , Depends ()],
4649 browser_params : Annotated [BrowserQueryParams , Depends ()],
47- proxy_params : Annotated [ProxyQueryParams , Depends ()]
50+ proxy_params : Annotated [ProxyQueryParams , Depends ()],
4851) -> dict :
4952 """
5053 Get any page from the given URL.<br><br>
5154 Page is fetched using Playwright, but no additional processing is done.
5255 """
5356 # pylint: disable=duplicate-code
54- # Split URL into parts: host with scheme, path with query, query params as a dict
57+ # split URL into parts: host with scheme, path with query, query params as a dict
5558 host_url , full_path , query_dict = split_url (request .url )
5659
57- # Get cache data if exists
58- r_id = cache .make_key (full_path ) # Unique result ID
60+ # get cache data if exists
61+ r_id = cache .make_key (full_path ) # unique result ID
5962 if common_params .cache :
6063 data = cache .load_result (key = r_id )
6164 if data :
@@ -64,19 +67,9 @@ async def get_any_page(
6467 browser : Browser = request .state .browser
6568 semaphore : asyncio .Semaphore = request .state .semaphore
6669
67- # Create a new browser context
70+ # create a new browser context
6871 async with semaphore :
6972 async with new_context (browser , browser_params , proxy_params ) as context :
70- async def block_unwanted_resources (route : Route ):
71- if route .request .resource_type in (browser_params .block_types or []):
72- await route .abort ()
73- elif route .request .url .lower ().endswith (tuple (browser_params .block_extensions or [])):
74- await route .abort ()
75- else :
76- await route .continue_ ()
77-
78- await context .route ("**/*" , block_unwanted_resources )
79-
8073 page = await context .new_page ()
8174 status = await page_processing (
8275 page = page ,
@@ -106,7 +99,7 @@ async def block_unwanted_resources(route: Route):
10699 if common_params .screenshot :
107100 r ['screenshotUri' ] = f'{ host_url } /screenshot/{ r_id } '
108101
109- # Save result to disk
102+ # save result to disk
110103 if common_params .cache :
111104 cache .dump_result (r , key = r_id , screenshot = screenshot )
112105 return r
0 commit comments