From 152dff134053cfeb454302b4129cccc82a638fe1 Mon Sep 17 00:00:00 2001
From: broqdev
Date: Thu, 22 May 2025 01:59:35 -0700
Subject: [PATCH 1/2] select url by retry count instead of pure randomness

---
 bilix/download/base_downloader_part.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/bilix/download/base_downloader_part.py b/bilix/download/base_downloader_part.py
index c8458de7..7159b479 100644
--- a/bilix/download/base_downloader_part.py
+++ b/bilix/download/base_downloader_part.py
@@ -16,6 +16,8 @@
 
 __all__ = ['BaseDownloaderPart']
 
+domain_scores = {}
+
 
 class BaseDownloaderPart(BaseDownloader):
     """Base Async http Content-Range Downloader"""
@@ -199,6 +201,7 @@ async def get_file(self, url_or_urls: Union[str, Iterable[str]], path: Union[Pat
 
     async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[int, int],
                              task_id) -> Path:
+        global domain_scores
         start, end = part_range
         part_path = path.with_name(f'{path.name}.{part_range[0]}-{part_range[1]}')
        exist, part_path = path_check(part_path)
@@ -208,10 +211,21 @@ async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[in
             await self.progress.update(task_id, advance=downloaded)
         if start > end:
             return part_path  # skip already finished
 
-        url_idx = random.randint(0, len(urls) - 1)
         for times in range(1 + self.stream_retry):
+            url_idx = random.randint(0, len(urls) - 1)
+            # find domain with min score in domain_scores
+            domains = [urlparse(url_).netloc for url_ in urls]
+            domain_idx = min(range(len(domains)), key=lambda i: domain_scores.get(domains[i], 0))
+            url_idx = domain_idx
+            # update domain_scores
+            domain_min_score = domain_scores.get(domains[domain_idx], 0)
+            for domain in domain_scores.keys():
+                domain_scores[domain] = domain_scores[domain] - domain_min_score
+
             try:
+                # parse domain from url
+                domain = urlparse(urls[url_idx]).netloc
                 async with \
                         self.client.stream("GET", urls[url_idx], follow_redirects=True,
                                            headers={'Range': f'bytes={start}-{end}'}) as r, \
@@ -227,6 +241,7 @@ async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[in
                         await self._check_speed(len(chunk))
                 break
             except (httpx.HTTPStatusError, httpx.TransportError):
+                domain_scores[domain] = domain_scores.get(domain, 0) + 1
                 continue
         else:
             raise Exception(f"STREAM 超过重复次数 {part_path.name}")

From bb28d0f90c2ecd69ad725704541aeabfc52c5034 Mon Sep 17 00:00:00 2001
From: broqdev
Date: Thu, 22 May 2025 02:05:41 -0700
Subject: [PATCH 2/2] remove unnecessary code

---
 bilix/download/base_downloader_part.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bilix/download/base_downloader_part.py b/bilix/download/base_downloader_part.py
index 7159b479..12ea055c 100644
--- a/bilix/download/base_downloader_part.py
+++ b/bilix/download/base_downloader_part.py
@@ -213,7 +213,6 @@ async def _get_file_part(self, urls: List[str], path: Path, part_range: Tuple[in
             return part_path  # skip already finished
 
         for times in range(1 + self.stream_retry):
-            url_idx = random.randint(0, len(urls) - 1)
             # find domain with min score in domain_scores
             domains = [urlparse(url_).netloc for url_ in urls]
             domain_idx = min(range(len(domains)), key=lambda i: domain_scores.get(domains[i], 0))
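
Taken together, the two patches replace uniform random URL choice with a least-penalized-domain policy: each retry picks the URL whose domain has the lowest accumulated failure score, all scores are renormalized so the minimum becomes zero, and a domain's score is incremented whenever a request to it fails. Below is a minimal, self-contained sketch of that policy; the function names (pick_url, record_failure) and the example URLs are illustrative only, not part of bilix:

from urllib.parse import urlparse

# Failure score per domain, shared across download parts
# (module-level, as in the patch).
domain_scores = {}

def pick_url(urls):
    """Return the index of the URL whose domain has the lowest score.

    After picking, subtract the minimum score from every tracked domain,
    mirroring the renormalization step in the patch.
    """
    domains = [urlparse(u).netloc for u in urls]
    idx = min(range(len(domains)), key=lambda i: domain_scores.get(domains[i], 0))
    min_score = domain_scores.get(domains[idx], 0)
    for domain in domain_scores:  # value updates only; safe while iterating
        domain_scores[domain] -= min_score
    return idx

def record_failure(url):
    """Penalize a domain after a failed request so it is picked less often."""
    domain = urlparse(url).netloc
    domain_scores[domain] = domain_scores.get(domain, 0) + 1

# Usage sketch: each retry goes to the currently least-penalized domain.
urls = ["https://cdn-a.example.com/video", "https://cdn-b.example.com/video"]
for attempt in range(3):
    idx = pick_url(urls)
    try:
        ...  # a ranged GET against urls[idx] would go here
        break
    except (ConnectionError, TimeoutError):
        record_failure(urls[idx])

The net effect is that a domain which keeps failing accumulates a positive score and is avoided until every alternative has failed at least as often, while the renormalization keeps the scores from growing without bound.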