Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions pytube/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ async def _execute_request(
if resp.status == 400:
raise PytubeError(f"Not 200 code, code={resp.status}")
else:
resp.status = 206
return resp
except aiohttp.client_exceptions.InvalidURL:
await session.close()
Expand Down Expand Up @@ -162,17 +163,21 @@ async def stream(
url,
session,
timeout=900,
max_retries=0
max_retries=0,
current_downloaded: int =0,
stream_filesize: int =0
):
"""Read the response in chunks.
:param str url: The URL to perform the GET request for.
:rtype: Iterable[bytes]
"""
file_size: int = default_range_size # fake filesize to start
downloaded = 0

file_size: int = default_range_size if stream_filesize == 0 else stream_filesize # fake filesize to start otherwise use existing filesize
downloaded = current_downloaded
while downloaded < file_size:
# print("Im not here hahah ")
stop_pos = min(downloaded + default_range_size, file_size) - 1
range_header = f"bytes={downloaded}-{stop_pos}"
range_header = f"bytes={downloaded}-"
tries = 0

# Attempt to make the request multiple times as necessary.
Expand All @@ -187,8 +192,10 @@ async def stream(
url,
session,
method="GET",
headers={"Range": range_header},
timeout=timeout
headers={"Range": range_header
},
timeout=timeout,

)
except URLError as e:
# We only want to skip over timeout errors, and
Expand All @@ -205,6 +212,7 @@ async def stream(
if file_size == default_range_size:
try:
content_range = response.headers["Content-Range"]
# print("voici le content range:{}".format(content_range))
file_size = int(content_range.split("/")[1])
except (KeyError, IndexError, ValueError) as e:
logger.error(e)
Expand Down Expand Up @@ -292,4 +300,4 @@ async def head(url, session):
return {k.lower(): v for k, v in response_headers.items()}

def createSession():
return aiohttp.ClientSession()
return aiohttp.ClientSession(trust_env=True)
56 changes: 30 additions & 26 deletions pytube/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@
from urllib.parse import parse_qs
from aiohttp import ClientSession
from async_property import async_property
import aiofiles as aiofs
import aiofile as aiof
from aiofiles.threadpool.binary import AsyncFileIO
from pytube import extract
from pytube import request
from pytube.helpers import safe_filename
Expand Down Expand Up @@ -254,39 +251,48 @@ async def download(
output_path=output_path,
filename_prefix=filename_prefix,
)

if skip_existing and await self.exists_at_path(file_path):
if skip_existing and await self.exists_at_path(file_path):
logger.debug(f'file {file_path} already exists, skipping')
await self.on_complete(file_path)
return file_path

bytes_remaining = (await self.filesize)
logger.debug(f'downloading ({(await self.filesize)} total bytes) file to {file_path}')

with open(file_path, "wb") as fh:
# async_open allow us to open and write on many files concurently
with open(file_path, "ab+") as fh:
""" 'current_downloaded' help us to indicate to the server that we have already downloaded part of the file and we just want to continue downloading from there
"""
current_downloaded = fh.tell() # since we are opening file with 'ab' mode file_handler will give filesize in bytes
try:

async for chunk in request.stream(
self.url,
self._session,
timeout=timeout,
max_retries=max_retries
max_retries=max_retries,
current_downloaded=current_downloaded,
stream_filesize= bytes_remaining
):
# reduce the (bytes) remainder by the length of the chunk.
bytes_remaining -= len(chunk)
# send to the on_progress callback.
await self.on_progress(chunk, fh, bytes_remaining)
# # reduce the (bytes) remainder by the length of the chunk.
# bytes_remaining = -len(chunk)

# send to the on_progress callback with the current downloaded file to easily compute percentage in callback
bytes_remaining = fh.tell()
await self.on_progress(chunk, fh,bytes_remaining,)

except HTTPError as e:
print("erreure ici")
if e.code != 404:
raise
# Some adaptive streams need to be requested with sequence numbers
async for chunk in request.seq_stream(
self.url,
self._session,
timeout=timeout,
max_retries=max_retries
max_retries=max_retries,
):
# reduce the (bytes) remainder by the length of the chunk.
bytes_remaining -= len(chunk)
bytes_remaining -= len(chunk)
# send to the on_progress callback.
await self.on_progress(chunk, fh, bytes_remaining)
await self.on_complete(file_path)
Expand All @@ -308,11 +314,11 @@ def get_file_path(

async def exists_at_path(self, file_path: str) -> bool:
return (
os.path.isfile(file_path)
and os.path.getsize(file_path) == (await self.filesize)
os.path.isfile(file_path) and
os.path.getsize(file_path) == (await self.filesize)
)

async def stream_to_buffer(self, buffer: BinaryIO) -> None:
async def stream_to_buffer(self, buffer:BinaryIO) -> None:
"""Write the media stream to buffer

:rtype: io.BytesIO buffer
Expand All @@ -324,13 +330,13 @@ async def stream_to_buffer(self, buffer: BinaryIO) -> None:

async for chunk in request.stream(self.url, self._session):
# reduce the (bytes) remainder by the length of the chunk.
bytes_remaining -= len(chunk)
bytes_remaining -= len(chunk)
# send to the on_progress callback.
await self.on_progress(chunk, buffer, bytes_remaining)
await self.on_progress(chunk, buffer, bytes_remaining,)
await self.on_complete(None)

async def on_progress(
self, chunk: bytes, file_handler:BinaryIO, bytes_remaining: int
self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int
):
"""On progress callback function.

Expand All @@ -351,12 +357,10 @@ async def on_progress(
:rtype: None

"""

async with aiof.async_open(file_handler.name, mode="ab+") as fp:
await fp.write(chunk)
logger.debug("download remaining: %s", bytes_remaining)
if self._monostate.on_progress:
await self._monostate.on_progress(self, chunk, bytes_remaining)
file_handler.write(chunk)
logger.debug("download remaining: %s", bytes_remaining)
if self._monostate.on_progress:
await self._monostate.on_progress(self, chunk, bytes_remaining)

async def on_complete(self, file_path: Optional[str]):
"""On download complete handler function.
Expand Down