diff --git a/.gitignore b/.gitignore index 68bc17f..ac4fb5a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,10 @@ __pycache__/ *.py[cod] *$py.class +.vscode/ + +test* +note.txt # C extensions *.so diff --git a/README.md b/README.md index b12186e..1c5af94 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,49 @@ A python module to download and upload from [gigafile](https://gigafile.nu/). -This is a personal tool as you may come across errors and bugs so feel free to add an issue. # Install - $ python setup.py install --user - or - $ pip install git+https://github.com/Sraq-Zit/gfile.git +or -# Usage + $ pip install git+https://github.com/Sraq-Zit/gfile.git -U +# Usage ## Module ### Import - from gfile import GFile +```py +from gfile import GFile +``` ### Download - url, cookies = GFile('https://XX.gigafile.nu/YYY').get_download() - # or - filename = GFile('https://XX.gigafile.nu/YYY').download() +```py +filename = GFile('https://XX.gigafile.nu/YYY').download() +``` + ### Upload - url = GFile('path/to/file', progress=True).upload().get_download_page() +```py +url = GFile('path/to/file', progress=True).upload().get_download_page() +``` ## CLI - - $ gfile upload path/to/file - - $ gfile download https://66.gigafile.nu/0320-b36ec21d4a56b143537e12df7388a5367 - - $ gfile -h - usage: Gfile [-h] [-p] [-n THREAD_NUM] [-s CHUNK_SIZE] [-m CHUNK_COPY_SIZE] {download,upload} uri - - positional arguments: - {download,upload} Upload or download - uri Filename to upload or url to download - - optional arguments: - -h, --help show this help message and exit - -p, --hide-progress Hide progress bar - -n THREAD_NUM, --thread-num THREAD_NUM - Number of threads used for upload (can incease speed) - -s CHUNK_SIZE, --chunk-size CHUNK_SIZE - allowed chunk size per upload - -m CHUNK_COPY_SIZE, --copy-size CHUNK_COPY_SIZE - Specifies size to copy the main file into pieces (the size loaded in RAM) - +```bash +$ gfile upload path/to/file + +$ gfile download 
https://66.gigafile.nu/0320-b36ec21d4a56b143537e12df7388a5367 + +$ gfile -h +usage: Gfile [-h] [-p] [-o OUTPUT] [-n THREAD_NUM] [-s CHUNK_SIZE] [-m CHUNK_COPY_SIZE] {download,upload} uri + +positional arguments: + {download,upload} upload or download + uri filename to upload or url to download + +options: + -h, --help show this help message and exit + -p, --hide-progress hide progress bar + -o OUTPUT, --output OUTPUT + output filename for download + -n THREAD_NUM, --thread-num THREAD_NUM + number of threads used for upload [default: 8] + -s CHUNK_SIZE, --chunk-size CHUNK_SIZE + chunk size per upload in bytes; note: chunk_size*thread will be loaded into memory [default: 100MB] + -m CHUNK_COPY_SIZE, --copy-size CHUNK_COPY_SIZE + specifies size to copy the main file into pieces [default: 1MB] +``` \ No newline at end of file diff --git a/gfile/__init__.py b/gfile/__init__.py index ad8b44e..c17fd3f 100644 --- a/gfile/__init__.py +++ b/gfile/__init__.py @@ -1,5 +1,4 @@ from .gfile import GFile -__author__ = """SraqZit""" -__email__ = 'sraqzit@gmail.com' -__version__ = '2.1' \ No newline at end of file +__author__ = """Sraqzit, fireattack""" +__version__ = '3.1' \ No newline at end of file diff --git a/gfile/__main__.py b/gfile/__main__.py new file mode 100644 index 0000000..bda8a5f --- /dev/null +++ b/gfile/__main__.py @@ -0,0 +1,4 @@ +from . 
import cmd + +if __name__ == '__main__': + cmd.main() \ No newline at end of file diff --git a/gfile/cmd.py b/gfile/cmd.py index 3dd0ae2..9471087 100644 --- a/gfile/cmd.py +++ b/gfile/cmd.py @@ -1,9 +1,7 @@ import argparse from enum import Enum -import re -from tqdm import tqdm if __name__ == "__main__": from gfile import GFile else: from .gfile import GFile @@ -12,27 +10,25 @@ class Action(Enum): upload = 'upload' def __str__(self): return self.value - - def main(): parser = argparse.ArgumentParser(prog='Gfile') parser.add_argument('action', type=Action, choices=list(Action), help='upload or download') parser.add_argument('uri', help='filename to upload or url to download') parser.add_argument('-p', '--hide-progress', dest='progress', action='store_false', default=True, help='hide progress bar') - parser.add_argument('-o', '--output', dest='output file', type=str, default=None, help='hide progress bar') - parser.add_argument('-n', '--thread-num', dest='thread_num', default=int(4), type=int, help='number of threads used for upload (can incease speed)') - parser.add_argument('-s', '--chunk-size', dest='chunk_size', type=int, help='gigafile allowed chunk size per upload', default=1024*1024*100) - parser.add_argument('-m', '--copy-size', dest='chunk_copy_size', type=int, help='specifies size to copy the main file into pieces (the size loaded in RAM)', default=1024*1024) + parser.add_argument('-o', '--output', type=str, default=None, help='output filename for download') + parser.add_argument('-n', '--thread-num', dest='thread_num', default=8, type=int, help='number of threads used for upload [default: 8]') + parser.add_argument('-s', '--chunk-size', dest='chunk_size', default="100MB", help='chunk size per upload in bytes; note: chunk_size*thread will be loaded into memory [default: 100MB]') + parser.add_argument('-m', '--copy-size', dest='chunk_copy_size', default="1MB", help='specifies size to copy the main file into pieces [default: 1MB]') + parser.add_argument('-t', 
'--timeout', type=int, default=10, help='specifies timeout time (in seconds) [default: 10]') args = parser.parse_args() gf = GFile(**args.__dict__) if args.action == Action.download: - gf.download(args.chunk_copy_size, args.progress) - + gf.download(args.output) else: - print(gf.upload().get_download_page()) - + gf.upload().get_download_page() + if __name__ == "__main__": main() diff --git a/gfile/gfile.py b/gfile/gfile.py index 8babd59..96095eb 100644 --- a/gfile/gfile.py +++ b/gfile/gfile.py @@ -1,161 +1,249 @@ -from math import ceil -import os +import concurrent.futures +import functools +import io +import math import re -import sys -import tempfile -from threading import Lock, Thread +import time import uuid -import requests -from requests_toolbelt import MultipartEncoderMonitor -import requests as r +from datetime import datetime +from math import ceil +from pathlib import Path +from urllib.parse import unquote -from requests_toolbelt.multipart import encoder +import requests +from requests.adapters import HTTPAdapter +from requests_toolbelt import MultipartEncoder, StreamingIterator from tqdm import tqdm +from urllib3.util.retry import Retry + + +def bytes_to_size_str(bytes): + if bytes == 0: + return "0B" + units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB") + i = int(math.floor(math.log(bytes, 1024))) + p = math.pow(1024, i) + return f"{bytes/p:.02f} {units[i]}" + + +def size_str_to_bytes(size_str): + if isinstance(size_str, int): + return size_str + m = re.search(r'^(?P<num>\d+) ?((?P<unit>[KMGTPEZY]?)(iB|B)?)$', size_str, re.IGNORECASE) + assert m + units = ("B", "K", "M", "G", "T", "P", "E", "Z", "Y") + unit = (m['unit'] or 'B').upper() + return int(math.pow(1024, units.index(unit)) * int(m['num'])) + + +def requests_retry_session( + retries=5, + backoff_factor=0.2, + status_forcelist=None, # (500, 502, 504) + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + backoff_factor=backoff_factor, + 
status_forcelist=status_forcelist, + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + session.mount('https://', adapter) + return session + + +def split_file(input_file, out, target_size=None, start=0, chunk_copy_size=1024*1024): + input_file = Path(input_file) + size = 0 + + input_size = input_file.stat().st_size + if target_size is None: + output_size = input_size - start + else: + output_size = min( target_size, input_size - start) + + with open(input_file, 'rb') as f: + f.seek(start) + while True: + # print(f'{size / output_size * 100:.2f}%', end='\r') + if size == output_size: break + if size > output_size: + raise Exception(f'Size ({size}) is larger than {target_size} bytes!') + current_chunk_size = min(chunk_copy_size, output_size - size) + chunk = f.read(current_chunk_size) + if not chunk: break + size += len(chunk) + out.write(chunk) class GFile: - - def __init__(self, uri, progress=False, thread_num=4, chunk_size=1024*1024*100, chunk_copy_size=1024*1024, **kwargs) -> None: + def __init__(self, uri, progress=False, thread_num=4, chunk_size=1024*1024*10, chunk_copy_size=1024*1024, timeout=10, **kwargs) -> None: self.uri = uri - self.chunk_size = chunk_size - self.chunk_copy_size = chunk_copy_size + self.chunk_size = size_str_to_bytes(chunk_size) + self.chunk_copy_size = size_str_to_bytes(chunk_copy_size) self.thread_num=thread_num self.progress = progress self.data = None - self.pbar: tqdm = None - self.session = requests.Session() - self.index = 0 - - - def upload_chunk(self, chunks): - self.lock.acquire() - with open(self.uri, 'rb') as ff: - while not self.failed and self.index < chunks: - chunk_id = f'chunk {self.index}' - if self.pbar: - self.pbar.desc = chunk_id - with tempfile.NamedTemporaryFile() as f: - i = 0 - chunk = ff.read(self.chunk_copy_size) - while i < self.chunk_size and chunk: - f.write(chunk) - i += self.chunk_copy_size - chunk = ff.read(self.chunk_copy_size) - - f.seek(0) - - fields = { - "id": 
self.token, - "name": os.path.basename(self.uri), - "chunk": str(self.index), - "chunks": str(chunks), - "lifetime": "7", - "file": (self.uri, f, "application/octet-stream"), - } - # print(fields) - - released = False - - self.index += 1 - - - def progress(monitor: MultipartEncoderMonitor): - nonlocal released - self.pbar.update(monitor.bytes_read - monitor.prog) - monitor.prog = monitor.bytes_read - if self.failed: self.session.close() - if not released and monitor.bytes_read > i/10: - self.lock.release() - released = True - - form = encoder.MultipartEncoder(fields) - if self.pbar: - - form = encoder.MultipartEncoderMonitor(form, progress) - setattr(form, 'prog', 0) - server = re.search( - r'var server = "(.+?)"', self.session.get('https://gigafile.nu/').text)[1] - headers = { - "content-type": form.content_type, - } - resp = self.session.post( - f"https://{server}/upload_chunk.php", headers=headers, data=form).json() - - if 'url' in resp: - self.data = resp - - if 'status' not in resp or resp['status']: - print(resp) - self.failed = True - if self.failed: break - self.lock.acquire() - if self.lock.locked(): self.lock.release() - - + self.pbar = None + self.timeout = timeout + self.session = requests_retry_session() + self.session.request = functools.partial(self.session.request, timeout=self.timeout) + self.cookies = None + self.current_chunk = 0 + + + def upload_chunk(self, chunk_no, chunks): + bar = self.pbar[chunk_no % self.thread_num] if self.pbar else None + with io.BytesIO() as f: + split_file(self.uri, f, self.chunk_size, start=chunk_no * self.chunk_size, chunk_copy_size=self.chunk_copy_size) + chunk_size = f.tell() + f.seek(0) + fields = { + "id": self.token, + "name": Path(self.uri).name, + "chunk": str(chunk_no), + "chunks": str(chunks), + "lifetime": "100", + "file": ("blob", f, "application/octet-stream"), + } + form_data = MultipartEncoder(fields) + headers = { + "content-type": form_data.content_type, + } + # convert the form-data into a binary 
string, this way we can control/throttle its read() behavior + form_data_binary = form_data.to_string() + del form_data + + size = len(form_data_binary) + if bar: + bar.desc = f'chunk {chunk_no + 1}/{chunks}' + bar.reset(total=size) + # bar.refresh() + + def gen(): + offset = 0 + while True: + if offset < size: + update_tick = 1024 * 128 + yield form_data_binary[offset:offset+update_tick] + if bar: + bar.update(min(update_tick, size - offset)) + bar.refresh() + offset += update_tick + else: + if chunk_no != self.current_chunk: + time.sleep(0.01) + else: + time.sleep(0.1) + break + while True: + try: + streamer = StreamingIterator(size, gen()) + resp = self.session.post(f"https://{self.server}/upload_chunk.php", data=streamer, headers=headers) + except Exception as ex: + print(ex) + print('Retrying...') + else: + break + + resp_data = resp.json() + self.current_chunk += 1 + + if 'url' in resp_data: + self.data = resp_data + if 'status' not in resp_data or resp_data['status']: + print(resp_data) + self.failed = True + + def upload(self): self.token = uuid.uuid1().hex self.pbar = None self.failed = False - self.index = 0 - self.lock = Lock() - size = os.path.getsize(self.uri) + assert Path(self.uri).exists() + size = Path(self.uri).stat().st_size chunks = ceil(size / self.chunk_size) + print(f'Filesize {bytes_to_size_str(size)}, chunk size: {bytes_to_size_str(self.chunk_size)}, total chunks: {chunks}') + if self.progress: - self.pbar = tqdm(total=size, unit="B", unit_scale=True, leave=False, unit_divisor=1024) - self.session = requests.Session() - # self.session.get('https://gigafile.nu/') - threads = [] - for _ in range(self.thread_num): - t = Thread(target=self.upload_chunk, args=(chunks,)) - threads.append(t) - t.start() - - try: - for t in threads: - t.join() - - self.pbar.close() - if 'url' not in self.data: - print('something went wrong', self.data) - except KeyboardInterrupt: - self.pbar.close() - self.failed = True - print('Aborted! 
cleaning...') - return self - - def get_download_page(self): return self.data and self.data['url'] - def get_file_id(self): return self.data and self.data['filename'] - - def get_download(self): - _data: dict[str, str] = self.data - if not os.path.exists(self.uri): - data = re.search(r'^https?:\/\/\d+?\.gigafile\.nu\/([a-z0-9-]+)$', self.uri) - if data: - _data = {'url': self.uri, 'filename': data[1]} - else: - raise ValueError('URL invalid') - - if not _data: - return ValueError('You specified no file to upload nor to download') - - sess = requests.Session() - sess.get(_data['url']) - return (_data['url'].replace(_data['filename'], 'download.php?file='+_data['filename']), sess.cookies) - - def download(self, copy_size=1024*1024, progress=True, filename=None): - url, cookies = self.get_download() - if not filename: - headers = r.head(url, cookies=cookies).headers - filesize = int(headers['Content-Length']) - filename = re.search(r'filename="(.+?)";', headers['Content-Disposition'])[1] - filename = re.sub(r'\\|\/|:|\*|\?|"|<|>|\|', '_', filename) - if progress: - pbar = tqdm(total=filesize, unit='B', unit_scale=True, desc=filename) - - with open(filename, 'wb') as f: - with r.get(url, cookies=cookies, stream=True) as req: - req.raise_for_status() - for chunk in req.iter_content(chunk_size=copy_size): + self.pbar = [] + for i in range(self.thread_num): + self.pbar.append(tqdm(total=size, unit="B", unit_scale=True, leave=False, unit_divisor=1024, ncols=100, position=i)) + + self.server = re.search(r'var server = "(.+?)"', self.session.get('https://gigafile.nu/').text)[1] + + # upload the first chunk to set cookies properly. 
+ self.upload_chunk(0, chunks) + + # upload the remaining chunks (second through last) in parallel + with concurrent.futures.ThreadPoolExecutor(max_workers=self.thread_num) as ex: + futures = {ex.submit(self.upload_chunk, i, chunks): i for i in range(1, chunks)} + try: + for future in concurrent.futures.as_completed(futures): + if self.failed: + print('Failed!') + for future in futures: + future.cancel() + return + except KeyboardInterrupt: + print('\nUser cancelled the operation.') + for future in futures: + future.cancel() + return + + # upload last chunk if not already + # if chunks > 1: + # # print('\nupload the last chunk in single thread') + # self.upload_chunk(chunks - 1, chunks) + + if self.pbar: + for bar in self.pbar: + bar.close() + print('') + if 'url' not in self.data: + print('Something went wrong. Upload failed.', self.data) + return self # for chain + + + def get_download_page(self): + if not self.data or not 'url' in self.data: + return + f = Path(self.uri) + print(f"Finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, filename: {f.name}, size: {bytes_to_size_str(f.stat().st_size)}") + print(self.data['url']) + return self.data['url'] + + + def download(self, filename=None): + m = re.search(r'^https?:\/\/\d+?\.gigafile\.nu\/([a-z0-9-]+)$', self.uri) + if not m: + print('Invalid URL.') + return + self.session.get(self.uri) # setup cookie + file_id = m[1] + download_url = self.uri.replace(file_id, 'download.php?file=' + file_id) + with self.session.get(download_url, stream=True) as r: + r.raise_for_status() + filesize = int(r.headers['Content-Length']) + if not filename: + filename = 'gigafile_noname.bin' # temp name + content_disp = r.headers['Content-Disposition'] + if "UTF-8''" in content_disp: + filename = unquote(content_disp.split("UTF-8''")[-1]) + else: + filename = re.search(r'filename="(.+?)";', content_disp)[1].encode('iso8859-1','ignore').decode('utf-8', 'ignore') + filename = re.sub(r'[\\/:*?"<>|]', '_', filename) # only sanitize remote filename. 
User-provided names are the user's own responsibility. + if self.progress: + desc = filename if len(filename) <= 20 else filename[0:11] + '..' + filename[-7:] + self.pbar = tqdm(total=filesize, unit='B', unit_scale=True, unit_divisor=1024, desc=desc) + with open(filename, 'wb') as f: + for chunk in r.iter_content(chunk_size=self.chunk_copy_size): f.write(chunk) - if pbar: pbar.update(len(chunk)) + if self.pbar: self.pbar.update(len(chunk)) + if self.pbar: self.pbar.close() + + filesize_downloaded = Path(filename).stat().st_size + print(f'Filesize check: expected: {filesize}; actual: {filesize_downloaded}. {"Succeeded." if filesize==filesize_downloaded else "Failed!"}') return filename diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..49986e2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +build-backend = 'setuptools.build_meta' +requires = ['setuptools'] \ No newline at end of file diff --git a/setup.py b/setup.py index 719a2af..b5984c7 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,10 @@ setup( name='gfile', - version='2.1', + version='3.1', description='A python module to download and upload from gigafile.nu', - author='Sraqzit', - author_email='kingofmestry@gmail.com', - install_requires=['requests==2.25.1', 'requests_toolbelt==0.9.1', 'tqdm==4.61.2'], + author='Sraqzit, fireattack', + install_requires=['requests>=2.25.1', 'requests_toolbelt>=0.9.1', 'tqdm>=4.61.2'], requires=[], packages=['gfile'], platforms=["Linux", "Mac OS-X", "Windows", "Unix"],