From 3927f384b4ff4eca64016bf44e4aeba1b26e1b63 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 18:58:56 +0300 Subject: [PATCH 01/13] add build.yml --- .github/workflows/build.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..93774b5 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,34 @@ +name: Publish Scribdl 🐍 distributions 📦 to PyPI and TestPyPI + +on: push + +jobs: + build-n-publish: + name: Publish Scribdl 🐍 distributions 📦 to PyPI and TestPyPI + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --sdist + --wheel + --outdir dist/ + . 
+ + - name: Publish distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} From 6d0507c192fe52f6798cc1082bee21614032f717 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:03:43 +0300 Subject: [PATCH 02/13] add build.yml --- setup.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index bedcfc8..aaa0a4d 100644 --- a/setup.py +++ b/setup.py @@ -16,11 +16,11 @@ long_description=long_description, author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', - packages = find_packages(), + packages=find_packages(), entry_points={ - 'console_scripts': [ - 'scribdl = scribdl.command_line:_command_line', - ] + 'console_scripts': [ + 'scribdl = scribdl.command_line:_command_line', + ] }, url='https://www.github.com/ritiek/scribd-downloader', keywords=['scribd-downloader', 'documents', 'command-line', 'python'], @@ -28,9 +28,9 @@ download_url='https://github.com/ritiek/scribd-downloader/archive/v' + __version__ + '.tar.gz', classifiers=[], install_requires=[ - 'requests >= 2.19.1', - 'BeautifulSoup4 >= 4.6.3', - 'img2pdf >= 0.3.1', - 'md2pdf >= 0.4' + 'requests >= 2.19.1', + 'BeautifulSoup4 >= 4.6.3', + 'img2pdf >= 0.3.1', + 'md2pdf >= 0.4' ] - ) + ) From db3f3ccad975899d8fa35ba7e6aad7e1acf5ef29 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:05:52 +0300 Subject: [PATCH 03/13] add build.yml --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index aaa0a4d..e9729a1 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,11 @@ with open(os.path.join("scribdl", "version.py")) as version_file: exec(version_file.read()) -with open("README.rst", "r") as f: - long_description = f.read() +try: + import pypandoc + long_description = pypandoc.convert_file('README.md', 'rst') +except(IOError, ImportError): + long_description 
= open('README.md').read() setup(name='scribd-downloader', version=__version__, From 7900f1ffce61020adee267024b81c3b95cf55257 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:07:54 +0300 Subject: [PATCH 04/13] add build.yml --- setup.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index e9729a1..bf9b052 100644 --- a/setup.py +++ b/setup.py @@ -7,16 +7,13 @@ with open(os.path.join("scribdl", "version.py")) as version_file: exec(version_file.read()) -try: - import pypandoc - long_description = pypandoc.convert_file('README.md', 'rst') -except(IOError, ImportError): - long_description = open('README.md').read() +with open("README.rst", "r") as f: + long_description = f.read() setup(name='scribd-downloader', version=__version__, description='Download documents, books and audiobooks off Scribd', - long_description=long_description, + long_description='Check description in https://github.com/Phoenix124/scribd-downloader/blob/master/README.rst', author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', packages=find_packages(), From 75f7392b4e66163f36a0c1f018325ad67f92b27c Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:10:23 +0300 Subject: [PATCH 05/13] add build.yml --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 93774b5..63aaeb4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,10 +1,10 @@ -name: Publish Scribdl 🐍 distributions 📦 to PyPI and TestPyPI +name: Publish Scribdl 🐍 distributions 📦 to PyPI on: push jobs: build-n-publish: - name: Publish Scribdl 🐍 distributions 📦 to PyPI and TestPyPI + name: Publish Scribdl 🐍 distributions 📦 to PyPI runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 04dea914cac382725801111472331c278d3c91e7 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 
19:13:21 +0300 Subject: [PATCH 06/13] add build.yml --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index bf9b052..d05879a 100644 --- a/setup.py +++ b/setup.py @@ -3,17 +3,17 @@ from setuptools import setup, find_packages import os +__version__ = '1.3.1' + # __version__ comes into namespace from here with open(os.path.join("scribdl", "version.py")) as version_file: exec(version_file.read()) -with open("README.rst", "r") as f: - long_description = f.read() - setup(name='scribd-downloader', version=__version__, description='Download documents, books and audiobooks off Scribd', long_description='Check description in https://github.com/Phoenix124/scribd-downloader/blob/master/README.rst', + long_description_content_type='text/x-rst', author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', packages=find_packages(), @@ -22,7 +22,7 @@ 'scribdl = scribdl.command_line:_command_line', ] }, - url='https://www.github.com/ritiek/scribd-downloader', + url='https://github.com/Phoenix124/scribd-downloader', keywords=['scribd-downloader', 'documents', 'command-line', 'python'], license='MIT', download_url='https://github.com/ritiek/scribd-downloader/archive/v' + __version__ + '.tar.gz', From 3f92e930509ff0531d1a6a1e9b027d6c91674db6 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:20:52 +0300 Subject: [PATCH 07/13] update setup.py --- setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index d05879a..bb89073 100644 --- a/setup.py +++ b/setup.py @@ -4,16 +4,17 @@ import os __version__ = '1.3.1' - -# __version__ comes into namespace from here with open(os.path.join("scribdl", "version.py")) as version_file: exec(version_file.read()) +with open("README.rst", "r") as f: + long_description = f.read() + setup(name='scribd-downloader', version=__version__, description='Download documents, books and audiobooks off Scribd', - 
long_description='Check description in https://github.com/Phoenix124/scribd-downloader/blob/master/README.rst', - long_description_content_type='text/x-rst', + long_description_content_type="text/x-rst", + long_description=long_description, author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', packages=find_packages(), From 474ce3e2304f28d2f5af59da497119cc5f040c93 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:24:12 +0300 Subject: [PATCH 08/13] add build.yml --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index bb89073..d83e4d7 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,8 @@ setup(name='scribd-downloader', version=__version__, description='Download documents, books and audiobooks off Scribd', - long_description_content_type="text/x-rst", - long_description=long_description, + long_description_content_type="text/markdown", + long_description='### Readme.md', author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', packages=find_packages(), From fde1212132f985c7393b3b7ad8ab74269aa237ea Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:28:16 +0300 Subject: [PATCH 09/13] add build.yml --- PKG-INFO | 183 ------------------ scribd_downloader.egg-info/PKG-INFO | 183 ------------------ scribd_downloader.egg-info/SOURCES.txt | 32 --- .../dependency_links.txt | 1 - scribd_downloader.egg-info/entry_points.txt | 3 - scribd_downloader.egg-info/requires.txt | 4 - scribd_downloader.egg-info/top_level.txt | 1 - 7 files changed, 407 deletions(-) delete mode 100644 PKG-INFO delete mode 100644 scribd_downloader.egg-info/PKG-INFO delete mode 100644 scribd_downloader.egg-info/SOURCES.txt delete mode 100644 scribd_downloader.egg-info/dependency_links.txt delete mode 100644 scribd_downloader.egg-info/entry_points.txt delete mode 100644 scribd_downloader.egg-info/requires.txt delete mode 100644 scribd_downloader.egg-info/top_level.txt diff --git 
a/PKG-INFO b/PKG-INFO deleted file mode 100644 index 2699dca..0000000 --- a/PKG-INFO +++ /dev/null @@ -1,183 +0,0 @@ -Metadata-Version: 1.1 -Name: scribd-downloader -Version: 1.3.1 -Summary: Download documents, books and audiobooks off Scribd -Home-page: https://www.github.com/ritiek/scribd-downloader -Author: Ritiek Malhotra -Author-email: ritiekmalhotra123@gmail.com -License: MIT -Download-URL: https://github.com/ritiek/scribd-downloader/archive/v1.3.1.tar.gz -Description: Scribd-Downloader - ================= - - |PyPi Version| |Build Status| |Coverage Status| - - (I also found an online service https://dlscrib.com/ created by `Erik Fong`_. It doesn't - use this script as some people seem to think!). - - Current features: - - +------------+-------------------------------------+-------------------------------------------+ - | Type | Downloadable without Scribd premium | Requires Scribd premium for full download | - +============+=====================================+===========================================+ - | Documents | Yes | No | - +------------+-------------------------------------+-------------------------------------------+ - | Books | Yes | Yes | - +------------+-------------------------------------+-------------------------------------------+ - | Audiobooks | Yes | Yes | - +------------+-------------------------------------+-------------------------------------------+ - - **Some information about Scribd documents:** - - There are two types of documents on Scribd: - - - Documents made up using a collection of images and - - Actual documents where the text can be selected, copied etc. - - This script takes a different approach to both of them: - - - Documents consisting of a collection of images is straightforward and - this script will simply download the induvidual images which can - be combined to ``.pdf`` by passing ``--pdf`` option to the tool. Simple. - - - Actual documents where the text can be selected are hard to tackle. 
- If we feed such a document to this tool, only the text present in - document will be downloaded. Scribd seems to use javascript to somehow - combine text and images. So far, I haven't been able to combine them - with Python in a way they look like the original document. - - ------------ - Installation - ------------ - - Make sure you're using Python 3 (Python 2 is not supported by a few dependencies). - Then run these commands: - - :: - - $ pip install scribd-downloader - - or install the development version with: - - :: - - $ python setup.py install - - ----- - Usage - ----- - - :: - - usage: scribdl [-h] [-i] [-p] URL - - Download documents and books from scribd.com - - positional arguments: - URL scribd url to download - - optional arguments: - -h, --help show this help message and exit - -i, --images download url made up of images - -p, --pdf convert to pdf (*Nix: imagemagick) - -c CREDENTIALS_FILE, --credentials-file CREDENTIALS_FILE - path to file containing your Scribd premium - credentials - - -------- - Examples - -------- - - Scribd Documents - ---------------- - Downloading text from document containing selectable text: - :: - $ scribdl https://www.scribd.com/document/55949937/33-Strategies-of-War - - (Text will be saved side by side in a ``.md`` file in the current - working directory) - - Download document containing images; use the ``--images`` option (the tool cannot figure out this on its own): - :: - $ scribdl -i https://scribd.com/doc/17142797/Case-in-Point - - (Images will be saved in the current working directory) - - Scribd Books - ------------ - The below command will generate an ``.md`` file of the book in the current working directory: - :: - $ scribdl https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room - - Pass ``--pdf`` option to convert the generated output to a PDF. 
- - This will only dowload the book content available without owning a premium account on Scribd. - See the below section for downloading full books if you own a premium Scribd account. - - Scribd Audiobooks - ----------------- - This will download .mp3 of the audiobook: - :: - $ scribdl https://www.scribd.com/audiobook/237606860/100-Ways-to-Motivate-Yourself-Change-Your-Life-Forever - - This will only download the preview version of the audiobook. See the below section for - downloading complete audiobooks if you own a premium Scribd account. - - ------------------------------------------------- - Downloading complete textual books and audiobooks - ------------------------------------------------- - - If you have a premium Scribd account, you can also download the full version of - textual books and audiobooks. - - Create a text file containing your Scribd credentials, such that the contents of the file look like below: - :: - user@mail.com - password - - - Now pass the file path to the ``-c`` option, for example: - :: - $ scribdl -c scribd_credentials.txt https://www.scribd.com/audiobook/359295794/Principles-Life-and-Work - - It should then download you all the audiobook chapters as mp3. Similarly, you could also download complete - contents of a Scribd book by replacing the URL with the URL of your choice. - - If you're not willing to use place your account credentials in a file, you could also copy the cookie values - for ``_scribd_session`` and ``_scribd_expire`` when logged into your premium account on scribd on the web - browser and replace them with the ones in this file https://github.com/ritiek/scribd-downloader/blob/master/scribdl/const.py. - - You should then be able to automatically download full version of both textual books and audiobooks - from Scribd using the tool by running the commands as usual. - - ---------- - Disclaimer - ---------- - - Downloading books from Scribd for free maybe prohibited. 
This tool is - meant for educational purposes only. Please support the authors by buying - their titles. - - ------- - License - ------- - - ``The MIT License`` - - .. |PyPi Version| image:: https://img.shields.io/pypi/v/scribd-downloader.svg - :target: https://pypi.org/project/scribd-downloader - - .. |Build Status| image:: https://travis-ci.org/ritiek/scribd-downloader.svg?branch=master - :target: https://travis-ci.org/ritiek/scribd-downloader - - .. |Coverage Status| image:: https://codecov.io/gh/ritiek/scribd-downloader/branch/master/graph/badge.svg - :target: https://codecov.io/gh/ritiek/scribd-downloader - - .. _Mitmproxy: https://github.com/mitmproxy/mitmproxy - - .. _Erik Fong: mailto:dlscrib@gmail.com - .. _BookURL: https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room - .. ConstantValues: - -Keywords: scribd-downloader,documents,command-line,python -Platform: UNKNOWN diff --git a/scribd_downloader.egg-info/PKG-INFO b/scribd_downloader.egg-info/PKG-INFO deleted file mode 100644 index 2699dca..0000000 --- a/scribd_downloader.egg-info/PKG-INFO +++ /dev/null @@ -1,183 +0,0 @@ -Metadata-Version: 1.1 -Name: scribd-downloader -Version: 1.3.1 -Summary: Download documents, books and audiobooks off Scribd -Home-page: https://www.github.com/ritiek/scribd-downloader -Author: Ritiek Malhotra -Author-email: ritiekmalhotra123@gmail.com -License: MIT -Download-URL: https://github.com/ritiek/scribd-downloader/archive/v1.3.1.tar.gz -Description: Scribd-Downloader - ================= - - |PyPi Version| |Build Status| |Coverage Status| - - (I also found an online service https://dlscrib.com/ created by `Erik Fong`_. It doesn't - use this script as some people seem to think!). 
- - Current features: - - +------------+-------------------------------------+-------------------------------------------+ - | Type | Downloadable without Scribd premium | Requires Scribd premium for full download | - +============+=====================================+===========================================+ - | Documents | Yes | No | - +------------+-------------------------------------+-------------------------------------------+ - | Books | Yes | Yes | - +------------+-------------------------------------+-------------------------------------------+ - | Audiobooks | Yes | Yes | - +------------+-------------------------------------+-------------------------------------------+ - - **Some information about Scribd documents:** - - There are two types of documents on Scribd: - - - Documents made up using a collection of images and - - Actual documents where the text can be selected, copied etc. - - This script takes a different approach to both of them: - - - Documents consisting of a collection of images is straightforward and - this script will simply download the induvidual images which can - be combined to ``.pdf`` by passing ``--pdf`` option to the tool. Simple. - - - Actual documents where the text can be selected are hard to tackle. - If we feed such a document to this tool, only the text present in - document will be downloaded. Scribd seems to use javascript to somehow - combine text and images. So far, I haven't been able to combine them - with Python in a way they look like the original document. - - ------------ - Installation - ------------ - - Make sure you're using Python 3 (Python 2 is not supported by a few dependencies). 
- Then run these commands: - - :: - - $ pip install scribd-downloader - - or install the development version with: - - :: - - $ python setup.py install - - ----- - Usage - ----- - - :: - - usage: scribdl [-h] [-i] [-p] URL - - Download documents and books from scribd.com - - positional arguments: - URL scribd url to download - - optional arguments: - -h, --help show this help message and exit - -i, --images download url made up of images - -p, --pdf convert to pdf (*Nix: imagemagick) - -c CREDENTIALS_FILE, --credentials-file CREDENTIALS_FILE - path to file containing your Scribd premium - credentials - - -------- - Examples - -------- - - Scribd Documents - ---------------- - Downloading text from document containing selectable text: - :: - $ scribdl https://www.scribd.com/document/55949937/33-Strategies-of-War - - (Text will be saved side by side in a ``.md`` file in the current - working directory) - - Download document containing images; use the ``--images`` option (the tool cannot figure out this on its own): - :: - $ scribdl -i https://scribd.com/doc/17142797/Case-in-Point - - (Images will be saved in the current working directory) - - Scribd Books - ------------ - The below command will generate an ``.md`` file of the book in the current working directory: - :: - $ scribdl https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room - - Pass ``--pdf`` option to convert the generated output to a PDF. - - This will only dowload the book content available without owning a premium account on Scribd. - See the below section for downloading full books if you own a premium Scribd account. - - Scribd Audiobooks - ----------------- - This will download .mp3 of the audiobook: - :: - $ scribdl https://www.scribd.com/audiobook/237606860/100-Ways-to-Motivate-Yourself-Change-Your-Life-Forever - - This will only download the preview version of the audiobook. 
See the below section for - downloading complete audiobooks if you own a premium Scribd account. - - ------------------------------------------------- - Downloading complete textual books and audiobooks - ------------------------------------------------- - - If you have a premium Scribd account, you can also download the full version of - textual books and audiobooks. - - Create a text file containing your Scribd credentials, such that the contents of the file look like below: - :: - user@mail.com - password - - - Now pass the file path to the ``-c`` option, for example: - :: - $ scribdl -c scribd_credentials.txt https://www.scribd.com/audiobook/359295794/Principles-Life-and-Work - - It should then download you all the audiobook chapters as mp3. Similarly, you could also download complete - contents of a Scribd book by replacing the URL with the URL of your choice. - - If you're not willing to use place your account credentials in a file, you could also copy the cookie values - for ``_scribd_session`` and ``_scribd_expire`` when logged into your premium account on scribd on the web - browser and replace them with the ones in this file https://github.com/ritiek/scribd-downloader/blob/master/scribdl/const.py. - - You should then be able to automatically download full version of both textual books and audiobooks - from Scribd using the tool by running the commands as usual. - - ---------- - Disclaimer - ---------- - - Downloading books from Scribd for free maybe prohibited. This tool is - meant for educational purposes only. Please support the authors by buying - their titles. - - ------- - License - ------- - - ``The MIT License`` - - .. |PyPi Version| image:: https://img.shields.io/pypi/v/scribd-downloader.svg - :target: https://pypi.org/project/scribd-downloader - - .. |Build Status| image:: https://travis-ci.org/ritiek/scribd-downloader.svg?branch=master - :target: https://travis-ci.org/ritiek/scribd-downloader - - .. 
|Coverage Status| image:: https://codecov.io/gh/ritiek/scribd-downloader/branch/master/graph/badge.svg - :target: https://codecov.io/gh/ritiek/scribd-downloader - - .. _Mitmproxy: https://github.com/mitmproxy/mitmproxy - - .. _Erik Fong: mailto:dlscrib@gmail.com - .. _BookURL: https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room - .. ConstantValues: - -Keywords: scribd-downloader,documents,command-line,python -Platform: UNKNOWN diff --git a/scribd_downloader.egg-info/SOURCES.txt b/scribd_downloader.egg-info/SOURCES.txt deleted file mode 100644 index b763265..0000000 --- a/scribd_downloader.egg-info/SOURCES.txt +++ /dev/null @@ -1,32 +0,0 @@ -README.rst -setup.cfg -setup.py -scribd_downloader.egg-info/PKG-INFO -scribd_downloader.egg-info/SOURCES.txt -scribd_downloader.egg-info/dependency_links.txt -scribd_downloader.egg-info/entry_points.txt -scribd_downloader.egg-info/requires.txt -scribd_downloader.egg-info/top_level.txt -scribdl/__init__.py -scribdl/authorize.py -scribdl/command_line.py -scribdl/const.py -scribdl/downloader.py -scribdl/exceptions.py -scribdl/internals.py -scribdl/pdf_converter.py -scribdl/version.py -scribdl/content/__init__.py -scribdl/content/audiobook.py -scribdl/content/base.py -scribdl/content/book.py -scribdl/content/document.py -scribdl/content/test/__init__.py -scribdl/content/test/test_audiobook.py -scribdl/content/test/test_base.py -scribdl/content/test/test_book.py -scribdl/content/test/test_document.py -scribdl/test/__init__.py -scribdl/test/test_command_line.py -scribdl/test/test_download.py -scribdl/test/test_internals.py \ No newline at end of file diff --git a/scribd_downloader.egg-info/dependency_links.txt b/scribd_downloader.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/scribd_downloader.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/scribd_downloader.egg-info/entry_points.txt 
b/scribd_downloader.egg-info/entry_points.txt deleted file mode 100644 index e3b8b08..0000000 --- a/scribd_downloader.egg-info/entry_points.txt +++ /dev/null @@ -1,3 +0,0 @@ -[console_scripts] -scribdl = scribdl.command_line:_command_line - diff --git a/scribd_downloader.egg-info/requires.txt b/scribd_downloader.egg-info/requires.txt deleted file mode 100644 index 9c4fa0a..0000000 --- a/scribd_downloader.egg-info/requires.txt +++ /dev/null @@ -1,4 +0,0 @@ -requests>=2.19.1 -BeautifulSoup4>=4.6.3 -img2pdf>=0.3.1 -md2pdf>=0.4 diff --git a/scribd_downloader.egg-info/top_level.txt b/scribd_downloader.egg-info/top_level.txt deleted file mode 100644 index 88a066c..0000000 --- a/scribd_downloader.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -scribdl From b361bcf18ac63cbe223834c69e6a847ceace9ab0 Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:30:02 +0300 Subject: [PATCH 10/13] add build.yml --- scribdl/__init__.py | 10 - scribdl/authorize.py | 60 ----- scribdl/command_line.py | 61 ----- scribdl/const.py | 8 - scribdl/content/__init__.py | 0 scribdl/content/audiobook.py | 297 ------------------------- scribdl/content/base.py | 65 ------ scribdl/content/book.py | 186 ---------------- scribdl/content/document.py | 204 ----------------- scribdl/content/test/__init__.py | 0 scribdl/content/test/test_audiobook.py | 87 -------- scribdl/content/test/test_base.py | 26 --- scribdl/content/test/test_book.py | 26 --- scribdl/content/test/test_document.py | 43 ---- scribdl/downloader.py | 105 --------- scribdl/exceptions.py | 12 - scribdl/internals.py | 40 ---- scribdl/pdf_converter.py | 48 ---- scribdl/test/__init__.py | 0 scribdl/test/test_command_line.py | 44 ---- scribdl/test/test_download.py | 47 ---- scribdl/test/test_internals.py | 15 -- scribdl/version.py | 1 - setup.py | 2 - 24 files changed, 1387 deletions(-) delete mode 100644 scribdl/__init__.py delete mode 100644 scribdl/authorize.py delete mode 100644 scribdl/command_line.py delete mode 
100644 scribdl/const.py delete mode 100644 scribdl/content/__init__.py delete mode 100644 scribdl/content/audiobook.py delete mode 100644 scribdl/content/base.py delete mode 100644 scribdl/content/book.py delete mode 100644 scribdl/content/document.py delete mode 100644 scribdl/content/test/__init__.py delete mode 100644 scribdl/content/test/test_audiobook.py delete mode 100644 scribdl/content/test/test_base.py delete mode 100644 scribdl/content/test/test_book.py delete mode 100644 scribdl/content/test/test_document.py delete mode 100644 scribdl/downloader.py delete mode 100644 scribdl/exceptions.py delete mode 100644 scribdl/internals.py delete mode 100644 scribdl/pdf_converter.py delete mode 100644 scribdl/test/__init__.py delete mode 100644 scribdl/test/test_command_line.py delete mode 100644 scribdl/test/test_download.py delete mode 100644 scribdl/test/test_internals.py delete mode 100644 scribdl/version.py diff --git a/scribdl/__init__.py b/scribdl/__init__.py deleted file mode 100644 index 974a0b3..0000000 --- a/scribdl/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .version import __version__ - -from .downloader import Downloader - -from .content.document import ScribdTextualDocument -from .content.document import ScribdImageDocument -from .content.book import ScribdBook -from .content.audiobook import ScribdAudioBook - -from .pdf_converter import ConvertToPDF diff --git a/scribdl/authorize.py b/scribdl/authorize.py deleted file mode 100644 index 0fa7401..0000000 --- a/scribdl/authorize.py +++ /dev/null @@ -1,60 +0,0 @@ -import requests -import json -from bs4 import BeautifulSoup -from . import const -from . 
import exceptions - -SCRIBD_LOGIN_URL = "https://www.scribd.com/login" - -SCRIBD_LOGIN_HEADERS = { - "X-Requested-With": "XMLHttpRequest" -} - -SCRIBD_LOGIN_DATA = { - "signup_location": "https://www.scribd.com/" -} - - -def set_credentials(filepath): - """ - Reads username and password for Scribd premium account - from the file passed and overrides the default values - for headers and cookies. - """ - login_page = requests.get(SCRIBD_LOGIN_URL) - login_cookies = login_page.cookies - - with open(filepath, "r") as in_file: - content = in_file.read() - username, password = content.split() - - SCRIBD_LOGIN_DATA["login_or_email"] = username - SCRIBD_LOGIN_DATA["login_password"] = password - - # - soup = BeautifulSoup(login_page.text, features="html5lib") - csrf = soup.find("meta", dict(name="csrf-token")) - if csrf: - SCRIBD_LOGIN_HEADERS["X-CSRF-Token"] = csrf.attrs['content'] - - response = requests.post(SCRIBD_LOGIN_URL, - headers=SCRIBD_LOGIN_HEADERS, - cookies=login_cookies, - json=SCRIBD_LOGIN_DATA) - - if response.status_code != 200: - raise exceptions.ScribdFetchError("Login failed with status " + str(response.status_code)) - - #print(response.text) - result = json.loads(response.text) - # {"login":true,"success":true,"user":{"id":514698173}} - if not "login" in result or not result["login"]: - # {"form_name":null,"errors":[{"input_name":"login_or_email","msg":"No account found with that email or username. Please try again or sign up."}]} - errors = result["errors"] - if errors: - raise exceptions.ScribdFetchError("Login error: " + errors[0]["msg"]) - - const.premium_cookies["_scribd_session"] = response.cookies["_scribd_session"] - const.premium_cookies["_scribd_expire"] = response.cookies["_scribd_expire"] - - return response diff --git a/scribdl/command_line.py b/scribdl/command_line.py deleted file mode 100644 index a68d842..0000000 --- a/scribdl/command_line.py +++ /dev/null @@ -1,61 +0,0 @@ -import argparse - -from .downloader import Downloader -from . 
import authorize - - -def get_arguments(): - """ - Parses arguments off the command-line. - """ - parser = argparse.ArgumentParser( - description="Download documents and books from scribd.com" - ) - - parser.add_argument("url", metavar="URL", type=str, help="scribd url to download") - parser.add_argument( - "-i", - "--images", - help="download url made up of images", - action="store_true", - default=False, - ) - parser.add_argument( - "-p", - "--pdf", - help="convert to pdf (*Nix: imagemagick)", - action="store_true", - default=False, - ) - parser.add_argument( - "-c", - "--credentials-file", - help="path to file containing your Scribd premium credentials", - ) - - return parser - - -def _command_line(): - """ - This function that gets executed when called via command-line. - """ - parser = get_arguments() - args = parser.parse_args() - url = args.url - pdf = args.pdf - images = args.images - - if args.credentials_file: - credentials_file = args.credentials_file - authorize.set_credentials(credentials_file) - - scribd_link = Downloader(url) - downloaded_content = scribd_link.download(is_image_document=images) - if pdf: - print("\nConverting to {}..".format(downloaded_content.pdf_path)) - downloaded_content.to_pdf() - - -if __name__ == "__main__": - _command_line() diff --git a/scribdl/const.py b/scribdl/const.py deleted file mode 100644 index 4d18ef1..0000000 --- a/scribdl/const.py +++ /dev/null @@ -1,8 +0,0 @@ -# Replace these values with ones generated for your web-browser when -# logged into a Scribd premium-account. This will allow access to -# full audiobooks. 
- -premium_cookies = { - "_scribd_session": "eyJzZXNzaW9uX2lkIjoiqmQxZTM0MjUzMzZhMTMzMzgwZTc2ODg5ZGQ3ZjVkNmEiLCJfY3NyZl90b2tlbiI6IlY4bmhMRXo4S2RFZ0g2TnF1amZwUHNIVFBWZ1Z0SVhHTGZNTkF1Ull2NEU9IiwiY2FydDMiOnsiY2FydF9pdGVtcyI6W3sidGl0bGUiOiJNb250aCBQYXNzIiwiZGVzY3JpcHRpb24iOiIxIG1vbnRoIHVubGltaXRlZCBhY2Nlc3MgdG8gU2NyaWJkIGRvY3VtZW50cyIsInByaWNlIjp7ImFtb3VudCI6ODk5LCJjdXJyZW5jeSI6IlVTRCJ9LCJwdXJjaGFzZV9jbGFzc19uYW1lIjoiUGF5bWVudHM6OkFyY2hpdmVQdXJjaGFzZSIsInB1cmNoYXNlX2F0dHJpYnV0ZXMiOnt9fV0sInN1bW1hcnkiOiJSZWN1cnJpbmcgTW9udGhseSBNZW1iZXJzaGlwIGZvciAkOC45OS9tb250aCIsInN1YnNjcmlwdGlvbiI6dHJ1ZSwicHJvZHVjdF9oYW5kbGUiOiJzY3JpYmQtcG1wLW1vbnRobHktdW5saW1pdGVkLXN1YnNjcmlwdGlvbi05MDAiLCJzdWJzY3JpcHRpb25fZHVyYXRpb24iOiIxLm1vbnRoIiwiY2hlY2tvdXRfdGl0bGUiOiJSZWFkIGJvb2tzLCBhdWRpb2Jvb2tzLCBhbmQgbW9yZSBvbiBhbnkgZGV2aWNlIiwiY2FsbGJhY2tfY2xhc3NfbmFtZSI6IkFyY2hpdmVGbG93Q29udHJvbGxlciIsImFmdGVyX2NoZWNrb3V0X2xpbmsiOnsibGFiZWwiOiJCYWNrIHRvIFNjcmliZCIsInVybCI6Imh0dHBzOi8vd3d3LnNjcmliZC5jb20vIn0sIm1ldGFkYXRhIjp7ImNvbnRleHQiOiJwbXAiLCJwYWdlIjoiaG9tZSIsImFjdGlvbiI6ImV4cGlyZWRfYm9va19wcmV2aWV3IiwicmVzdG9yZV9wYWdlIjoiaG9tZSIsInJlc3RvcmVfYWN0aW9uIjoiZXhwaXJlZF9ib29rX3ByZXZpZXciLCJsb2dnZWRfaW4iOnRydWUsInBsYXRmb3JtIjoid2ViIiwiZ2FfY2xpZW50X2lkIjoiMTQ0OTM2ODE5MS4xNTQ3NjQ3MDQxIn0sIm1lc3NhZ2UiOiJZb3UgaGF2ZSBjb21wbGV0ZWQgeW91ciBmcmVlIHRyaWFsIGFuZCB3aWxsIGJlIGNoYXJnZWQgJDguOTkvbW9udGggZm9yIHRoZSBtZW1iZXJzaGlwLiIsInBheXBhbF9kaXNhYmxlZCI6ZmFsc2UsImV4dGlyZXNfYXQiOjE1NDgzOTEzNjN9LCJyIjoiMTU0NzkwNDY2MiIsIndvcmRfaWQiOjc1OTU5OTY5LCJwIjoxMzg2NjM1MzA0LCJsYXN0X3JlYXV0aCI6MTU0NzkwNDY2Mn0%3D--c979f3a1da6282d3f497d45d0324b4802ebcfcdc", - "_scribd_expire": "1547904862", -} diff --git a/scribdl/content/__init__.py b/scribdl/content/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scribdl/content/audiobook.py b/scribdl/content/audiobook.py deleted file mode 100644 index a49228d..0000000 --- a/scribdl/content/audiobook.py +++ /dev/null @@ -1,297 +0,0 @@ -from bs4 import BeautifulSoup -import requests 
-import json -import re - -from .base import ScribdBase -from .. import internals -from .. import const -from .. import exceptions - - -class Track: - """ - A class for an audio chapter in a Scribd audiobook playlist. - - Parameters - ---------- - track: `dict` - A dictionary information about an audiobook chapter - containing the keys: "url", "part_number" and "chapter_number". - """ - - def __init__(self, track): - self.url = track["url"] - self.part_number = track["part_number"] - self.chapter_number = track["chapter_number"] - self._track = track - - def download(self, path): - """ - Downloads the audiobook chapter to the given path. - """ - internals.download_stream(self.url, path) - - -class Playlist: - """ - A class for a Scribd audiobook playlist. - - Parameters - ---------- - title: `str` - The title of the audiobook. - - playlist: `dict` - A dictionary information about an audiobook playlist and - its tracks containing the keys: "playlist", "expires" and - "playlist_token". - """ - - def __init__(self, title, playlist): - self.title = title - self.sanitized_title = internals.sanitize_title(title) - self.tracks = [ Track(track) for track in playlist["playlist"] ] - self._playlist = playlist - self.download_paths = [] - - def download(self): - """ - Downloads all the chapters available in the playlist. - """ - for track in self.tracks: - path = "{0}_{1}.mp3".format(self.sanitized_title, track.chapter_number) - dl_str = 'Downloading chapter-{0} ({1}) to "{2}"'.format(track.chapter_number, - track.url, - path) - print(dl_str) - track.download(path) - self.download_paths.append(path) - - -class ScribdAudioBook(ScribdBase): - """ - A base class for downloading audiobooks off Scribd. - - Parameters - ---------- - url: `str` - A string containing Scribd audiobook URL. 
- """ - - def __init__(self, audiobook_url): - super().__init__(audiobook_url) - scribd_id_search = re.search("[0-9]{9}", audiobook_url) - scribd_id = scribd_id_search.group() - - self._audiobook_keys= None - self._book_id = None - self._author_id = None - self._license_id = None - self._playlist = None - - self.audiobook_url = audiobook_url - self.scribd_id = scribd_id - - # Replace these cookie values with ones generated when logged into a - # Scribd premium-account. This will allow access to full audiobooks. - self.cookies = const.premium_cookies - - @property - def audiobook_keys(self): - """ - Stores scraped information for an audiobook. - """ - if not self._audiobook_keys: - audiobook_keys = self._scrape_audiobook_page() - try: - authenticate_page_keys = self._scrape_authentication_page() - except exceptions.ScribdFetchError: - pass - else: - audiobook_keys.update(authenticate_page_keys) - self._audiobook_keys = audiobook_keys - return self._audiobook_keys - - @property - def session_key(self): - """ - Returns the Scribd session key used to communicate with https://api.findawayworld.com/. - """ - try: - session_key = self.audiobook_keys["audiobook"]["session_key"] - except KeyError: - session_key = None - return session_key - - @property - def headers(self): - """ - Constructs headers to pass with the network request. - """ - return {"Session-Key": self.session_key} - - @property - def preview_url(self): - """ - The free-to-access URL of the audiobook. - """ - return self.audiobook_keys["preview_url"] - - @property - def book_id(self): - """ - The Book-ID of the audiobook. - """ - if not self._book_id: - audiobook = self.audiobook_keys - self._book_id = audiobook["book_id"] - return self._book_id - - @property - def author_id(self): - """ - The Author-ID of Scribd used to authenticate with - https://api.findawayworld.com/. 
- """ - if not self._author_id: - audiobook = self.audiobook_keys - self._author_id = audiobook["author_id"] - return self._author_id - - @property - def license_url(self): - """ - Returns the URL used to fetch the License-ID. - """ - return "https://api.findawayworld.com/v4/accounts/scribd-{0}/audiobooks/{1}".format(self.author_id, self.book_id) - - @property - def license_id(self): - """ - Returns the License-ID to be used by Scribd to fetch - the audiobook content from http://api.findawayworld.com/. - """ - if not self._license_id: - try: - self._license_id = self._get_license_id() - except exceptions.ScribdFetchError: - self._license_id = None - return self._license_id - - @property - def playlist_url(self): - """ - Returns the audiobook playlist URL. - """ - return "https://api.findawayworld.com/v4/audiobooks/{}/playlists".format(self.book_id) - - @property - def authenticate_url(self): - """ - Authentication URL for premium Scribd accounts - (if this didn't exist, we would have been able to download - complete audiobooks off Scribd without needing a premium account). - """ - return "https://www.scribd.com/listen/{}".format(self.scribd_id) - - @property - def premium_cookies(self): - """ - Returns a boolean based on whether the user is authenticated - with a premium Scribd account. - """ - try: - premium_cookies = bool(self.license_id) - except exceptions.ScribdFetchError: - premium_cookies = False - return premium_cookies - - @property - def playlist(self): - """ - Returns a `Playlist` object. - """ - if not self._playlist: - self._playlist = Playlist(self.title, self.make_playlist()) - return self._playlist - - def _get_license_id(self): - """ - Scrapes the License-ID for the audiobook. We need to handle retries - as Scribd can sometimes fail to deliver the License-ID in the HTML. 
- """ - requests.get(self.authenticate_url, cookies=self.cookies) - response = requests.get(self.license_url, headers=self.headers) - response_dict = json.loads(response.text) - try: - license_id = response_dict["licenses"][0]["id"] - except KeyError: - raise exceptions.ScribdFetchError("Unable to fetch the License ID for the audiobook. This attribute" - "is only available when using a premium Scribd account.") - else: - return license_id - - def download(self): - raise NotImplementedError("Use method `ScribdAudioBook.playlist.download` instead.") - - def _scrape_audiobook_page(self): - """ - Scrapes the provided audiobook URL for information scraps. - """ - response = requests.get(self.audiobook_url, cookies=self.cookies) - # response = requests.get(self.audiobook_url) - soup = BeautifulSoup(response.text, "html.parser") - - div_tag = soup.find("div", {"data-track_category": "book_preview"}) - text = json.loads(div_tag["data-push_state"]) - preview_url = text["audiobook_sample_url"] - book_id_search = re.search("[0-9]{5,6}", preview_url) - book_id = book_id_search.group() - - js_tag = soup.find_all("script", {"type": "text/javascript"})[-1] - js_code = js_tag.get_text() - author_id_search = re.search("[0-9]{8,9}", js_code) - author_id = author_id_search.group() if author_id_search else None - - return {"preview_url": preview_url, "book_id": book_id, "author_id": author_id} - - def _scrape_authentication_page(self): - """ - Scrapes the authentication/listen page of the audiobook - for information scraps. 
- """ - response = requests.get(self.authenticate_url, cookies=self.cookies) - soup = BeautifulSoup(response.text, "html.parser") - js_tag = soup.find_all("script", {"type": "text/javascript"})[-2] - - try: - start = response.text[response.text.find('{"eor_url":'):] - raw_info_str, *_ = start.split("\n") - final_curlbrace = -(raw_info_str[::-1].find("}")) - info_str = raw_info_str[:final_curlbrace] - info_dict = json.loads(info_str) - info_dict["pingback_url"] = "".join(info_dict["pingback_url"]) - except ValueError: - raise exceptions.ScribdFetchError("Unable to fetch information via the authentication page for the" - "audiobook. This is only available when using a premium Scribd account.") - else: - return info_dict - - def make_playlist(self): - """ - Generates a playlist dictionary based on whether the user - is authenticated with a premium Scribd account or not. - """ - if self.premium_cookies: - data = '{"license_id":"' + self.license_id + '"}' - response = requests.post(self.playlist_url, headers=self.headers, data=data) - playlist = json.loads(response.text) - else: - playlist = {"playlist": [{"url": self.preview_url, - "part_number": "preview", - "chapter_number": "preview"}], - "expires": None, - "playlist_token": None} - - return playlist diff --git a/scribdl/content/base.py b/scribdl/content/base.py deleted file mode 100644 index 1af999f..0000000 --- a/scribdl/content/base.py +++ /dev/null @@ -1,65 +0,0 @@ -from bs4 import BeautifulSoup -import requests -from abc import ABCMeta, abstractmethod -import six - -from .. import internals - - -@six.add_metaclass(ABCMeta) -class ScribdBase: - """ - A base class for Scribd books, documents and audiobooks. - - Parameters - ---------- - url : `str` - A string containing Scribd URL. - """ - - def __init__(self, url): - self.url = url - self._title = None - self._sanitized_title = None - self._hidden_soup = None - - @property - def title(self): - """ - Scrapes the title of the Scribd document. 
- """ - if not self._title: - title = self._soup.find("h1").get_text() - # this unneed prefix may happen on textual books - unneeded_prefix = "Currently Reading: " - if title.startswith(unneeded_prefix): - title = title[len(unneeded_prefix):] - self._title = title - return self._title - - @property - def sanitized_title(self): - """ - Remove special characters from title to make - it suitable for filenames. - """ - if not self._sanitized_title: - self._sanitized_title = internals.sanitize_title(self.title) - return self._sanitized_title - - @abstractmethod - def download(self): - """ - An abstract method for fetching content off Scribd book or document. - """ - pass - - @property - def _soup(self): - """ - Parse HTML. - """ - if not self._hidden_soup: - response = requests.get(self.url) - self._hidden_soup = BeautifulSoup(response.text, "html.parser") - return self._hidden_soup diff --git a/scribdl/content/book.py b/scribdl/content/book.py deleted file mode 100644 index 5657fb1..0000000 --- a/scribdl/content/book.py +++ /dev/null @@ -1,186 +0,0 @@ -import requests -import json -import os - -from .base import ScribdBase -from .. import internals -from .. import const - - -class ScribdBook(ScribdBase): - """ - A class for downloading books off Scribd. - - Parameters - ---------- - url : `str` - A string containing Scribd book URL. - """ - - def __init__(self, book_url): - super().__init__(book_url) - self.filename = self.sanitized_title + ".md" - self.url = book_url - self._book_id = None - self._csrf_token = None - - @property - def book_id(self): - """ - Extracts the book ID. - """ - if not self._book_id: - splits = self.url.split("/") - for split in splits: - try: - book_id = int(split) - except ValueError: - continue - self._book_id = book_id - return self._book_id - - @property - def csrf_token_header(self): - """ - CSRF-Token is used to gain access to premium content - in textual books premium content of audiobooks can still - be downloaded without it though. 
- """ - if not self._csrf_token: - csrf_token_url = "https://scribd.com/csrf_token" - response = requests.get(csrf_token_url, cookies=const.premium_cookies) - json_dict = json.loads(response.text) - self._csrf_token = {"X-CSRF-Token": json_dict["csrf_token"]} - return self._csrf_token - - def download(self, filename=None): - """ - Processing text and image extraction. - """ - if not filename: - filename = self.filename - - token = self._get_token() - chapter = 1 - - while True: - response = self.fetch_response(chapter, token) - - if response.status_code == 403: - token = self._get_token() - response = self.fetch_response(chapter, token) - - if response.status_code == 403: - print("No more content being exposed by Scribd!") - break - - try: - json_response = json.loads(response.text) - except ValueError: - print("Completed downloading book!") - break - - self._extract_text_blocks(json_response, chapter, token, filename) - - chapter += 1 - - return filename - - def _extract_text(self, content, chapter, token): - """ - Extracts text given a block of raw html. - """ - words = [] - for word in content["words"]: - if word.get("break_map", None): - words.append(word["break_map"]["text"]) - elif word.get("text", None): - words.append(word["text"]) - elif word.get("type", None) == "image": - image_url = self._format_image_url(chapter, word["src"], token) - string_text = self._process_image_text(word, image_url) - words.append(string_text) - else: - words += self._extract_text(word, chapter, token) - return words - - def fetch_response(self, chapter, token): - url = self._format_content_url(chapter, token) - response = requests.get(url) - return response - - def _extract_text_blocks(self, response_dict, chapter, token, filename): - """ - Extracts small blocks of raw book text and image - URLs and writes them to a file. 
- """ - for block in response_dict["blocks"]: - if block["type"] == "text": - string_text = ( - " ".join(self._extract_text(block, chapter, token)) + "\n\n" - ) - elif block["type"] == "image": - image_url = self._format_image_url(chapter, block["src"], token) - string_text = self._process_image_text(block, image_url) - - if block["type"] in ("text", "image"): - print(string_text) - self.save_text(string_text, filename) - - def _process_image_text(self, block, image_url): - image_name = block["src"].replace("images/", "") - image_path = os.path.join(self.sanitized_title, image_name) - self._download_image(image_url, image_path) - string_text = "![{}]({})\n\n".format(image_name, image_path) - return string_text - - def _download_image(self, url, path): - try: - os.makedirs(os.path.dirname(path)) - except OSError: - pass - internals.download_stream(url, path) - - def _extract_image_path_from_url(self, url): - image_name = url.split("/")[-1].split("?token=")[0] - return os.path.join(self.book_id, image_name) - - def _format_content_url(self, chapter, token): - """ - Generates a string which points to a URL containing - the raw book text. - """ - unformatted_url = ( - "https://www.scribd.com/scepub/{}/chapters/{}/contents.json?token={}" - ) - return unformatted_url.format(self.book_id, chapter, token) - - def _format_image_url(self, chapter, image, token): - """ - Generates a string which points to an image URL. - """ - unformatted_url = "https://www.scribd.com/scepub/{}/chapters/{}/{}?token={}" - return unformatted_url.format(self.book_id, chapter, image, token) - - def _get_token(self): - """ - Fetches a uniquely generated token for the current - session. 
- """ - # data can take take any value but it must take some value - # otherwise Scribd will reject the request - data = "data" - - token_url = "https://www.scribd.com/read2/{}/access_token".format(self.book_id) - token = requests.post(token_url, - headers=self.csrf_token_header, - cookies=const.premium_cookies, - data=data) - return json.loads(token.text)["response"] - - def save_text(self, string_text, filename): - """ - Appends text to the passed file. - """ - with open(filename, "a", encoding="utf-8") as f: - f.write(string_text) diff --git a/scribdl/content/document.py b/scribdl/content/document.py deleted file mode 100644 index 5083d43..0000000 --- a/scribdl/content/document.py +++ /dev/null @@ -1,204 +0,0 @@ -from bs4 import BeautifulSoup -import requests - -import os - -from abc import abstractmethod -from .base import ScribdBase -from .. import internals - - -class ScribdDocument(ScribdBase): - """ - A base class for downloading documents off Scribd. - - Parameters - ---------- - url : `str` - A string containing Scribd document URL. - """ - - def __init__(self, document_url): - super().__init__(document_url) - self.url = document_url - self._jsonp_urls = None - self._hidden_soup = None - - @property - def jsonp_urls(self): - """ - Extracts all URLs ending with '.jsonp' by parsing the - HTML code. - """ - if not self._jsonp_urls: - js_text = self._soup.find_all("script", type="text/javascript") - jsonp_urls = [] - for opening in js_text: - for inner_opening in opening: - jsonp = self._extract_jsonp_url(inner_opening) - if jsonp: - jsonp_urls.append(jsonp) - self._jsonp_urls = jsonp_urls - return self._jsonp_urls - - def _extract_jsonp_url(self, inner_opening): - """ - Extracts URLs ending with '.jsonp'. These URLs contain the - raw document text. 
- """ - portion1 = inner_opening.find("https://") - - if portion1 == -1: - jsonp = None - else: - portion2 = inner_opening.find(".jsonp") - jsonp = inner_opening[portion1 : portion2 + 6] - - return jsonp - - @abstractmethod - def download(self): - """ - An abstract method which will fetch the actual content - found in the '.jsonp' URLs. - """ - pass - - -class ScribdTextualDocument(ScribdDocument): - """ - A class for downloading textual documents off Scribd. - - Parameters - ---------- - document_url : `str` - A string containing Scribd document URL. - """ - - def __init__(self, document_url): - super().__init__(document_url) - self.filename = self.sanitized_title + ".md" - - def download(self, filename=None): - """ - Generates the filename and processes the text extraction - to this file. - """ - if not filename: - filename = self.filename - - print("Extracting text to", self.sanitized_title, "\n") - self._text_extractor(filename) - return filename - - def _text_extractor(self, filename): - """ - Saves text from every '.jsonp' URL. - """ - for jsonp_url in self.jsonp_urls: - self._save_text(jsonp_url, filename) - - def _save_text(self, jsonp, filename): - """ - Makes a GET request to the '.jsonp' URL and saves - the text to the passed file. - """ - response = requests.get(jsonp).text - page_no = response[11:12] - - response_head = ( - (response) - .replace("window.page" + page_no + '_callback(["', "") - .replace("\\n", "") - .replace("\\", "") - .replace('"]);', "") - ) - soup_content = BeautifulSoup(response_head, "html.parser") - - for x in soup_content.find_all("span", {"class": "a"}): - xtext = internals.fix_encoding(x.get_text()) - print(xtext) - - extraction = xtext + "\n\n" - with open(filename, "a") as feed: - feed.write(extraction) - - -class ScribdImageDocument(ScribdDocument): - """ - A class for downloading image documents off Scribd. - - Parameters - ---------- - document_url : `str` - A string containing Scribd document URL. 
- """ - - def __init__(self, document_url): - super().__init__(document_url) - self._image_download_counter = 1 - - def download(self, initial_filename=None): - """ - Function for downloading images off '.jsonp' URLs to - filenames. - """ - if not initial_filename: - initial_filename = self.sanitized_title - - downloaded_html_images = self._html_image_extractor(initial_filename) - downloaded_jsonp_images = self._jsonp_image_extractor(initial_filename) - return downloaded_html_images + downloaded_jsonp_images - - def _jsonp_image_extractor(self, initial_filename): - """ - Extract images from extracted .jsonp URLs. - """ - downloaded_images = [] - found = self._image_download_counter > 1 - for jsonp_url in self.jsonp_urls: - filename = "{}_{}.jpg".format(initial_filename, self._image_download_counter) - img_url = self._convert_jsonp_url_to_image_url(jsonp_url, found=found) - self._save_image(img_url, filename) - downloaded_images.append(filename) - self._image_download_counter += 1 - return downloaded_images - - def _html_image_extractor(self, initial_filename): - """ - Extracts images that are directly embedded in the original - HTML page. - """ - downloaded_images = [] - absimg = self._soup.find_all("img", {"class": "absimg"}, src=True) - for img in absimg: - filename = "{}_{}.jpg".format(initial_filename, self._image_download_counter) - self._save_image(img["src"], filename) - downloaded_images.append(filename) - self._image_download_counter += 1 - return downloaded_images - - def _convert_jsonp_url_to_image_url(self, jsonp_url, found): - """ - Gets the image URL corresponding to the '.jsonp' URL. 
- """ - if jsonp_url.endswith(".jsonp"): - replacement = jsonp_url.replace("/pages/", "/images/") - if found: - replacement = replacement.replace(".jsonp", "/000.jpg") - else: - replacement = replacement.replace(".jsonp", ".jpg") - else: - replacement = jsonp_url - return replacement - - def _save_image(self, url, imagename): - """ - Skips downloading if the image is already downloaded, - otherwise downloads it locally. - """ - print("Downloading", imagename) - already_present = os.listdir(".") - if imagename in already_present: - return - internals.download_stream(url, imagename) diff --git a/scribdl/content/test/__init__.py b/scribdl/content/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scribdl/content/test/test_audiobook.py b/scribdl/content/test/test_audiobook.py deleted file mode 100644 index d918859..0000000 --- a/scribdl/content/test/test_audiobook.py +++ /dev/null @@ -1,87 +0,0 @@ -from .. import audiobook -from ... import exceptions - -import pytest - - -@pytest.fixture(scope="module") -def scribd_audiobook(): - return audiobook.ScribdAudioBook( - "https://www.scribd.com/audiobook/237606860/100-Ways-to-Motivate-Yourself-Change-Your-Life-Forever") - - -class TestScribdAudioBook: - def test_title(self, scribd_audiobook): - assert scribd_audiobook.title == "100 Ways to Motivate Yourself: Change Your Life Forever" - - def test_sanitized_title(self, scribd_audiobook): - assert scribd_audiobook.sanitized_title == "100_Ways_to_Motivate_Yourself__Change_Your_Life_Forever" - - def test_preview_url(self, scribd_audiobook): - assert scribd_audiobook.preview_url == "https://samples.findawayworld.com/19991/19991_sample.mp3" - - def test_scribd_id(self, scribd_audiobook): - assert scribd_audiobook.scribd_id == "237606860" - - def test_authenticate_url(self, scribd_audiobook): - assert scribd_audiobook.authenticate_url == "https://www.scribd.com/listen/237606860" - - def test_author_id(self, scribd_audiobook): - assert 
scribd_audiobook.author_id == None - - def test_book_id(self, scribd_audiobook): - assert scribd_audiobook.book_id == "19991" - - def test_playlist_url(self, scribd_audiobook): - assert scribd_audiobook.playlist_url == "https://api.findawayworld.com/v4/audiobooks/19991/playlists" - - def test_premium_cokies(self, scribd_audiobook): - assert scribd_audiobook.premium_cookies == False - - def test_license_url(self, scribd_audiobook): - assert scribd_audiobook.license_url == "https://api.findawayworld.com/v4/accounts/scribd-None/audiobooks/19991" - - def test_license_id(self, scribd_audiobook): - assert scribd_audiobook.license_id == None - - -class TestPlaylist: - def test_playlist_instance(self, scribd_audiobook): - assert isinstance(scribd_audiobook.playlist, audiobook.Playlist) - - def test_playlist_title(self, scribd_audiobook): - assert scribd_audiobook.playlist.title == "100 Ways to Motivate Yourself: Change Your Life Forever" - - def test_playlist_sanitized_title(self, scribd_audiobook): - assert scribd_audiobook.playlist.sanitized_title == "100_Ways_to_Motivate_Yourself__Change_Your_Life_Forever" - - def test_playlist_raw_content(self, scribd_audiobook): - raw_content = {'expires': None, - 'playlist': [{'chapter_number': 'preview', - 'part_number': 'preview', - 'url': 'https://samples.findawayworld.com/19991/19991_sample.mp3'}], - 'playlist_token': None} - assert scribd_audiobook.playlist._playlist == raw_content - - -class TestTrack: - def test_track_instance(self, scribd_audiobook): - assert isinstance(scribd_audiobook.playlist.tracks[0], audiobook.Track) - - def test_track_count(self, scribd_audiobook): - assert len(scribd_audiobook.playlist.tracks) == 1 - - def test_track_url(self, scribd_audiobook): - assert scribd_audiobook.playlist.tracks[0].url == "https://samples.findawayworld.com/19991/19991_sample.mp3" - - def test_track_part_number(self, scribd_audiobook): - assert scribd_audiobook.playlist.tracks[0].part_number == "preview" - - def 
test_track_chapter_number(self, scribd_audiobook): - assert scribd_audiobook.playlist.tracks[0].chapter_number == "preview" - - def test_track_raw_content(self, scribd_audiobook): - raw_content = {'chapter_number': 'preview', - 'part_number': 'preview', - 'url': 'https://samples.findawayworld.com/19991/19991_sample.mp3'} - assert scribd_audiobook.playlist.tracks[0]._track == raw_content diff --git a/scribdl/content/test/test_base.py b/scribdl/content/test/test_base.py deleted file mode 100644 index a481589..0000000 --- a/scribdl/content/test/test_base.py +++ /dev/null @@ -1,26 +0,0 @@ -from .. import base - -import pytest - - -def test_abstract_class(): - with pytest.raises(TypeError): - x = base.ScribdBase() - - -class ScribdBaseTop(base.ScribdBase): - def download(self): - pass - - -class TestScribdBase: - @pytest.fixture(scope="class") - def scribd_base(self): - return ScribdBaseTop( - "https://www.scribd.com/audiobook/367877343/Intelligence-in-Nature-An-Inquiry-into-Knowledge") - - def test_title(self, scribd_base): - assert scribd_base.title == "Intelligence in Nature: An Inquiry into Knowledge" - - def test_sanitized_title(self, scribd_base): - assert scribd_base.sanitized_title == "Intelligence_in_Nature__An_Inquiry_into_Knowledge" diff --git a/scribdl/content/test/test_book.py b/scribdl/content/test/test_book.py deleted file mode 100644 index b0743a6..0000000 --- a/scribdl/content/test/test_book.py +++ /dev/null @@ -1,26 +0,0 @@ -from .. 
import book - -import pytest - - -@pytest.fixture(scope="module") -def scribd_book(): - return book.ScribdBook( - "https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room") - - -class TestScribdBook: - def test_title(self, scribd_book): - assert scribd_book.title == "Confessions of a Casting Director: Help Actors Land Any Role with Secrets from Inside the Audition Room" - - def test_sanitized_title(self, scribd_book): - assert scribd_book.sanitized_title == "Confessions_of_a_Casting_Director__Help_Actors_Land_Any_Role_with_Secrets_from_Inside_the_Audition_Room" - - def test_filename(self, scribd_book): - assert scribd_book.filename == "Confessions_of_a_Casting_Director__Help_Actors_Land_Any_Role_with_Secrets_from_Inside_the_Audition_Room.md" - - def test_book_id(self, scribd_book): - assert scribd_book.book_id == 189087235 - - def test_url(self, scribd_book): - assert scribd_book.url == "https://www.scribd.com/read/189087235/Confessions-of-a-Casting-Director-Help-Actors-Land-Any-Role-with-Secrets-from-Inside-the-Audition-Room" diff --git a/scribdl/content/test/test_document.py b/scribdl/content/test/test_document.py deleted file mode 100644 index a2debfe..0000000 --- a/scribdl/content/test/test_document.py +++ /dev/null @@ -1,43 +0,0 @@ -from .. 
import document - -import pytest - - -@pytest.fixture(scope="module") -def scribd_textual_document(): - return document.ScribdTextualDocument( - "https://www.scribd.com/document/55949937/33-Strategies-of-War") - - -@pytest.fixture(scope="module") -def scribd_image_document(): - return document.ScribdImageDocument( - "https://scribd.com/doc/17142797/Case-in-Point") - - -class TestScribdTextualDocument: - def test_title(self, scribd_textual_document): - assert scribd_textual_document.title == "33 Strategies of War" - - def test_sanitized_title(self, scribd_textual_document): - assert scribd_textual_document.sanitized_title == "33_Strategies_of_War" - - def test_url(self, scribd_textual_document): - assert scribd_textual_document.url == "https://www.scribd.com/document/55949937/33-Strategies-of-War" - - def test_jsonp_urls(self, scribd_textual_document): - assert len(scribd_textual_document.jsonp_urls) == 19 - - -class TestScribdImageDocument: - def test_title(self, scribd_image_document): - assert scribd_image_document.title == "Case in Point" - - def test_sanitized_title(self, scribd_image_document): - assert scribd_image_document.sanitized_title == "Case_in_Point" - - def test_url(self, scribd_image_document): - assert scribd_image_document.url == "https://scribd.com/doc/17142797/Case-in-Point" - - def test_jsonp_urls(self, scribd_image_document): - assert len(scribd_image_document.jsonp_urls) == 182 diff --git a/scribdl/downloader.py b/scribdl/downloader.py deleted file mode 100644 index 6560dd4..0000000 --- a/scribdl/downloader.py +++ /dev/null @@ -1,105 +0,0 @@ -from bs4 import BeautifulSoup -import requests - -from .content.document import ScribdTextualDocument -from .content.document import ScribdImageDocument -from .content.book import ScribdBook -from .content.audiobook import ScribdAudioBook - -from .pdf_converter import ConvertToPDF - - -class Downloader: - """ - A helper class for downloading books and documents off Scribd. 
- - Parameters - ---------- - url : `str` - A string containing path to a Scribd URL - """ - - def __init__(self, url): - self.url = url - is_audiobook = self.is_audiobook() - if is_audiobook: - is_book = False - else: - is_book = self.is_book() - - self._is_audiobook = is_audiobook - self._is_book = is_book - - def download(self, is_image_document=None): - """ - Downloads books and documents from Scribd. - Returns an object of `ConvertToPDF` class. - """ - if self._is_audiobook: - content = self._download_audiobook() - return content - - if self._is_book: - content = self._download_book() - else: - if is_image_document is None: - raise TypeError( - "The input URL points to a document. You must specify " - "whether it is an image document or a textual document " - "in the `image_document` parameter." - ) - content = self._download_document(is_image_document) - return content - - def _download_book(self): - """ - Downloads books off Scribd. - Returns an object of `ConvertToPDF` class. - """ - book = ScribdBook(self.url) - md_path = book.download() - pdf_path = "{}.pdf".format(book.sanitized_title) - return ConvertToPDF(md_path, pdf_path) - - def _download_document(self, image_document): - """ - Downloads textual and image documents off Scribd. - Returns an object of `ConvertToPDF` class. - """ - if image_document: - document = ScribdImageDocument(self.url) - else: - document = ScribdTextualDocument(self.url) - - content_path = document.download() - pdf_path = "{}.pdf".format(document.sanitized_title) - return ConvertToPDF(content_path, pdf_path) - - def _download_audiobook(self): - """ - Downloads audiobooks off Scribd. - Returns a list containing local audio filepaths. - """ - audiobook = ScribdAudioBook(self.url) - playlist = audiobook.playlist - if not audiobook.premium_cookies: - print("Premium cookies not detected. 
Only the preview version of audiobook will be downloaded.") - playlist.download() - return playlist.download_paths - - def is_book(self): - """ - Checks whether the passed URL points to a Scribd book - or a Scribd document. - """ - response = requests.get(self.url) - soup = BeautifulSoup(response.text, "html.parser") - content_class = soup.find("body")["class"] - matches_with_book = content_class[0] == "autogen_class_views_layouts_book_web" - return matches_with_book - - def is_audiobook(self): - """ - Checks whether the passed URL points to a Scribd audiobook. - """ - return "/audiobook/" in self.url diff --git a/scribdl/exceptions.py b/scribdl/exceptions.py deleted file mode 100644 index 1a7da66..0000000 --- a/scribdl/exceptions.py +++ /dev/null @@ -1,12 +0,0 @@ -class ScribdFetchError(Exception): - """ - Handle exceptions for anything Scribd. - - Parameters - ---------- - message: `str` - Exception message. - """ - - def __init__(self, message): - super().__init__(message) diff --git a/scribdl/internals.py b/scribdl/internals.py deleted file mode 100644 index 8644276..0000000 --- a/scribdl/internals.py +++ /dev/null @@ -1,40 +0,0 @@ -import requests -import sys -import shutil - -GITHUB_URL_BASE = "https://github.com/ritiek/scribd-downloader" - - -def fix_encoding(query): - """ - Encoding fixes for Python 2 and Python 3 cross-compatibilty. - """ - if sys.version_info > (3, 0): - return query - else: - return query.encode("utf-8") - - -def sanitize_title(title): - """ - Remove forbidden characters from title that will prevent Windows - from creating directory. - - Also change ' ' to '_' to preserve previous behavior. - """ - forbidden_chars = ' *"/\<>:|(),' - replace_char = "_" - - for ch in forbidden_chars: - title = title.replace(ch, replace_char) - - return title - - -def download_stream(url, filepath): - """ - Stream stuff from the Internet to a local file. 
- """ - response = requests.get(url, stream=True) - with open(filepath, "wb") as out_file: - shutil.copyfileobj(response.raw, out_file) diff --git a/scribdl/pdf_converter.py b/scribdl/pdf_converter.py deleted file mode 100644 index 2a010fc..0000000 --- a/scribdl/pdf_converter.py +++ /dev/null @@ -1,48 +0,0 @@ -from md2pdf.core import md2pdf -import img2pdf -import os - - -class ConvertToPDF: - """ - A class for converting downloading books and documents to PDF. - - Parameters - ---------- - input_content : `str`, `list` - A string containing path to a single markdown file - or a list containing paths to many images. - output_content : `str` - Output path of the generated PDF. - """ - - def __init__(self, input_content, output_path): - self.input_content = input_content - self.pdf_path = output_path - - def to_pdf(self): - """ - Converts to PDF depending upon the type of content, - i.e. images or markdown. - """ - if isinstance(self.input_content, list): - self._images_to_pdf() - else: - self._markdown_to_pdf() - - def _markdown_to_pdf(self): - """ - Converts markdown to PDF. - """ - md2pdf(self.pdf_path, - md_file_path=self.input_content, - base_url=os.getcwd()) - - def _images_to_pdf(self): - """ - Converts images to PDF. - """ - with open(self.pdf_path, "wb") as f: - open_images = [open(img, "rb") for img in self.input_content] - pdf_images = img2pdf.convert(open_images) - f.write(pdf_images) diff --git a/scribdl/test/__init__.py b/scribdl/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scribdl/test/test_command_line.py b/scribdl/test/test_command_line.py deleted file mode 100644 index 1ed0c6e..0000000 --- a/scribdl/test/test_command_line.py +++ /dev/null @@ -1,44 +0,0 @@ -from .. 
import command_line -import sys - -import pytest - - -class TestCommandLine: - def test_empty(self): - args = [] - parser = command_line.get_arguments() - with pytest.raises(SystemExit): - parser.parse_args(args) - - def test_image_no_url(self): - args = [] - args.append("-i") - parser = command_line.get_arguments() - with pytest.raises(SystemExit): - parser.parse_args(args) - - def test_image_url(self): - args = [] - args.append("-i") - args.append("https://example.com/") - parser = command_line.get_arguments() - parsed_args = parser.parse_args(args) - assert parsed_args.images and not parsed_args.pdf - - def test_pdf_url(self): - args = [] - args.append("-p") - args.append("https://example.com/") - parser = command_line.get_arguments() - parsed_args = parser.parse_args(args) - assert not parsed_args.images and parsed_args.pdf - - def test_image_pdf_url(self): - args = [] - args.append("-i") - args.append("-p") - args.append("https://example.com/") - parser = command_line.get_arguments() - parsed_args = parser.parse_args(args) - assert parsed_args.images and parsed_args.pdf diff --git a/scribdl/test/test_download.py b/scribdl/test/test_download.py deleted file mode 100644 index 4327e63..0000000 --- a/scribdl/test/test_download.py +++ /dev/null @@ -1,47 +0,0 @@ -from ..downloader import Downloader -import os - -import pytest - - -@pytest.fixture -def cwd_to_tmpdir(tmpdir): - os.chdir(str(tmpdir)) - - -def test_audiobook_download(cwd_to_tmpdir, monkeypatch): - audiobook_url = "https://www.scribd.com/audiobook/237606860/100-Ways-to-Motivate-Yourself-Change-Your-Life-Forever" - audiobook_downloader = Downloader(audiobook_url) - audio = audiobook_downloader.download() - assert audio[0] == "100_Ways_to_Motivate_Yourself__Change_Your_Life_Forever_preview.mp3" - assert os.path.getsize(audio[0]) == 2127830 - - -def test_text_document_download(cwd_to_tmpdir): - text_doc_url = "https://www.scribd.com/document/96882378/Trademark-License-Agreement" - text_downloader = 
Downloader(text_doc_url) - md_doc = text_downloader.download(is_image_document=False) - assert os.path.getsize(md_doc.input_content) in range(1000, 2000) - md_doc.to_pdf() - assert os.path.getsize(md_doc.pdf_path) in range(20000, 31000) - - -def test_img_document_download(cwd_to_tmpdir): - img_doc_url = "https://www.scribd.com/doc/136711944/Signature-Scanning-and-Verification-in-Finacle" - img_downloader = Downloader(img_doc_url) - imgs = img_downloader.download(is_image_document=True) - assert len(imgs.input_content) == 2 - imgs.to_pdf() - assert os.path.getsize(imgs.pdf_path) in range(140000, 150000) - - -def test_book_download(cwd_to_tmpdir, monkeypatch): - book_url = "https://www.scribd.com/read/262694921/Acting-The-First-Six-Lessons" - book_downloader = Downloader(book_url) - # We don't want to clutter stdout with book contents if this test fails - monkeypatch.setattr("builtins.print", lambda x: None) - md_book = book_downloader.download() - assert os.path.getsize(md_book.input_content) in range(10000, 20000) - md_book.to_pdf() - assert os.path.getsize(md_book.pdf_path) in range(200000, 2500000) - diff --git a/scribdl/test/test_internals.py b/scribdl/test/test_internals.py deleted file mode 100644 index f410beb..0000000 --- a/scribdl/test/test_internals.py +++ /dev/null @@ -1,15 +0,0 @@ -from .. 
import internals - -import pytest - - -SANITIZE_TITLE_TEST_TABLE = [ - ("good_title", "good_title"), - ("*bla", "_bla"), - ("**free_as_in_**", "__free_as_in__freedom___"), - ("troller*\"/\<>:|(haha)jojo", "troller_________haha_jojo"), -] - -@pytest.mark.parametrize("input_str, expected_str", SANITIZE_TITLE_TEST_TABLE) -def test_sanitize_title(input_str, expected_str): - assert internals.sanitize_title(input_str) == expected_str diff --git a/scribdl/version.py b/scribdl/version.py deleted file mode 100644 index 9c73af2..0000000 --- a/scribdl/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "1.3.1" diff --git a/setup.py b/setup.py index d83e4d7..7c11bae 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,6 @@ setup(name='scribd-downloader', version=__version__, description='Download documents, books and audiobooks off Scribd', - long_description_content_type="text/markdown", - long_description='### Readme.md', author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', packages=find_packages(), From 0147e8093fc9609a18d9a6f1b198c94cb110fd4f Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:30:35 +0300 Subject: [PATCH 11/13] add build.yml --- scribdl/__init__.py | 10 + scribdl/authorize.py | 0 scribdl/command_line.py | 61 +++++ scribdl/const.py | 0 scribdl/content/__init__.py | 0 scribdl/content/audiobook.py | 297 +++++++++++++++++++++++++ scribdl/content/base.py | 0 scribdl/content/book.py | 186 ++++++++++++++++ scribdl/content/document.py | 0 scribdl/content/test/__init__.py | 0 scribdl/content/test/test_audiobook.py | 0 scribdl/content/test/test_base.py | 0 scribdl/content/test/test_book.py | 0 scribdl/content/test/test_document.py | 0 scribdl/downloader.py | 105 +++++++++ scribdl/exceptions.py | 0 scribdl/internals.py | 40 ++++ scribdl/pdf_converter.py | 0 scribdl/test/__init__.py | 0 scribdl/test/test_command_line.py | 0 scribdl/test/test_download.py | 47 ++++ scribdl/test/test_internals.py | 0 scribdl/version.py | 1 + 23 files 
changed, 747 insertions(+) create mode 100644 scribdl/__init__.py create mode 100644 scribdl/authorize.py create mode 100644 scribdl/command_line.py create mode 100644 scribdl/const.py create mode 100644 scribdl/content/__init__.py create mode 100644 scribdl/content/audiobook.py create mode 100644 scribdl/content/base.py create mode 100644 scribdl/content/book.py create mode 100644 scribdl/content/document.py create mode 100644 scribdl/content/test/__init__.py create mode 100644 scribdl/content/test/test_audiobook.py create mode 100644 scribdl/content/test/test_base.py create mode 100644 scribdl/content/test/test_book.py create mode 100644 scribdl/content/test/test_document.py create mode 100644 scribdl/downloader.py create mode 100644 scribdl/exceptions.py create mode 100644 scribdl/internals.py create mode 100644 scribdl/pdf_converter.py create mode 100644 scribdl/test/__init__.py create mode 100644 scribdl/test/test_command_line.py create mode 100644 scribdl/test/test_download.py create mode 100644 scribdl/test/test_internals.py create mode 100644 scribdl/version.py diff --git a/scribdl/__init__.py b/scribdl/__init__.py new file mode 100644 index 0000000..974a0b3 --- /dev/null +++ b/scribdl/__init__.py @@ -0,0 +1,10 @@ +from .version import __version__ + +from .downloader import Downloader + +from .content.document import ScribdTextualDocument +from .content.document import ScribdImageDocument +from .content.book import ScribdBook +from .content.audiobook import ScribdAudioBook + +from .pdf_converter import ConvertToPDF diff --git a/scribdl/authorize.py b/scribdl/authorize.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/command_line.py b/scribdl/command_line.py new file mode 100644 index 0000000..a68d842 --- /dev/null +++ b/scribdl/command_line.py @@ -0,0 +1,61 @@ +import argparse + +from .downloader import Downloader +from . import authorize + + +def get_arguments(): + """ + Parses arguments off the command-line. 
+ """ + parser = argparse.ArgumentParser( + description="Download documents and books from scribd.com" + ) + + parser.add_argument("url", metavar="URL", type=str, help="scribd url to download") + parser.add_argument( + "-i", + "--images", + help="download url made up of images", + action="store_true", + default=False, + ) + parser.add_argument( + "-p", + "--pdf", + help="convert to pdf (*Nix: imagemagick)", + action="store_true", + default=False, + ) + parser.add_argument( + "-c", + "--credentials-file", + help="path to file containing your Scribd premium credentials", + ) + + return parser + + +def _command_line(): + """ + This function that gets executed when called via command-line. + """ + parser = get_arguments() + args = parser.parse_args() + url = args.url + pdf = args.pdf + images = args.images + + if args.credentials_file: + credentials_file = args.credentials_file + authorize.set_credentials(credentials_file) + + scribd_link = Downloader(url) + downloaded_content = scribd_link.download(is_image_document=images) + if pdf: + print("\nConverting to {}..".format(downloaded_content.pdf_path)) + downloaded_content.to_pdf() + + +if __name__ == "__main__": + _command_line() diff --git a/scribdl/const.py b/scribdl/const.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/__init__.py b/scribdl/content/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/audiobook.py b/scribdl/content/audiobook.py new file mode 100644 index 0000000..a49228d --- /dev/null +++ b/scribdl/content/audiobook.py @@ -0,0 +1,297 @@ +from bs4 import BeautifulSoup +import requests +import json +import re + +from .base import ScribdBase +from .. import internals +from .. import const +from .. import exceptions + + +class Track: + """ + A class for an audio chapter in a Scribd audiobook playlist. 
+ + Parameters + ---------- + track: `dict` + A dictionary information about an audiobook chapter + containing the keys: "url", "part_number" and "chapter_number". + """ + + def __init__(self, track): + self.url = track["url"] + self.part_number = track["part_number"] + self.chapter_number = track["chapter_number"] + self._track = track + + def download(self, path): + """ + Downloads the audiobook chapter to the given path. + """ + internals.download_stream(self.url, path) + + +class Playlist: + """ + A class for a Scribd audiobook playlist. + + Parameters + ---------- + title: `str` + The title of the audiobook. + + playlist: `dict` + A dictionary information about an audiobook playlist and + its tracks containing the keys: "playlist", "expires" and + "playlist_token". + """ + + def __init__(self, title, playlist): + self.title = title + self.sanitized_title = internals.sanitize_title(title) + self.tracks = [ Track(track) for track in playlist["playlist"] ] + self._playlist = playlist + self.download_paths = [] + + def download(self): + """ + Downloads all the chapters available in the playlist. + """ + for track in self.tracks: + path = "{0}_{1}.mp3".format(self.sanitized_title, track.chapter_number) + dl_str = 'Downloading chapter-{0} ({1}) to "{2}"'.format(track.chapter_number, + track.url, + path) + print(dl_str) + track.download(path) + self.download_paths.append(path) + + +class ScribdAudioBook(ScribdBase): + """ + A base class for downloading audiobooks off Scribd. + + Parameters + ---------- + url: `str` + A string containing Scribd audiobook URL. 
+ """ + + def __init__(self, audiobook_url): + super().__init__(audiobook_url) + scribd_id_search = re.search("[0-9]{9}", audiobook_url) + scribd_id = scribd_id_search.group() + + self._audiobook_keys= None + self._book_id = None + self._author_id = None + self._license_id = None + self._playlist = None + + self.audiobook_url = audiobook_url + self.scribd_id = scribd_id + + # Replace these cookie values with ones generated when logged into a + # Scribd premium-account. This will allow access to full audiobooks. + self.cookies = const.premium_cookies + + @property + def audiobook_keys(self): + """ + Stores scraped information for an audiobook. + """ + if not self._audiobook_keys: + audiobook_keys = self._scrape_audiobook_page() + try: + authenticate_page_keys = self._scrape_authentication_page() + except exceptions.ScribdFetchError: + pass + else: + audiobook_keys.update(authenticate_page_keys) + self._audiobook_keys = audiobook_keys + return self._audiobook_keys + + @property + def session_key(self): + """ + Returns the Scribd session key used to communicate with https://api.findawayworld.com/. + """ + try: + session_key = self.audiobook_keys["audiobook"]["session_key"] + except KeyError: + session_key = None + return session_key + + @property + def headers(self): + """ + Constructs headers to pass with the network request. + """ + return {"Session-Key": self.session_key} + + @property + def preview_url(self): + """ + The free-to-access URL of the audiobook. + """ + return self.audiobook_keys["preview_url"] + + @property + def book_id(self): + """ + The Book-ID of the audiobook. + """ + if not self._book_id: + audiobook = self.audiobook_keys + self._book_id = audiobook["book_id"] + return self._book_id + + @property + def author_id(self): + """ + The Author-ID of Scribd used to authenticate with + https://api.findawayworld.com/. 
+ """ + if not self._author_id: + audiobook = self.audiobook_keys + self._author_id = audiobook["author_id"] + return self._author_id + + @property + def license_url(self): + """ + Returns the URL used to fetch the License-ID. + """ + return "https://api.findawayworld.com/v4/accounts/scribd-{0}/audiobooks/{1}".format(self.author_id, self.book_id) + + @property + def license_id(self): + """ + Returns the License-ID to be used by Scribd to fetch + the audiobook content from http://api.findawayworld.com/. + """ + if not self._license_id: + try: + self._license_id = self._get_license_id() + except exceptions.ScribdFetchError: + self._license_id = None + return self._license_id + + @property + def playlist_url(self): + """ + Returns the audiobook playlist URL. + """ + return "https://api.findawayworld.com/v4/audiobooks/{}/playlists".format(self.book_id) + + @property + def authenticate_url(self): + """ + Authentication URL for premium Scribd accounts + (if this didn't exist, we would have been able to download + complete audiobooks off Scribd without needing a premium account). + """ + return "https://www.scribd.com/listen/{}".format(self.scribd_id) + + @property + def premium_cookies(self): + """ + Returns a boolean based on whether the user is authenticated + with a premium Scribd account. + """ + try: + premium_cookies = bool(self.license_id) + except exceptions.ScribdFetchError: + premium_cookies = False + return premium_cookies + + @property + def playlist(self): + """ + Returns a `Playlist` object. + """ + if not self._playlist: + self._playlist = Playlist(self.title, self.make_playlist()) + return self._playlist + + def _get_license_id(self): + """ + Scrapes the License-ID for the audiobook. We need to handle retries + as Scribd can sometimes fail to deliver the License-ID in the HTML. 
+ """ + requests.get(self.authenticate_url, cookies=self.cookies) + response = requests.get(self.license_url, headers=self.headers) + response_dict = json.loads(response.text) + try: + license_id = response_dict["licenses"][0]["id"] + except KeyError: + raise exceptions.ScribdFetchError("Unable to fetch the License ID for the audiobook. This attribute" + "is only available when using a premium Scribd account.") + else: + return license_id + + def download(self): + raise NotImplementedError("Use method `ScribdAudioBook.playlist.download` instead.") + + def _scrape_audiobook_page(self): + """ + Scrapes the provided audiobook URL for information scraps. + """ + response = requests.get(self.audiobook_url, cookies=self.cookies) + # response = requests.get(self.audiobook_url) + soup = BeautifulSoup(response.text, "html.parser") + + div_tag = soup.find("div", {"data-track_category": "book_preview"}) + text = json.loads(div_tag["data-push_state"]) + preview_url = text["audiobook_sample_url"] + book_id_search = re.search("[0-9]{5,6}", preview_url) + book_id = book_id_search.group() + + js_tag = soup.find_all("script", {"type": "text/javascript"})[-1] + js_code = js_tag.get_text() + author_id_search = re.search("[0-9]{8,9}", js_code) + author_id = author_id_search.group() if author_id_search else None + + return {"preview_url": preview_url, "book_id": book_id, "author_id": author_id} + + def _scrape_authentication_page(self): + """ + Scrapes the authentication/listen page of the audiobook + for information scraps. 
+ """ + response = requests.get(self.authenticate_url, cookies=self.cookies) + soup = BeautifulSoup(response.text, "html.parser") + js_tag = soup.find_all("script", {"type": "text/javascript"})[-2] + + try: + start = response.text[response.text.find('{"eor_url":'):] + raw_info_str, *_ = start.split("\n") + final_curlbrace = -(raw_info_str[::-1].find("}")) + info_str = raw_info_str[:final_curlbrace] + info_dict = json.loads(info_str) + info_dict["pingback_url"] = "".join(info_dict["pingback_url"]) + except ValueError: + raise exceptions.ScribdFetchError("Unable to fetch information via the authentication page for the" + "audiobook. This is only available when using a premium Scribd account.") + else: + return info_dict + + def make_playlist(self): + """ + Generates a playlist dictionary based on whether the user + is authenticated with a premium Scribd account or not. + """ + if self.premium_cookies: + data = '{"license_id":"' + self.license_id + '"}' + response = requests.post(self.playlist_url, headers=self.headers, data=data) + playlist = json.loads(response.text) + else: + playlist = {"playlist": [{"url": self.preview_url, + "part_number": "preview", + "chapter_number": "preview"}], + "expires": None, + "playlist_token": None} + + return playlist diff --git a/scribdl/content/base.py b/scribdl/content/base.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/book.py b/scribdl/content/book.py new file mode 100644 index 0000000..5657fb1 --- /dev/null +++ b/scribdl/content/book.py @@ -0,0 +1,186 @@ +import requests +import json +import os + +from .base import ScribdBase +from .. import internals +from .. import const + + +class ScribdBook(ScribdBase): + """ + A class for downloading books off Scribd. + + Parameters + ---------- + url : `str` + A string containing Scribd book URL. 
+ """ + + def __init__(self, book_url): + super().__init__(book_url) + self.filename = self.sanitized_title + ".md" + self.url = book_url + self._book_id = None + self._csrf_token = None + + @property + def book_id(self): + """ + Extracts the book ID. + """ + if not self._book_id: + splits = self.url.split("/") + for split in splits: + try: + book_id = int(split) + except ValueError: + continue + self._book_id = book_id + return self._book_id + + @property + def csrf_token_header(self): + """ + CSRF-Token is used to gain access to premium content + in textual books premium content of audiobooks can still + be downloaded without it though. + """ + if not self._csrf_token: + csrf_token_url = "https://scribd.com/csrf_token" + response = requests.get(csrf_token_url, cookies=const.premium_cookies) + json_dict = json.loads(response.text) + self._csrf_token = {"X-CSRF-Token": json_dict["csrf_token"]} + return self._csrf_token + + def download(self, filename=None): + """ + Processing text and image extraction. + """ + if not filename: + filename = self.filename + + token = self._get_token() + chapter = 1 + + while True: + response = self.fetch_response(chapter, token) + + if response.status_code == 403: + token = self._get_token() + response = self.fetch_response(chapter, token) + + if response.status_code == 403: + print("No more content being exposed by Scribd!") + break + + try: + json_response = json.loads(response.text) + except ValueError: + print("Completed downloading book!") + break + + self._extract_text_blocks(json_response, chapter, token, filename) + + chapter += 1 + + return filename + + def _extract_text(self, content, chapter, token): + """ + Extracts text given a block of raw html. 
+ """ + words = [] + for word in content["words"]: + if word.get("break_map", None): + words.append(word["break_map"]["text"]) + elif word.get("text", None): + words.append(word["text"]) + elif word.get("type", None) == "image": + image_url = self._format_image_url(chapter, word["src"], token) + string_text = self._process_image_text(word, image_url) + words.append(string_text) + else: + words += self._extract_text(word, chapter, token) + return words + + def fetch_response(self, chapter, token): + url = self._format_content_url(chapter, token) + response = requests.get(url) + return response + + def _extract_text_blocks(self, response_dict, chapter, token, filename): + """ + Extracts small blocks of raw book text and image + URLs and writes them to a file. + """ + for block in response_dict["blocks"]: + if block["type"] == "text": + string_text = ( + " ".join(self._extract_text(block, chapter, token)) + "\n\n" + ) + elif block["type"] == "image": + image_url = self._format_image_url(chapter, block["src"], token) + string_text = self._process_image_text(block, image_url) + + if block["type"] in ("text", "image"): + print(string_text) + self.save_text(string_text, filename) + + def _process_image_text(self, block, image_url): + image_name = block["src"].replace("images/", "") + image_path = os.path.join(self.sanitized_title, image_name) + self._download_image(image_url, image_path) + string_text = "![{}]({})\n\n".format(image_name, image_path) + return string_text + + def _download_image(self, url, path): + try: + os.makedirs(os.path.dirname(path)) + except OSError: + pass + internals.download_stream(url, path) + + def _extract_image_path_from_url(self, url): + image_name = url.split("/")[-1].split("?token=")[0] + return os.path.join(self.book_id, image_name) + + def _format_content_url(self, chapter, token): + """ + Generates a string which points to a URL containing + the raw book text. 
+ """ + unformatted_url = ( + "https://www.scribd.com/scepub/{}/chapters/{}/contents.json?token={}" + ) + return unformatted_url.format(self.book_id, chapter, token) + + def _format_image_url(self, chapter, image, token): + """ + Generates a string which points to an image URL. + """ + unformatted_url = "https://www.scribd.com/scepub/{}/chapters/{}/{}?token={}" + return unformatted_url.format(self.book_id, chapter, image, token) + + def _get_token(self): + """ + Fetches a uniquely generated token for the current + session. + """ + # data can take take any value but it must take some value + # otherwise Scribd will reject the request + data = "data" + + token_url = "https://www.scribd.com/read2/{}/access_token".format(self.book_id) + token = requests.post(token_url, + headers=self.csrf_token_header, + cookies=const.premium_cookies, + data=data) + return json.loads(token.text)["response"] + + def save_text(self, string_text, filename): + """ + Appends text to the passed file. + """ + with open(filename, "a", encoding="utf-8") as f: + f.write(string_text) diff --git a/scribdl/content/document.py b/scribdl/content/document.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/test/__init__.py b/scribdl/content/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/test/test_audiobook.py b/scribdl/content/test/test_audiobook.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/test/test_base.py b/scribdl/content/test/test_base.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/test/test_book.py b/scribdl/content/test/test_book.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/content/test/test_document.py b/scribdl/content/test/test_document.py new file mode 100644 index 0000000..e69de29 diff --git a/scribdl/downloader.py b/scribdl/downloader.py new file mode 100644 index 0000000..6560dd4 --- /dev/null +++ b/scribdl/downloader.py @@ -0,0 +1,105 @@ 
class Downloader:
    """
    A helper class for downloading books, documents and audiobooks
    off Scribd.

    Parameters
    ----------
    url : `str`
        A string containing path to a Scribd URL
    """

    def __init__(self, url):
        self.url = url
        # The audiobook check is a plain substring test on the URL, while
        # the book check fetches the page, so the latter only runs when
        # the URL is not already known to be an audiobook.
        self._is_audiobook = self.is_audiobook()
        self._is_book = False if self._is_audiobook else self.is_book()

    def download(self, is_image_document=None):
        """
        Downloads books, documents and audiobooks from Scribd.

        Parameters
        ----------
        is_image_document : `bool`, optional
            Only consulted when the URL is neither an audiobook nor a
            book. A truthy value selects `ScribdImageDocument`, a falsy
            one selects `ScribdTextualDocument`.

        Returns
        -------
        A list of local audio filepaths for audiobooks, otherwise an
        object of `ConvertToPDF` class.

        Raises
        ------
        TypeError
            If the URL points to a document and `is_image_document` was
            left as `None`.
        """
        if self._is_audiobook:
            return self._download_audiobook()

        if self._is_book:
            return self._download_book()

        if is_image_document is None:
            raise TypeError(
                "The input URL points to a document. You must specify "
                "whether it is an image document or a textual document "
                "in the `is_image_document` parameter."
            )
        return self._download_document(is_image_document)

    def _download_book(self):
        """
        Downloads books off Scribd.
        Returns an object of `ConvertToPDF` class.
        """
        book = ScribdBook(self.url)
        md_path = book.download()
        pdf_path = "{}.pdf".format(book.sanitized_title)
        return ConvertToPDF(md_path, pdf_path)

    def _download_document(self, image_document):
        """
        Downloads textual and image documents off Scribd.
        Returns an object of `ConvertToPDF` class.
        """
        if image_document:
            document = ScribdImageDocument(self.url)
        else:
            document = ScribdTextualDocument(self.url)

        content_path = document.download()
        pdf_path = "{}.pdf".format(document.sanitized_title)
        return ConvertToPDF(content_path, pdf_path)

    def _download_audiobook(self):
        """
        Downloads audiobooks off Scribd.
        Returns a list containing local audio filepaths.
        """
        audiobook = ScribdAudioBook(self.url)
        playlist = audiobook.playlist
        if not audiobook.premium_cookies:
            print(
                "Premium cookies not detected. Only the preview version "
                "of audiobook will be downloaded."
            )
        playlist.download()
        return playlist.download_paths

    def is_book(self):
        """
        Checks whether the passed URL points to a Scribd book
        or a Scribd document.

        Fetches the page and compares the first CSS class of its
        `<body>` tag with the class used by the book layout. Returns
        `False` when the page has no `<body>` tag or the tag carries
        no class.
        """
        response = requests.get(self.url)
        soup = BeautifulSoup(response.text, "html.parser")
        body = soup.find("body")
        # Guard against pages without a <body> or without a class
        # attribute instead of failing on the subscript.
        content_class = body.get("class") if body is not None else None
        if not content_class:
            return False
        return content_class[0] == "autogen_class_views_layouts_book_web"

    def is_audiobook(self):
        """
        Checks whether the passed URL points to a Scribd audiobook.
        """
        return "/audiobook/" in self.url


GITHUB_URL_BASE = "https://github.com/ritiek/scribd-downloader"


def fix_encoding(query):
    """
    Encoding fixes for Python 2 and Python 3 cross-compatibility.

    Returns `query` unchanged on Python 3 and its UTF-8 encoded form
    on Python 2.
    """
    if sys.version_info > (3, 0):
        return query
    return query.encode("utf-8")


def sanitize_title(title):
    """
    Remove forbidden characters from title that will prevent Windows
    from creating directory.

    Also change ' ' to '_' to preserve previous behavior.
    """
    # The backslash is escaped explicitly; a bare "\<" is an invalid
    # escape sequence that newer interpreters warn about.
    forbidden_chars = ' *"/\\<>:|(),'
    replace_char = "_"

    for ch in forbidden_chars:
        title = title.replace(ch, replace_char)

    return title
def download_stream(url, filepath):
    """
    Stream stuff from the Internet to a local file.

    Parameters
    ----------
    url : `str`
        Address to fetch.
    filepath : `str`
        Local path the response body is written to (opened in binary
        write mode).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    """
    # Using the response as a context manager releases the connection
    # even when writing to disk fails part-way through.
    with requests.get(url, stream=True) as response:
        # Fail loudly instead of saving an HTTP error page as the file.
        response.raise_for_status()
        with open(filepath, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)


@pytest.fixture
def cwd_to_tmpdir(tmpdir, monkeypatch):
    """Run the requesting test from inside its temporary directory."""
    # monkeypatch.chdir undoes the directory change on teardown, so one
    # test's working directory does not leak into the next.
    monkeypatch.chdir(tmpdir)


def test_audiobook_download(cwd_to_tmpdir):
    """Download an audiobook preview and check its file name and size."""
    audiobook_url = "https://www.scribd.com/audiobook/237606860/100-Ways-to-Motivate-Yourself-Change-Your-Life-Forever"
    audiobook_downloader = Downloader(audiobook_url)
    audio = audiobook_downloader.download()
    assert audio[0] == "100_Ways_to_Motivate_Yourself__Change_Your_Life_Forever_preview.mp3"
    assert os.path.getsize(audio[0]) == 2127830


def test_text_document_download(cwd_to_tmpdir):
    """Download a textual document and convert it to PDF."""
    text_doc_url = "https://www.scribd.com/document/96882378/Trademark-License-Agreement"
    text_downloader = Downloader(text_doc_url)
    md_doc = text_downloader.download(is_image_document=False)
    assert os.path.getsize(md_doc.input_content) in range(1000, 2000)
    md_doc.to_pdf()
    assert os.path.getsize(md_doc.pdf_path) in range(20000, 31000)


def test_img_document_download(cwd_to_tmpdir):
    """Download an image document and convert it to PDF."""
    img_doc_url = "https://www.scribd.com/doc/136711944/Signature-Scanning-and-Verification-in-Finacle"
    img_downloader = Downloader(img_doc_url)
    imgs = img_downloader.download(is_image_document=True)
    assert len(imgs.input_content) == 2
    imgs.to_pdf()
    assert os.path.getsize(imgs.pdf_path) in range(140000, 150000)


def test_book_download(cwd_to_tmpdir, monkeypatch):
    """Download a book and convert it to PDF."""
    book_url = "https://www.scribd.com/read/262694921/Acting-The-First-Six-Lessons"
    book_downloader = Downloader(book_url)
    # We don't want to clutter stdout with book contents if this test fails.
    # The stub accepts any call shape so print() with no arguments, several
    # arguments or keywords such as end= does not raise.
    monkeypatch.setattr("builtins.print", lambda *args, **kwargs: None)
    md_book = book_downloader.download()
    assert os.path.getsize(md_book.input_content) in range(10000, 20000)
    md_book.to_pdf()
    assert os.path.getsize(md_book.pdf_path) in range(200000, 2500000)
long_description_content_type='text/x-rst', author='Ritiek Malhotra', author_email='ritiekmalhotra123@gmail.com', - packages=find_packages(), + packages = find_packages(), entry_points={ 'console_scripts': [ 'scribdl = scribdl.command_line:_command_line', ] }, - url='https://github.com/Phoenix124/scribd-downloader', + url='https://www.github.com/ritiek/scribd-downloader', keywords=['scribd-downloader', 'documents', 'command-line', 'python'], license='MIT', download_url='https://github.com/ritiek/scribd-downloader/archive/v' + __version__ + '.tar.gz', From 4f837fde2dba7dfaf7aaa5edf94f46986173a1aa Mon Sep 17 00:00:00 2001 From: maksimliakhavets Date: Sun, 2 Apr 2023 19:59:12 +0300 Subject: [PATCH 13/13] add build.yml --- scribdl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scribdl/version.py b/scribdl/version.py index 9c73af2..f708a9b 100644 --- a/scribdl/version.py +++ b/scribdl/version.py @@ -1 +1 @@ -__version__ = "1.3.1" +__version__ = "1.3.2"