diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6294371 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea +__pycache__ + diff --git a/README.md b/README.md index e21ad8a..34e3a8a 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ # WebMoney +To start write command: +/venv/bin/python -m flask run diff --git a/app.py b/app.py new file mode 100644 index 0000000..a95b822 --- /dev/null +++ b/app.py @@ -0,0 +1,35 @@ +from flask import Flask, render_template, request +import random +import json +import crawler + +app = Flask(__name__) + + +@app.route('/') +def hello_world(): + currency = crawler.get_value() + return render_template("main.html", currency=currency) + + +@app.route('/game/') +def guess_currency(): + currency = crawler.get_value() + random.shuffle(currency) + return render_template("game.html", value=currency[0]) + + +@app.route('/game/answer', methods=['GET', 'POST']) +def check_answer(): + form = request.args + currency = form['currency'] + ans = form['ans'] + print(type(ans)) + data = {} + data['currency'] = currency + data['name'] = ans + return render_template("answer.html",data=data ) + + +if __name__ == '__main__': + app.run() diff --git a/check.py b/check.py new file mode 100644 index 0000000..c1da90a --- /dev/null +++ b/check.py @@ -0,0 +1,3 @@ +import crawler +value = crawler.get_value() +print(value[0]['count']) \ No newline at end of file diff --git a/crawler.py b/crawler.py new file mode 100644 index 0000000..08047b0 --- /dev/null +++ b/crawler.py @@ -0,0 +1,27 @@ +import requests +from bs4 import BeautifulSoup +import re +import json +def get_value(): + link = "http://cbr.ru/currency_base/daily" + responce = requests.get(link) + soup = BeautifulSoup(responce.text, 'html.parser') + p = [] + for i in soup.find_all('td'): + p.append(i) + count = 2 + currency = [] + while count <= len(p): + d = {} + d["count"] = int(p[count].string) + count += 1 + d["currency_name"] = p[count].string + count += 1 + d["currency"] = float(p[count].string.replace(',','.')) + count += 3 + currency.append(d) + + with open("currency.txt", "w") as f: + for i in currency: + f.write(json.dumps(i, ensure_ascii=False) + "\n") + return currency \ No newline at end of file diff --git a/currency.txt b/currency.txt new file mode 100644 index 0000000..e951905 --- /dev/null +++ b/currency.txt @@ -0,0 +1,34 @@ +{"count": 1, "currency_name": "Австралийский доллар", "currency": 58.2872} +{"count": 1, "currency_name": "Азербайджанский манат", "currency": 44.1188} +{"count": 100, "currency_name": "Армянских драмов", "currency": 14.4022} +{"count": 1, "currency_name": "Белорусский рубль", "currency": 29.261} +{"count": 1, "currency_name": "Болгарский лев", "currency": 46.2331} +{"count": 1, "currency_name": "Бразильский реал", "currency": 13.7874} +{"count": 100, "currency_name": "Венгерских форинтов", "currency": 24.9066} +{"count": 1000, "currency_name": "Вон Республики Корея", "currency": 67.3775} +{"count": 10, "currency_name": "Гонконгских долларов", "currency": 96.5715} +{"count": 1, "currency_name": "Датская крона", "currency": 12.1608} +{"count": 1, "currency_name": "Доллар США", "currency": 74.9578} +{"count": 1, "currency_name": "Евро", "currency": 90.4666} +{"count": 10, "currency_name": "Индийских рупий", "currency": 10.0429} +{"count": 100, "currency_name": "Казахстанских тенге", "currency": 17.4521} +{"count": 1, "currency_name": "Канадский доллар", "currency": 60.3817} +{"count": 100, "currency_name": "Киргизских сомов", "currency": 88.3653} +{"count": 1, "currency_name": "Китайский юань", "currency": 11.5569} +{"count": 10, "currency_name": "Молдавских леев", "currency": 41.9607} +{"count": 1, "currency_name": "Новый туркменский манат", "currency": 21.4472} +{"count": 10, "currency_name": "Норвежских крон", "currency": 90.2497} +{"count": 1, "currency_name": "Польский злотый", "currency": 19.8411} +{"count": 1, "currency_name": "Румынский лей", "currency": 18.3599} +{"count": 1, "currency_name": "СДР (специальные права заимствования)", "currency": 107.7196} +{"count": 1, "currency_name": "Сингапурский доллар", "currency": 56.508} +{"count": 10, "currency_name": "Таджикских сомони", "currency": 65.7236} +{"count": 10, "currency_name": "Турецких лир", "currency": 90.7161} +{"count": 10000, "currency_name": "Узбекских сумов", "currency": 71.0623} +{"count": 10, "currency_name": "Украинских гривен", "currency": 26.9984} +{"count": 1, "currency_name": "Фунт стерлингов Соединенного королевства", "currency": 104.0189} +{"count": 10, "currency_name": "Чешских крон", "currency": 34.9568} +{"count": 10, "currency_name": "Шведских крон", "currency": 89.1114} +{"count": 1, "currency_name": "Швейцарский франк", "currency": 81.7692} +{"count": 10, "currency_name": "Южноафриканских рэндов", "currency": 52.3266} +{"count": 100, "currency_name": "Японских иен", "currency": 69.178} diff --git a/static/style.css b/static/style.css new file mode 100644 index 0000000..53196f5 --- /dev/null +++ b/static/style.css @@ -0,0 +1,24 @@ +body{ + font-family: "Lato Hairline", Helvetica, Arial, sans-serif; + background-color: honeydew; + color: #29ff00; +} +h1{ + text-align: center; + color: blueviolet; +} +table{; + width: auto; +} +thead td{ + font-weight: bold; + border-bottom: solid darkblue; +} +tbody tr:nth-child(odd){ + background-color: darkslateblue; +} + +tbody tr:nth-child(even){ + background-color: #ff0026; +} + diff --git a/templates/answer.html b/templates/answer.html new file mode 100644 index 0000000..4aa6150 --- /dev/null +++ b/templates/answer.html @@ -0,0 +1,27 @@ + + + + + + + +{% if data['name'] == data['currency'] %} +

Молодец

+ +
+ +

+
+{% endif %} +{% if data['currency'] != data['name'] %} +

Мимо, правильный ответ: '{{ data['currency']}}'

+ +
+ +

+
+{% endif %} + + + + \ No newline at end of file diff --git a/templates/game.html b/templates/game.html new file mode 100644 index 0000000..0f4699e --- /dev/null +++ b/templates/game.html @@ -0,0 +1,27 @@ + + + + + + + +

А ты знаешь ему цену?

+ +
+ + + + +

+
+ + + \ No newline at end of file diff --git a/templates/main.html b/templates/main.html new file mode 100644 index 0000000..82fc80a --- /dev/null +++ b/templates/main.html @@ -0,0 +1,35 @@ + + + + + + + +

Курс валюты на сегодня

+ +
+ +

+
+ + + + + + + + + + {% for value in currency %} + + + + + + + {% endfor %} + + +
РубльВалютаЕдиницКурс
Рубль{{value['count']}}{{value['currency_name']}}{{value['currency']}}
+ + \ No newline at end of file diff --git a/venv/bin/activate b/venv/bin/activate new file mode 100644 index 0000000..46e6663 --- /dev/null +++ b/venv/bin/activate @@ -0,0 +1,76 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r + fi + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + if [ ! "$1" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="/home/lepad/Desktop/proga/WebMoney/venv" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + if [ "x(venv) " != x ] ; then + PS1="(venv) ${PS1:-}" + else + if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then + # special case for Aspen magic directories + # see http://www.zetadev.com/software/aspen/ + PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1" + else + PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1" + fi + fi + export PS1 +fi + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r +fi diff --git a/venv/bin/activate.csh b/venv/bin/activate.csh new file mode 100644 index 0000000..2d758f1 --- /dev/null +++ b/venv/bin/activate.csh @@ -0,0 +1,37 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . +# Ported to Python 3.3 venv by Andrew Svetlov + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV "/home/lepad/Desktop/proga/WebMoney/venv" + +set _OLD_VIRTUAL_PATH="$PATH" +setenv PATH "$VIRTUAL_ENV/bin:$PATH" + + +set _OLD_VIRTUAL_PROMPT="$prompt" + +if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then + if ("venv" != "") then + set env_name = "venv" + else + if (`basename "VIRTUAL_ENV"` == "__") then + # special case for Aspen magic directories + # see http://www.zetadev.com/software/aspen/ + set env_name = `basename \`dirname "$VIRTUAL_ENV"\`` + else + set env_name = `basename "$VIRTUAL_ENV"` + endif + endif + set prompt = "[$env_name] $prompt" + unset env_name +endif + +alias pydoc python -m pydoc + +rehash diff --git a/venv/bin/activate.fish b/venv/bin/activate.fish new file mode 100644 index 0000000..79c4fac --- /dev/null +++ b/venv/bin/activate.fish @@ -0,0 +1,75 @@ +# This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org) +# you cannot run it directly + +function deactivate -d "Exit virtualenv and return to normal shell environment" + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + set -gx PATH $_OLD_VIRTUAL_PATH + set -e _OLD_VIRTUAL_PATH + end + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + functions -e fish_prompt + set -e _OLD_FISH_PROMPT_OVERRIDE + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + end + + set -e VIRTUAL_ENV + if test "$argv[1]" != "nondestructive" + # Self destruct! + functions -e deactivate + end +end + +# unset irrelevant variables +deactivate nondestructive + +set -gx VIRTUAL_ENV "/home/lepad/Desktop/proga/WebMoney/venv" + +set -gx _OLD_VIRTUAL_PATH $PATH +set -gx PATH "$VIRTUAL_ENV/bin" $PATH + +# unset PYTHONHOME if set +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # fish uses a function instead of an env var to generate the prompt. + + # save the current fish_prompt function as the function _old_fish_prompt + functions -c fish_prompt _old_fish_prompt + + # with the original prompt function renamed, we can override with our own. + function fish_prompt + # Save the return status of the last command + set -l old_status $status + + # Prompt override? + if test -n "(venv) " + printf "%s%s" "(venv) " (set_color normal) + else + # ...Otherwise, prepend env + set -l _checkbase (basename "$VIRTUAL_ENV") + if test $_checkbase = "__" + # special case for Aspen magic directories + # see http://www.zetadev.com/software/aspen/ + printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal) + else + printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal) + end + end + + # Restore the return status of the previous command. + echo "exit $old_status" | . + _old_fish_prompt + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" +end diff --git a/venv/bin/chardetect b/venv/bin/chardetect new file mode 100755 index 0000000..cfb3b97 --- /dev/null +++ b/venv/bin/chardetect @@ -0,0 +1,8 @@ +#!/home/lepad/Desktop/proga/WebMoney/venv/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from chardet.cli.chardetect import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv/bin/flask b/venv/bin/flask new file mode 100755 index 0000000..921359d --- /dev/null +++ b/venv/bin/flask @@ -0,0 +1,8 @@ +#!/home/lepad/Desktop/proga/WebMoney/venv/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from flask.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv/bin/pip b/venv/bin/pip new file mode 100755 index 0000000..0fcf5a4 --- /dev/null +++ b/venv/bin/pip @@ -0,0 +1,8 @@ +#!/home/lepad/Desktop/proga/WebMoney/venv/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv/bin/pip3 b/venv/bin/pip3 new file mode 100755 index 0000000..0fcf5a4 --- /dev/null +++ b/venv/bin/pip3 @@ -0,0 +1,8 @@ +#!/home/lepad/Desktop/proga/WebMoney/venv/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv/bin/pip3.6 b/venv/bin/pip3.6 new file mode 100755 index 0000000..0fcf5a4 --- /dev/null +++ b/venv/bin/pip3.6 @@ -0,0 +1,8 @@ +#!/home/lepad/Desktop/proga/WebMoney/venv/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/venv/bin/python b/venv/bin/python new file mode 120000 index 0000000..039b719 --- /dev/null +++ b/venv/bin/python @@ -0,0 +1 @@ +python3.6 \ No newline at end of file diff --git a/venv/bin/python3 b/venv/bin/python3 new file mode 120000 index 0000000..039b719 --- /dev/null +++ b/venv/bin/python3 @@ -0,0 +1 @@ +python3.6 \ No newline at end of file diff --git a/venv/bin/python3.6 b/venv/bin/python3.6 new file mode 120000 index 0000000..6270541 --- /dev/null +++ b/venv/bin/python3.6 @@ -0,0 +1 @@ +/usr/bin/python3.6 \ No newline at end of file diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/INSTALLER b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/LICENSE.rst b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/LICENSE.rst new file mode 100644 index 0000000..9d227a0 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2010 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/METADATA b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/METADATA new file mode 100644 index 0000000..db7fcd1 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/METADATA @@ -0,0 +1,137 @@ +Metadata-Version: 2.1 +Name: Flask +Version: 1.1.2 +Summary: A simple framework for building complex web applications. +Home-page: https://palletsprojects.com/p/flask/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Documentation, https://flask.palletsprojects.com/ +Project-URL: Code, https://github.com/pallets/flask +Project-URL: Issue tracker, https://github.com/pallets/flask/issues +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Framework :: Flask +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application +Classifier: Topic :: Software Development :: Libraries :: Application Frameworks +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +Requires-Dist: Werkzeug (>=0.15) +Requires-Dist: Jinja2 (>=2.10.1) +Requires-Dist: itsdangerous (>=0.24) +Requires-Dist: click (>=5.1) +Provides-Extra: dev +Requires-Dist: pytest ; extra == 'dev' +Requires-Dist: coverage ; extra == 'dev' +Requires-Dist: tox ; extra == 'dev' +Requires-Dist: sphinx ; extra == 'dev' +Requires-Dist: pallets-sphinx-themes ; extra == 'dev' +Requires-Dist: sphinxcontrib-log-cabinet ; extra == 'dev' +Requires-Dist: sphinx-issues ; extra == 'dev' +Provides-Extra: docs +Requires-Dist: sphinx ; extra == 'docs' +Requires-Dist: pallets-sphinx-themes ; extra == 'docs' +Requires-Dist: sphinxcontrib-log-cabinet ; extra == 'docs' +Requires-Dist: sphinx-issues ; extra == 'docs' +Provides-Extra: dotenv +Requires-Dist: python-dotenv ; extra == 'dotenv' + +Flask +===== + +Flask is a lightweight `WSGI`_ web application framework. It is designed +to make getting started quick and easy, with the ability to scale up to +complex applications. It began as a simple wrapper around `Werkzeug`_ +and `Jinja`_ and has become one of the most popular Python web +application frameworks. + +Flask offers suggestions, but doesn't enforce any dependencies or +project layout. It is up to the developer to choose the tools and +libraries they want to use. There are many extensions provided by the +community that make adding new functionality easy. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Flask + + +A Simple Example +---------------- + +.. code-block:: python + + from flask import Flask + + app = Flask(__name__) + + @app.route("/") + def hello(): + return "Hello, World!" + +.. code-block:: text + + $ env FLASK_APP=hello.py flask run + * Serving Flask app "hello" + * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit) + + +Contributing +------------ + +For guidance on setting up a development environment and how to make a +contribution to Flask, see the `contributing guidelines`_. + +.. _contributing guidelines: https://github.com/pallets/flask/blob/master/CONTRIBUTING.rst + + +Donate +------ + +The Pallets organization develops and supports Flask and the libraries +it uses. In order to grow the community of contributors and users, and +allow the maintainers to devote more time to the projects, `please +donate today`_. + +.. _please donate today: https://psfmember.org/civicrm/contribute/transact?reset=1&id=20 + + +Links +----- + +* Website: https://palletsprojects.com/p/flask/ +* Documentation: https://flask.palletsprojects.com/ +* Releases: https://pypi.org/project/Flask/ +* Code: https://github.com/pallets/flask +* Issue tracker: https://github.com/pallets/flask/issues +* Test status: https://dev.azure.com/pallets/flask/_build +* Official chat: https://discord.gg/t6rrQZH + +.. _WSGI: https://wsgi.readthedocs.io +.. _Werkzeug: https://www.palletsprojects.com/p/werkzeug/ +.. _Jinja: https://www.palletsprojects.com/p/jinja/ +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/RECORD b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/RECORD new file mode 100644 index 0000000..28a2fab --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/RECORD @@ -0,0 +1,49 @@ +../../../bin/flask,sha256=dfmXfpYFWxqnhH4DprzfnX_-n6rN1zQmyt3cpOGWoVU,242 +Flask-1.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Flask-1.1.2.dist-info/LICENSE.rst,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475 +Flask-1.1.2.dist-info/METADATA,sha256=3INpPWH6nKfZ33R2N-bQZy4TOe1wQCMweZc9mwcNrtc,4591 +Flask-1.1.2.dist-info/RECORD,, +Flask-1.1.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +Flask-1.1.2.dist-info/WHEEL,sha256=8zNYZbwQSXoB9IfXOjPfeNwvAsALAjffgk27FqvCWbo,110 +Flask-1.1.2.dist-info/entry_points.txt,sha256=gBLA1aKg0OYR8AhbAfg8lnburHtKcgJLDU52BBctN0k,42 +Flask-1.1.2.dist-info/top_level.txt,sha256=dvi65F6AeGWVU0TBpYiC04yM60-FX1gJFkK31IKQr5c,6 +flask/__init__.py,sha256=YnA9wkwbJcnb_jTT-nMsMFeFE_UWt33khKzdHmMSuyI,1894 +flask/__main__.py,sha256=fjVtt3QTANXlpJCOv3Ha7d5H-76MwzSIOab7SFD9TEk,254 +flask/__pycache__/__init__.cpython-36.pyc,, +flask/__pycache__/__main__.cpython-36.pyc,, +flask/__pycache__/_compat.cpython-36.pyc,, +flask/__pycache__/app.cpython-36.pyc,, +flask/__pycache__/blueprints.cpython-36.pyc,, +flask/__pycache__/cli.cpython-36.pyc,, +flask/__pycache__/config.cpython-36.pyc,, +flask/__pycache__/ctx.cpython-36.pyc,, +flask/__pycache__/debughelpers.cpython-36.pyc,, +flask/__pycache__/globals.cpython-36.pyc,, +flask/__pycache__/helpers.cpython-36.pyc,, +flask/__pycache__/logging.cpython-36.pyc,, +flask/__pycache__/sessions.cpython-36.pyc,, +flask/__pycache__/signals.cpython-36.pyc,, +flask/__pycache__/templating.cpython-36.pyc,, +flask/__pycache__/testing.cpython-36.pyc,, +flask/__pycache__/views.cpython-36.pyc,, +flask/__pycache__/wrappers.cpython-36.pyc,, +flask/_compat.py,sha256=8KPT54Iig96TuLipdogLRHNYToIcg-xPhnSV5VRERnw,4099 +flask/app.py,sha256=tmEhx_XrIRP24vZg39dHMWFzJ2jj-YxIcd51LaIT5cE,98059 +flask/blueprints.py,sha256=vkdm8NusGsfZUeIfPdCluj733QFmiQcT4Sk1tuZLUjw,21400 +flask/cli.py,sha256=SIb22uq9wYBeB2tKMl0pYdhtZ1MAQyZtPL-3m6es4G0,31035 +flask/config.py,sha256=3dejvQRYfNHw_V7dCLMxU8UNFpL34xIKemN7gHZIZ8Y,10052 +flask/ctx.py,sha256=cks-omGedkxawHFo6bKIrdOHsJCAgg1i_NWw_htxb5U,16724 +flask/debughelpers.py,sha256=-whvPKuAoU8AZ9c1z_INuOeBgfYDqE1J2xNBsoriugU,6475 +flask/globals.py,sha256=OgcHb6_NCyX6-TldciOdKcyj4PNfyQwClxdMhvov6aA,1637 +flask/helpers.py,sha256=IHa578HU_3XAAo1wpXQv24MYRYO5TzaiDQQwvUIcE6Q,43074 +flask/json/__init__.py,sha256=6nITbZYiYOPB8Qfi1-dvsblwn01KRz8VOsMBIZyaYek,11988 +flask/json/__pycache__/__init__.cpython-36.pyc,, +flask/json/__pycache__/tag.cpython-36.pyc,, +flask/json/tag.py,sha256=vq9GOllg_0kTWKuVFrwmkeOQzR-jdBD23x-89JyCCQI,8306 +flask/logging.py,sha256=WcY5UkqTysGfmosyygSlXyZYGwOp3y-VsE6ehoJ48dk,3250 +flask/sessions.py,sha256=G0KsEkr_i1LG_wOINwFSOW3ts7Xbv4bNgEZKc7TRloc,14360 +flask/signals.py,sha256=yYLOed2x8WnQ7pirGalQYfpYpCILJ0LJhmNSrnWvjqw,2212 +flask/templating.py,sha256=F8E_IZXn9BGsjMzUJ5N_ACMyZdiFBp_SSEaUunvfZ7g,4939 +flask/testing.py,sha256=WXsciCQbHBP7xjHqNvOA4bT0k86GvSNpgzncfXLDEEg,10146 +flask/views.py,sha256=eeWnadLAj0QdQPLtjKipDetRZyG62CT2y7fNOFDJz0g,5802 +flask/wrappers.py,sha256=kgsvtZuMM6RQaDqhRbc5Pcj9vqTnaERl2pmXcdGL7LU,4736 diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/REQUESTED b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/REQUESTED new file mode 100644 index 0000000..e69de29 diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/WHEEL b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/WHEEL new file mode 100644 index 0000000..8b701e9 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.33.6) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/entry_points.txt b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/entry_points.txt new file mode 100644 index 0000000..1eb0252 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +flask = flask.cli:main + diff --git a/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/top_level.txt b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/top_level.txt new file mode 100644 index 0000000..7e10602 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Flask-1.1.2.dist-info/top_level.txt @@ -0,0 +1 @@ +flask diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/INSTALLER b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/LICENSE.rst b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/LICENSE.rst new file mode 100644 index 0000000..c37cae4 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2007 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/METADATA b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/METADATA new file mode 100644 index 0000000..1af8df0 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/METADATA @@ -0,0 +1,106 @@ +Metadata-Version: 2.1 +Name: Jinja2 +Version: 2.11.3 +Summary: A very fast and expressive template engine. +Home-page: https://palletsprojects.com/p/jinja/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Documentation, https://jinja.palletsprojects.com/ +Project-URL: Code, https://github.com/pallets/jinja +Project-URL: Issue tracker, https://github.com/pallets/jinja/issues +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +Description-Content-Type: text/x-rst +Requires-Dist: MarkupSafe (>=0.23) +Provides-Extra: i18n +Requires-Dist: Babel (>=0.8) ; extra == 'i18n' + +Jinja +===== + +Jinja is a fast, expressive, extensible templating engine. Special +placeholders in the template allow writing code similar to Python +syntax. Then the template is passed data to render the final document. + +It includes: + +- Template inheritance and inclusion. +- Define and import macros within templates. +- HTML templates can use autoescaping to prevent XSS from untrusted + user input. +- A sandboxed environment can safely render untrusted templates. +- AsyncIO support for generating templates and calling async + functions. +- I18N support with Babel. +- Templates are compiled to optimized Python code just-in-time and + cached, or can be compiled ahead-of-time. +- Exceptions point to the correct line in templates to make debugging + easier. +- Extensible filters, tests, functions, and even syntax. + +Jinja's philosophy is that while application logic belongs in Python if +possible, it shouldn't make the template designer's job difficult by +restricting functionality too much. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + $ pip install -U Jinja2 + +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + +In A Nutshell +------------- + +.. code-block:: jinja + + {% extends "base.html" %} + {% block title %}Members{% endblock %} + {% block content %} + + {% endblock %} + + +Links +----- + +- Website: https://palletsprojects.com/p/jinja/ +- Documentation: https://jinja.palletsprojects.com/ +- Releases: https://pypi.org/project/Jinja2/ +- Code: https://github.com/pallets/jinja +- Issue tracker: https://github.com/pallets/jinja/issues +- Test status: https://dev.azure.com/pallets/jinja/_build +- Official chat: https://discord.gg/t6rrQZH + + diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/RECORD b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/RECORD new file mode 100644 index 0000000..1c9880e --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/RECORD @@ -0,0 +1,61 @@ +Jinja2-2.11.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Jinja2-2.11.3.dist-info/LICENSE.rst,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475 +Jinja2-2.11.3.dist-info/METADATA,sha256=PscpJ1C3RSp8xcjV3fAuTz13rKbGxmzJXnMQFH-WKhs,3535 +Jinja2-2.11.3.dist-info/RECORD,, +Jinja2-2.11.3.dist-info/WHEEL,sha256=Z-nyYpwrcSqxfdux5Mbn_DQ525iP7J2DG3JgGvOYyTQ,110 +Jinja2-2.11.3.dist-info/entry_points.txt,sha256=Qy_DkVo6Xj_zzOtmErrATe8lHZhOqdjpt3e4JJAGyi8,61 +Jinja2-2.11.3.dist-info/top_level.txt,sha256=PkeVWtLb3-CqjWi1fO29OCbj55EhX_chhKrCdrVe_zs,7 +jinja2/__init__.py,sha256=LZUXmxJc2GIchfSAeMWsxCWiQYO-w1-736f2Q3I8ms8,1549 +jinja2/__pycache__/__init__.cpython-36.pyc,, +jinja2/__pycache__/_compat.cpython-36.pyc,, +jinja2/__pycache__/_identifier.cpython-36.pyc,, +jinja2/__pycache__/asyncfilters.cpython-36.pyc,, +jinja2/__pycache__/asyncsupport.cpython-36.pyc,, +jinja2/__pycache__/bccache.cpython-36.pyc,, +jinja2/__pycache__/compiler.cpython-36.pyc,, +jinja2/__pycache__/constants.cpython-36.pyc,, +jinja2/__pycache__/debug.cpython-36.pyc,, +jinja2/__pycache__/defaults.cpython-36.pyc,, +jinja2/__pycache__/environment.cpython-36.pyc,, +jinja2/__pycache__/exceptions.cpython-36.pyc,, +jinja2/__pycache__/ext.cpython-36.pyc,, +jinja2/__pycache__/filters.cpython-36.pyc,, +jinja2/__pycache__/idtracking.cpython-36.pyc,, +jinja2/__pycache__/lexer.cpython-36.pyc,, +jinja2/__pycache__/loaders.cpython-36.pyc,, +jinja2/__pycache__/meta.cpython-36.pyc,, +jinja2/__pycache__/nativetypes.cpython-36.pyc,, +jinja2/__pycache__/nodes.cpython-36.pyc,, +jinja2/__pycache__/optimizer.cpython-36.pyc,, +jinja2/__pycache__/parser.cpython-36.pyc,, +jinja2/__pycache__/runtime.cpython-36.pyc,, +jinja2/__pycache__/sandbox.cpython-36.pyc,, +jinja2/__pycache__/tests.cpython-36.pyc,, +jinja2/__pycache__/utils.cpython-36.pyc,, +jinja2/__pycache__/visitor.cpython-36.pyc,, +jinja2/_compat.py,sha256=B6Se8HjnXVpzz9-vfHejn-DV2NjaVK-Iewupc5kKlu8,3191 +jinja2/_identifier.py,sha256=EdgGJKi7O1yvr4yFlvqPNEqV6M1qHyQr8Gt8GmVTKVM,1775 +jinja2/asyncfilters.py,sha256=XJtYXTxFvcJ5xwk6SaDL4S0oNnT0wPYvXBCSzc482fI,4250 +jinja2/asyncsupport.py,sha256=ZBFsDLuq3Gtji3Ia87lcyuDbqaHZJRdtShZcqwpFnSQ,7209 +jinja2/bccache.py,sha256=3Pmp4jo65M9FQuIxdxoDBbEDFwe4acDMQf77nEJfrHA,12139 +jinja2/compiler.py,sha256=Ta9W1Lit542wItAHXlDcg0sEOsFDMirCdlFPHAurg4o,66284 +jinja2/constants.py,sha256=RR1sTzNzUmKco6aZicw4JpQpJGCuPuqm1h1YmCNUEFY,1458 +jinja2/debug.py,sha256=neR7GIGGjZH3_ILJGVUYy3eLQCCaWJMXOb7o0kGInWc,8529 +jinja2/defaults.py,sha256=85B6YUUCyWPSdrSeVhcqFVuu_bHUAQXeey--FIwSeVQ,1126 +jinja2/environment.py,sha256=XDSLKc4SqNLMOwTSq3TbWEyA5WyXfuLuVD0wAVjEFwM,50629 +jinja2/exceptions.py,sha256=VjNLawcmf2ODffqVMCQK1cRmvFaUfQWF4u8ouP3QPcE,5425 +jinja2/ext.py,sha256=AtwL5O5enT_L3HR9-oBvhGyUTdGoyaqG_ICtnR_EVd4,26441 +jinja2/filters.py,sha256=9ORilsZrUoydSI9upz8_qGy7gozDWLYoFmlIBFSVRnQ,41439 +jinja2/idtracking.py,sha256=J3O4VHsrbf3wzwiBc7Cro26kHb6_5kbULeIOzocchIU,9211 +jinja2/lexer.py,sha256=nUFLRKhhKmmEWkLI65nQePgcQs7qsRdjVYZETMt_v0g,30331 +jinja2/loaders.py,sha256=C-fST_dmFjgWkp0ZuCkrgICAoOsoSIF28wfAFink0oU,17666 +jinja2/meta.py,sha256=QjyYhfNRD3QCXjBJpiPl9KgkEkGXJbAkCUq4-Ur10EQ,4131 +jinja2/nativetypes.py,sha256=Ul__gtVw4xH-0qvUvnCNHedQeNDwmEuyLJztzzSPeRg,2753 +jinja2/nodes.py,sha256=Mk1oJPVgIjnQw9WOqILvcu3rLepcFZ0ahxQm2mbwDwc,31095 +jinja2/optimizer.py,sha256=gQLlMYzvQhluhzmAIFA1tXS0cwgWYOjprN-gTRcHVsc,1457 +jinja2/parser.py,sha256=fcfdqePNTNyvosIvczbytVA332qpsURvYnCGcjDHSkA,35660 +jinja2/runtime.py,sha256=0y-BRyIEZ9ltByL2Id6GpHe1oDRQAwNeQvI0SKobNMw,30618 +jinja2/sandbox.py,sha256=knayyUvXsZ-F0mk15mO2-ehK9gsw04UhB8td-iUOtLc,17127 +jinja2/tests.py,sha256=iO_Y-9Vo60zrVe1lMpSl5sKHqAxe2leZHC08OoZ8K24,4799 +jinja2/utils.py,sha256=Wy4yC3IByqUWwnKln6SdaixdzgK74P6F5nf-gQZrYnU,22436 +jinja2/visitor.py,sha256=DUHupl0a4PGp7nxRtZFttUzAi1ccxzqc2hzetPYUz8U,3240 diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/WHEEL b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/WHEEL new file mode 100644 index 0000000..01b8fc7 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.36.2) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/entry_points.txt b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/entry_points.txt new file mode 100644 index 0000000..3619483 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[babel.extractors] +jinja2 = jinja2.ext:babel_extract [i18n] + diff --git a/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/top_level.txt b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/top_level.txt new file mode 100644 index 0000000..7f7afbf --- /dev/null +++ b/venv/lib/python3.6/site-packages/Jinja2-2.11.3.dist-info/top_level.txt @@ -0,0 +1 @@ +jinja2 diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst new file mode 100644 index 0000000..9d227a0 --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2010 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/METADATA b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/METADATA new file mode 100644 index 0000000..e4a7b90 --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/METADATA @@ -0,0 +1,94 @@ +Metadata-Version: 2.1 +Name: MarkupSafe +Version: 1.1.1 +Summary: Safely add untrusted strings to HTML/XML markup. +Home-page: https://palletsprojects.com/p/markupsafe/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: The Pallets Team +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Documentation, https://markupsafe.palletsprojects.com/ +Project-URL: Code, https://github.com/pallets/markupsafe +Project-URL: Issue tracker, https://github.com/pallets/markupsafe/issues +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Text Processing :: Markup :: HTML +Requires-Python: >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* +Description-Content-Type: text/x-rst + +MarkupSafe +========== + +MarkupSafe implements a text object that escapes characters so it is +safe to use in HTML and XML. Characters that have special meanings are +replaced so that they display as the actual characters. This mitigates +injection attacks, meaning untrusted user input can safely be displayed +on a page. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U MarkupSafe + +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + +Examples +-------- + +.. code-block:: pycon + + >>> from markupsafe import Markup, escape + >>> # escape replaces special characters and wraps in Markup + >>> escape('') + Markup(u'<script>alert(document.cookie);</script>') + >>> # wrap in Markup to mark text "safe" and prevent escaping + >>> Markup('Hello') + Markup('hello') + >>> escape(Markup('Hello')) + Markup('hello') + >>> # Markup is a text subclass (str on Python 3, unicode on Python 2) + >>> # methods and operators escape their arguments + >>> template = Markup("Hello %s") + >>> template % '"World"' + Markup('Hello "World"') + + +Donate +------ + +The Pallets organization develops and supports MarkupSafe and other +libraries that use it. In order to grow the community of contributors +and users, and allow the maintainers to devote more time to the +projects, `please donate today`_. + +.. _please donate today: https://palletsprojects.com/donate + + +Links +----- + +* Website: https://palletsprojects.com/p/markupsafe/ +* Documentation: https://markupsafe.palletsprojects.com/ +* Releases: https://pypi.org/project/MarkupSafe/ +* Code: https://github.com/pallets/markupsafe +* Issue tracker: https://github.com/pallets/markupsafe/issues +* Test status: https://dev.azure.com/pallets/markupsafe/_build +* Official chat: https://discord.gg/t6rrQZH + + diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/RECORD b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/RECORD new file mode 100644 index 0000000..0ec0e83 --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/RECORD @@ -0,0 +1,16 @@ +MarkupSafe-1.1.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +MarkupSafe-1.1.1.dist-info/LICENSE.rst,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475 +MarkupSafe-1.1.1.dist-info/METADATA,sha256=-XXnVvCxQP2QbHutIQq_7Pk9OATy-x0NC7gN_3_SCRE,3167 +MarkupSafe-1.1.1.dist-info/RECORD,, +MarkupSafe-1.1.1.dist-info/WHEEL,sha256=0k8JcxMWZX7TJIi6JFgSy7UEc0KxSHdqNxJk5plm2FY,112 +MarkupSafe-1.1.1.dist-info/top_level.txt,sha256=qy0Plje5IJuvsCBjejJyhDCjEAdcDLK_2agVcex8Z6U,11 +markupsafe/__init__.py,sha256=oTblO5f9KFM-pvnq9bB0HgElnqkJyqHnFN1Nx2NIvnY,10126 +markupsafe/__pycache__/__init__.cpython-36.pyc,, +markupsafe/__pycache__/_compat.cpython-36.pyc,, +markupsafe/__pycache__/_constants.cpython-36.pyc,, +markupsafe/__pycache__/_native.cpython-36.pyc,, +markupsafe/_compat.py,sha256=uEW1ybxEjfxIiuTbRRaJpHsPFf4yQUMMKaPgYEC5XbU,558 +markupsafe/_constants.py,sha256=zo2ajfScG-l1Sb_52EP3MlDCqO7Y1BVHUXXKRsVDRNk,4690 +markupsafe/_native.py,sha256=d-8S_zzYt2y512xYcuSxq0NeG2DUUvG80wVdTn-4KI8,1873 +markupsafe/_speedups.c,sha256=k0fzEIK3CP6MmMqeY0ob43TP90mVN0DTyn7BAl3RqSg,9884 +markupsafe/_speedups.cpython-36m-x86_64-linux-gnu.so,sha256=cSivDbl3OniO_9aYaNWWHz_eSOMhpALIiE1z1TQXsu4,46832 diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL new file mode 100644 index 0000000..aa767d9 --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.36.2) +Root-Is-Purelib: false +Tag: cp36-cp36m-manylinux2010_x86_64 + diff --git a/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt new file mode 100644 index 0000000..75bf729 --- /dev/null +++ b/venv/lib/python3.6/site-packages/MarkupSafe-1.1.1.dist-info/top_level.txt @@ -0,0 +1 @@ +markupsafe diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/INSTALLER b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/LICENSE.rst b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/LICENSE.rst new file mode 100644 index 0000000..c37cae4 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/LICENSE.rst @@ -0,0 +1,28 @@ +Copyright 2007 Pallets + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/METADATA b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/METADATA new file mode 100644 index 0000000..eb5f709 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/METADATA @@ -0,0 +1,128 @@ +Metadata-Version: 2.1 +Name: Werkzeug +Version: 1.0.1 +Summary: The comprehensive WSGI web application library. +Home-page: https://palletsprojects.com/p/werkzeug/ +Author: Armin Ronacher +Author-email: armin.ronacher@active-4.com +Maintainer: Pallets +Maintainer-email: contact@palletsprojects.com +License: BSD-3-Clause +Project-URL: Documentation, https://werkzeug.palletsprojects.com/ +Project-URL: Code, https://github.com/pallets/werkzeug +Project-URL: Issue tracker, https://github.com/pallets/werkzeug/issues +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware +Classifier: Topic :: Software Development :: Libraries :: Application Frameworks +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Requires-Python: >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.* +Description-Content-Type: text/x-rst +Provides-Extra: dev +Requires-Dist: pytest ; extra == 'dev' +Requires-Dist: pytest-timeout ; extra == 'dev' +Requires-Dist: coverage ; extra == 'dev' +Requires-Dist: tox ; extra == 'dev' +Requires-Dist: sphinx ; extra == 'dev' +Requires-Dist: pallets-sphinx-themes ; extra == 'dev' +Requires-Dist: sphinx-issues ; extra == 'dev' +Provides-Extra: watchdog +Requires-Dist: watchdog ; extra == 'watchdog' + +Werkzeug +======== + +*werkzeug* German noun: "tool". Etymology: *werk* ("work"), *zeug* ("stuff") + +Werkzeug is a comprehensive `WSGI`_ web application library. It began as +a simple collection of various utilities for WSGI applications and has +become one of the most advanced WSGI utility libraries. + +It includes: + +- An interactive debugger that allows inspecting stack traces and + source code in the browser with an interactive interpreter for any + frame in the stack. +- A full-featured request object with objects to interact with + headers, query args, form data, files, and cookies. +- A response object that can wrap other WSGI applications and handle + streaming data. +- A routing system for matching URLs to endpoints and generating URLs + for endpoints, with an extensible system for capturing variables + from URLs. +- HTTP utilities to handle entity tags, cache control, dates, user + agents, cookies, files, and more. +- A threaded WSGI server for use while developing applications + locally. +- A test client for simulating HTTP requests during testing without + requiring running a server. + +Werkzeug is Unicode aware and doesn't enforce any dependencies. It is up +to the developer to choose a template engine, database adapter, and even +how to handle requests. It can be used to build all sorts of end user +applications such as blogs, wikis, or bulletin boards. + +`Flask`_ wraps Werkzeug, using it to handle the details of WSGI while +providing more structure and patterns for defining powerful +applications. + + +Installing +---------- + +Install and update using `pip`_: + +.. code-block:: text + + pip install -U Werkzeug + + +A Simple Example +---------------- + +.. code-block:: python + + from werkzeug.wrappers import Request, Response + + @Request.application + def application(request): + return Response('Hello, World!') + + if __name__ == '__main__': + from werkzeug.serving import run_simple + run_simple('localhost', 4000, application) + + +Links +----- + +- Website: https://palletsprojects.com/p/werkzeug/ +- Documentation: https://werkzeug.palletsprojects.com/ +- Releases: https://pypi.org/project/Werkzeug/ +- Code: https://github.com/pallets/werkzeug +- Issue tracker: https://github.com/pallets/werkzeug/issues +- Test status: https://dev.azure.com/pallets/werkzeug/_build +- Official chat: https://discord.gg/t6rrQZH + +.. _WSGI: https://wsgi.readthedocs.io/en/latest/ +.. _Flask: https://www.palletsprojects.com/p/flask/ +.. _pip: https://pip.pypa.io/en/stable/quickstart/ + + diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/RECORD b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/RECORD new file mode 100644 index 0000000..e3628ff --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/RECORD @@ -0,0 +1,101 @@ +Werkzeug-1.0.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +Werkzeug-1.0.1.dist-info/LICENSE.rst,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475 +Werkzeug-1.0.1.dist-info/METADATA,sha256=d0zmVNa4UC2-nAo2A8_81oiy123D6JTGRSuY_Ymgyt4,4730 +Werkzeug-1.0.1.dist-info/RECORD,, +Werkzeug-1.0.1.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110 +Werkzeug-1.0.1.dist-info/top_level.txt,sha256=QRyj2VjwJoQkrwjwFIOlB8Xg3r9un0NtqVHQF-15xaw,9 +werkzeug/__init__.py,sha256=rb-yPiXOjTLbtDOl5fQp5hN7oBdaoXAoQ-slAAvfZAo,502 +werkzeug/__pycache__/__init__.cpython-36.pyc,, +werkzeug/__pycache__/_compat.cpython-36.pyc,, +werkzeug/__pycache__/_internal.cpython-36.pyc,, +werkzeug/__pycache__/_reloader.cpython-36.pyc,, +werkzeug/__pycache__/datastructures.cpython-36.pyc,, +werkzeug/__pycache__/exceptions.cpython-36.pyc,, +werkzeug/__pycache__/filesystem.cpython-36.pyc,, +werkzeug/__pycache__/formparser.cpython-36.pyc,, +werkzeug/__pycache__/http.cpython-36.pyc,, +werkzeug/__pycache__/local.cpython-36.pyc,, +werkzeug/__pycache__/posixemulation.cpython-36.pyc,, +werkzeug/__pycache__/routing.cpython-36.pyc,, +werkzeug/__pycache__/security.cpython-36.pyc,, +werkzeug/__pycache__/serving.cpython-36.pyc,, +werkzeug/__pycache__/test.cpython-36.pyc,, +werkzeug/__pycache__/testapp.cpython-36.pyc,, +werkzeug/__pycache__/urls.cpython-36.pyc,, +werkzeug/__pycache__/useragents.cpython-36.pyc,, +werkzeug/__pycache__/utils.cpython-36.pyc,, +werkzeug/__pycache__/wsgi.cpython-36.pyc,, +werkzeug/_compat.py,sha256=zjufTNrhQ8BgYSGSh-sVu6iW3r3O9WzjE9j-qJobx-g,6671 +werkzeug/_internal.py,sha256=d_4AqheyS6dHMViwdc0drFrjs67ZzT6Ej2gWf-Z-Iys,14351 +werkzeug/_reloader.py,sha256=I3mg3oRQ0lLzl06oEoVopN3bN7CtINuuUQdqDcmTnEs,11531 +werkzeug/datastructures.py,sha256=AonxOcwU0TPMEzfKF1368ySULxHgxE-JE-DEAGdo2ts,100480 +werkzeug/debug/__init__.py,sha256=3RtUMc5Y9hYyK11ugHltgkQ9Dt-ViR945Vy_X5NV7zU,17289 +werkzeug/debug/__pycache__/__init__.cpython-36.pyc,, +werkzeug/debug/__pycache__/console.cpython-36.pyc,, +werkzeug/debug/__pycache__/repr.cpython-36.pyc,, +werkzeug/debug/__pycache__/tbtools.cpython-36.pyc,, +werkzeug/debug/console.py,sha256=OATaO7KHYMqpbzIFe1HeW9Mnl3wZgA3jMQoGDPn5URc,5488 +werkzeug/debug/repr.py,sha256=lIwuhbyrMwVe3P_cFqNyqzHL7P93TLKod7lw9clydEw,9621 +werkzeug/debug/shared/FONT_LICENSE,sha256=LwAVEI1oYnvXiNMT9SnCH_TaLCxCpeHziDrMg0gPkAI,4673 +werkzeug/debug/shared/console.png,sha256=bxax6RXXlvOij_KeqvSNX0ojJf83YbnZ7my-3Gx9w2A,507 +werkzeug/debug/shared/debugger.js,sha256=rOhqZMRfpZnnu6_XCGn6wMWPhtfwRAcyZKksdIxPJas,6400 +werkzeug/debug/shared/jquery.js,sha256=CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo,88145 +werkzeug/debug/shared/less.png,sha256=-4-kNRaXJSONVLahrQKUxMwXGm9R4OnZ9SxDGpHlIR4,191 +werkzeug/debug/shared/more.png,sha256=GngN7CioHQoV58rH6ojnkYi8c_qED2Aka5FO5UXrReY,200 +werkzeug/debug/shared/source.png,sha256=RoGcBTE4CyCB85GBuDGTFlAnUqxwTBiIfDqW15EpnUQ,818 +werkzeug/debug/shared/style.css,sha256=gZ9uhmb5zj3XLuT9RvnMp6jMINgQ-VVBCp-2AZbG3YQ,6604 +werkzeug/debug/shared/ubuntu.ttf,sha256=1eaHFyepmy4FyDvjLVzpITrGEBu_CZYY94jE0nED1c0,70220 +werkzeug/debug/tbtools.py,sha256=2iJ8RURUZUSbopOIehy53LnVJWx47lsHN2V2l6hc7Wc,20363 +werkzeug/exceptions.py,sha256=UTYSDkmAsH-vt8VSidlEffwqBVNXuT7bRg-_NqgUe8A,25188 +werkzeug/filesystem.py,sha256=HzKl-j0Hd8Jl66j778UbPTAYNnY6vUZgYLlBZ0e7uw0,2101 +werkzeug/formparser.py,sha256=Sto0jZid9im9ZVIf56vilCdyX-arK33wSftkYsLCnzo,21788 +werkzeug/http.py,sha256=KVRV3yFK14PJeI56qClEq4qxFdvKUQVy4C_dwuWz9_Q,43107 +werkzeug/local.py,sha256=_Tk7gB238pPWUU7habxFkZF02fiCMRVW6d62YWL1Rh0,14371 +werkzeug/middleware/__init__.py,sha256=f1SFZo67IlW4k1uqKzNHxYQlsakUS-D6KK_j0e3jjwQ,549 +werkzeug/middleware/__pycache__/__init__.cpython-36.pyc,, +werkzeug/middleware/__pycache__/dispatcher.cpython-36.pyc,, +werkzeug/middleware/__pycache__/http_proxy.cpython-36.pyc,, +werkzeug/middleware/__pycache__/lint.cpython-36.pyc,, +werkzeug/middleware/__pycache__/profiler.cpython-36.pyc,, +werkzeug/middleware/__pycache__/proxy_fix.cpython-36.pyc,, +werkzeug/middleware/__pycache__/shared_data.cpython-36.pyc,, +werkzeug/middleware/dispatcher.py,sha256=_-KoMzHtcISHS7ouWKAOraqlCLprdh83YOAn_8DjLp8,2240 +werkzeug/middleware/http_proxy.py,sha256=lRjTdMmghHiZuZrS7_UJ3gZc-vlFizhBbFZ-XZPLwIA,7117 +werkzeug/middleware/lint.py,sha256=ItTwuWJnflF8xMT1uqU_Ty1ryhux-CjeUfskqaUpxsw,12967 +werkzeug/middleware/profiler.py,sha256=8B_s23d6BGrU_q54gJsm6kcCbOJbTSqrXCsioHON0Xs,4471 +werkzeug/middleware/proxy_fix.py,sha256=K5oZ3DPXOzdZi0Xba5zW7ClPOxgUuqXHQHvY2-AWCGw,6431 +werkzeug/middleware/shared_data.py,sha256=sPSRTKqtKSVBUyN8fr6jOJbdq9cdOLu6pg3gz4Y_1Xo,9599 +werkzeug/posixemulation.py,sha256=gSSiv1SCmOyzOM_nq1ZaZCtxP__C5MeDJl_4yXJmi4Q,3541 +werkzeug/routing.py,sha256=6-iZ7CKeUILYAehoKXLbmi5E6LgLbwuzUh8TNplnf5Q,79019 +werkzeug/security.py,sha256=81149MplFq7-hD4RK4sKp9kzXXejjV9D4lWBzaRyeQ8,8106 +werkzeug/serving.py,sha256=YvTqvurA-Mnj8mkqRe2kBdVr2ap4ibCq1ByQjOA6g1w,38694 +werkzeug/test.py,sha256=GJ9kxTMSJ-nB7kfGtxuROr9JGmXxDRev-2U1SkeUJGE,39564 +werkzeug/testapp.py,sha256=bHekqMsqRfVxwgFbvOMem-DYa_sdB7R47yUXpt1RUTo,9329 +werkzeug/urls.py,sha256=T8-hV_1vwhu6xhX93FwsHteK-W-kIE2orj5WoMf-WFw,39322 +werkzeug/useragents.py,sha256=TSoGv5IOvP375eK5gLLpsLQCeUgTR6sO1WftmAP_YvM,5563 +werkzeug/utils.py,sha256=hrVK4u_wi8z9viBO9bgOLlm1aaIvCpn-p2d1FeZQDEo,25251 +werkzeug/wrappers/__init__.py,sha256=S4VioKAmF_av9Ec9zQvG71X1EOkYfPx1TYck9jyDiyY,1384 +werkzeug/wrappers/__pycache__/__init__.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/accept.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/auth.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/base_request.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/base_response.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/common_descriptors.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/cors.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/etag.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/json.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/request.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/response.cpython-36.pyc,, +werkzeug/wrappers/__pycache__/user_agent.cpython-36.pyc,, +werkzeug/wrappers/accept.py,sha256=TIvjUc0g73fhTWX54wg_D9NNzKvpnG1X8u1w26tK1o8,1760 +werkzeug/wrappers/auth.py,sha256=Pmn6iaGHBrUyHbJpW0lZhO_q9RVoAa5QalaTqcavdAI,1158 +werkzeug/wrappers/base_request.py,sha256=4TuGlKWeKQdlq4eU94hJYcXSfWo8Rk7CS1Ef5lJ3ZM0,26012 +werkzeug/wrappers/base_response.py,sha256=JTxJZ8o-IBetpoWJqt2HFwPaNWNDAlM3_GXJe1Whw80,27784 +werkzeug/wrappers/common_descriptors.py,sha256=X2Ktd5zUWsmcd4ciaF62Dd8Lru9pLGP_XDUNukc8cXs,12829 +werkzeug/wrappers/cors.py,sha256=XMbaCol4dWTGvb-dCJBoN0p3JX91v93AIAHd7tnB3L4,3466 +werkzeug/wrappers/etag.py,sha256=XMXtyfByBsOjxwaX8U7ZtUY7JXkbQLP45oXZ0qkyTNs,12217 +werkzeug/wrappers/json.py,sha256=HvK_A4NpO0sLqgb10sTJcoZydYOwyNiPCJPV7SVgcgE,4343 +werkzeug/wrappers/request.py,sha256=QbHGqDpGPN684pnOPEokwkPESfm-NnfYM7ydOMxW_NI,1514 +werkzeug/wrappers/response.py,sha256=Oqv8TMG_dnOKTq_V30ddgkO5B7IJhkVPODvm7cbhZ3c,2524 +werkzeug/wrappers/user_agent.py,sha256=YJb-vr12cujG7sQMG9V89VsJa-03SWSenhg1W4cT0EY,435 +werkzeug/wsgi.py,sha256=ZGk85NzRyQTzkYis-xl8V9ydJgfClBdStvhzDzER2mw,34367 diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/WHEEL b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/WHEEL new file mode 100644 index 0000000..ef99c6c --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.34.2) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/top_level.txt b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/top_level.txt new file mode 100644 index 0000000..6fe8da8 --- /dev/null +++ b/venv/lib/python3.6/site-packages/Werkzeug-1.0.1.dist-info/top_level.txt @@ -0,0 +1 @@ +werkzeug diff --git a/venv/lib/python3.6/site-packages/_distutils_hack/__init__.py b/venv/lib/python3.6/site-packages/_distutils_hack/__init__.py new file mode 100644 index 0000000..47ce249 --- /dev/null +++ b/venv/lib/python3.6/site-packages/_distutils_hack/__init__.py @@ -0,0 +1,128 @@ +import sys +import os +import re +import importlib +import warnings + + +is_pypy = '__pypy__' in sys.builtin_module_names + + +warnings.filterwarnings('ignore', + '.+ distutils .+ deprecated', + DeprecationWarning) + + +def warn_distutils_present(): + if 'distutils' not in sys.modules: + return + if is_pypy and sys.version_info < (3, 7): + # PyPy for 3.6 unconditionally imports distutils, so bypass the warning + # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250 + return + warnings.warn( + "Distutils was imported before Setuptools, but importing Setuptools " + "also replaces the `distutils` module in `sys.modules`. This may lead " + "to undesirable behaviors or errors. To avoid these issues, avoid " + "using distutils directly, ensure that setuptools is installed in the " + "traditional way (e.g. not an editable install), and/or make sure " + "that setuptools is always imported before distutils.") + + +def clear_distutils(): + if 'distutils' not in sys.modules: + return + warnings.warn("Setuptools is replacing distutils.") + mods = [name for name in sys.modules if re.match(r'distutils\b', name)] + for name in mods: + del sys.modules[name] + + +def enabled(): + """ + Allow selection of distutils by environment variable. + """ + which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib') + return which == 'local' + + +def ensure_local_distutils(): + clear_distutils() + distutils = importlib.import_module('setuptools._distutils') + distutils.__name__ = 'distutils' + sys.modules['distutils'] = distutils + + # sanity check that submodules load as expected + core = importlib.import_module('distutils.core') + assert '_distutils' in core.__file__, core.__file__ + + +def do_override(): + """ + Ensure that the local copy of distutils is preferred over stdlib. + + See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401 + for more motivation. + """ + if enabled(): + warn_distutils_present() + ensure_local_distutils() + + +class DistutilsMetaFinder: + def find_spec(self, fullname, path, target=None): + if path is not None: + return + + method_name = 'spec_for_{fullname}'.format(**locals()) + method = getattr(self, method_name, lambda: None) + return method() + + def spec_for_distutils(self): + import importlib.abc + import importlib.util + + class DistutilsLoader(importlib.abc.Loader): + + def create_module(self, spec): + return importlib.import_module('setuptools._distutils') + + def exec_module(self, module): + pass + + return importlib.util.spec_from_loader('distutils', DistutilsLoader()) + + def spec_for_pip(self): + """ + Ensure stdlib distutils when running under pip. + See pypa/pip#8761 for rationale. + """ + if self.pip_imported_during_build(): + return + clear_distutils() + self.spec_for_distutils = lambda: None + + @staticmethod + def pip_imported_during_build(): + """ + Detect if pip is being imported in a build script. Ref #2355. + """ + import traceback + return any( + frame.f_globals['__file__'].endswith('setup.py') + for frame, line in traceback.walk_stack(None) + ) + + +DISTUTILS_FINDER = DistutilsMetaFinder() + + +def add_shim(): + sys.meta_path.insert(0, DISTUTILS_FINDER) + + +def remove_shim(): + try: + sys.meta_path.remove(DISTUTILS_FINDER) + except ValueError: + pass diff --git a/venv/lib/python3.6/site-packages/_distutils_hack/override.py b/venv/lib/python3.6/site-packages/_distutils_hack/override.py new file mode 100644 index 0000000..2cc433a --- /dev/null +++ b/venv/lib/python3.6/site-packages/_distutils_hack/override.py @@ -0,0 +1 @@ +__import__('_distutils_hack').do_override() diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/AUTHORS b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/AUTHORS new file mode 100644 index 0000000..1f14fe0 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/AUTHORS @@ -0,0 +1,49 @@ +Behold, mortal, the origins of Beautiful Soup... +================================================ + +Leonard Richardson is the primary maintainer. + +Aaron DeVore and Isaac Muse have made significant contributions to the +code base. + +Mark Pilgrim provided the encoding detection code that forms the base +of UnicodeDammit. + +Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful +Soup 4 working under Python 3. + +Simon Willison wrote soupselect, which was used to make Beautiful Soup +support CSS selectors. Isaac Muse wrote SoupSieve, which made it +possible to _remove_ the CSS selector code from Beautiful Soup. + +Sam Ruby helped with a lot of edge cases. + +Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his +work in solving the nestable tags conundrum. + +An incomplete list of people have contributed patches to Beautiful +Soup: + + Istvan Albert, Andrew Lin, Anthony Baxter, Oliver Beattie, Andrew +Boyko, Tony Chang, Francisco Canas, "Delong", Zephyr Fang, Fuzzy, +Roman Gaufman, Yoni Gilad, Richie Hindle, Toshihiro Kamiya, Peteris +Krumins, Kent Johnson, Marek Kapolka, Andreas Kostyrka, Roel Kramer, +Ben Last, Robert Leftwich, Stefaan Lippens, "liquider", Staffan +Malmgren, Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", +Ed Oskiewicz, Martijn Peters, Greg Phillips, Giles Radford, Stefano +Revera, Arthur Rudolph, Marko Samastur, James Salter, Jouni Seppnen, +Alexander Schmolck, Tim Shirley, Geoffrey Sneddon, Ville Skytt, +"Vikas", Jens Svalgaard, Andy Theyers, Eric Weiser, Glyn Webster, John +Wiseman, Paul Wright, Danny Yoo + +An incomplete list of people who made suggestions or found bugs or +found ways to break Beautiful Soup: + + Hanno Bck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel, + Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes, + Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams, + warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison, + Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed + Summers, Dennis Sutch, Chris Smith, Aaron Swartz, Stuart + Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de + Sousa Rocha, Yichun Wei, Per Vognsen diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/COPYING.txt b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/COPYING.txt new file mode 100644 index 0000000..fb6ae69 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/COPYING.txt @@ -0,0 +1,27 @@ +Beautiful Soup is made available under the MIT license: + + Copyright (c) 2004-2017 Leonard Richardson + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +Beautiful Soup incorporates code from the html5lib library, which is +also made available under the MIT license. Copyright (c) 2006-2013 +James Graham and other contributors diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/INSTALLER b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/INSTALLER new file mode 100644 index 0000000..a1b589e --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/LICENSE b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/LICENSE new file mode 100644 index 0000000..4c068ba --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/LICENSE @@ -0,0 +1,30 @@ +Beautiful Soup is made available under the MIT license: + + Copyright (c) 2004-2019 Leonard Richardson + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +Beautiful Soup incorporates code from the html5lib library, which is +also made available under the MIT license. Copyright (c) 2006-2013 +James Graham and other contributors + +Beautiful Soup depends on the soupsieve library, which is also made +available under the MIT license. Copyright (c) 2018 Isaac Muse diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/METADATA b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/METADATA new file mode 100644 index 0000000..88c03b6 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/METADATA @@ -0,0 +1,132 @@ +Metadata-Version: 2.1 +Name: beautifulsoup4 +Version: 4.9.3 +Summary: Screen-scraping library +Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/ +Author: Leonard Richardson +Author-email: leonardr@segfault.org +License: MIT +Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/ +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Text Processing :: Markup :: XML +Classifier: Topic :: Text Processing :: Markup :: SGML +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Description-Content-Type: text/markdown +Requires-Dist: soupsieve (<2.0,>1.2) ; python_version < "3.0" +Requires-Dist: soupsieve (>1.2) ; python_version >= "3.0" +Provides-Extra: html5lib +Requires-Dist: html5lib ; extra == 'html5lib' +Provides-Extra: lxml +Requires-Dist: lxml ; extra == 'lxml' + +Beautiful Soup is a library that makes it easy to scrape information +from web pages. It sits atop an HTML or XML parser, providing Pythonic +idioms for iterating, searching, and modifying the parse tree. + +# Quick start + +``` +>>> from bs4 import BeautifulSoup +>>> soup = BeautifulSoup("

SomebadHTML") +>>> print(soup.prettify()) + + +

+ Some + + bad + + HTML + + +

+ + +>>> soup.find(text="bad") +'bad' +>>> soup.i +HTML +# +>>> soup = BeautifulSoup("SomebadXML", "xml") +# +>>> print(soup.prettify()) + + + Some + + bad + + XML + + +``` + +To go beyond the basics, [comprehensive documentation is available](http://www.crummy.com/software/BeautifulSoup/bs4/doc/). + +# Links + +* [Homepage](http://www.crummy.com/software/BeautifulSoup/bs4/) +* [Documentation](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) +* [Discussion group](http://groups.google.com/group/beautifulsoup/) +* [Development](https://code.launchpad.net/beautifulsoup/) +* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/) +* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/CHANGELOG) + +# Note on Python 2 sunsetting + +Since 2012, Beautiful Soup has been developed as a Python 2 library +which is automatically converted to Python 3 code as necessary. This +makes it impossible to take advantage of some features of Python +3. + +For this reason, I plan to discontinue Beautiful Soup's Python 2 +support at some point after December 31, 2020: one year after the +sunset date for Python 2 itself. Beyond that point, new Beautiful Soup +development will exclusively target Python 3. Of course, older +releases of Beautiful Soup, which support both versions, will continue +to be available. + +# Supporting the project + +If you use Beautiful Soup as part of your professional work, please consider a +[Tidelift subscription](https://tidelift.com/subscription/pkg/pypi-beautifulsoup4?utm_source=pypi-beautifulsoup4&utm_medium=referral&utm_campaign=readme). +This will support many of the free software projects your organization +depends on, not just Beautiful Soup. + +If you use Beautiful Soup for personal projects, the best way to say +thank you is to read +[Tool Safety](https://www.crummy.com/software/BeautifulSoup/zine/), a zine I +wrote about what Beautiful Soup has taught me about software +development. + +# Building the documentation + +The bs4/doc/ directory contains full documentation in Sphinx +format. Run `make html` in that directory to create HTML +documentation. + +# Running the unit tests + +Beautiful Soup supports unit test discovery from the project root directory: + +``` +$ nosetests +``` + +``` +$ python -m unittest discover -s bs4 +``` + +If you checked out the source tree, you should see a script in the +home directory called test-all-versions. This script will run the unit +tests under Python 2, then create a temporary Python 3 conversion of +the source and run the unit tests again under Python 3. + + diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/RECORD b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/RECORD new file mode 100644 index 0000000..aad6030 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/RECORD @@ -0,0 +1,44 @@ +beautifulsoup4-4.9.3.dist-info/AUTHORS,sha256=uSIdbrBb1sobdXl7VrlUvuvim2dN9kF3MH4Edn0WKGE,2176 +beautifulsoup4-4.9.3.dist-info/COPYING.txt,sha256=pH6lEjYJhGT-C09Vl0NZC1MwVtngD0nsv4Apn6tH4jE,1315 +beautifulsoup4-4.9.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +beautifulsoup4-4.9.3.dist-info/LICENSE,sha256=ynIn3bnu1syAnhV_Z7Ag543eBjJAAB0RhW-FxJy25CM,1447 +beautifulsoup4-4.9.3.dist-info/METADATA,sha256=iY3LTmChfV6eWiLC4MPfy_FZL9pllucV7IzOVrF117Q,4190 +beautifulsoup4-4.9.3.dist-info/RECORD,, +beautifulsoup4-4.9.3.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92 +beautifulsoup4-4.9.3.dist-info/top_level.txt,sha256=H8VT-IuPWLzQqwG9_eChjXDJ1z0H9RRebdSR90Bjnkw,4 +bs4/__init__.py,sha256=Xg4iRoWU7bD9HQo8OCHh6BP9g74T8n37QjQFhT3_GLA,32102 +bs4/__pycache__/__init__.cpython-36.pyc,, +bs4/__pycache__/dammit.cpython-36.pyc,, +bs4/__pycache__/diagnose.cpython-36.pyc,, +bs4/__pycache__/element.cpython-36.pyc,, +bs4/__pycache__/formatter.cpython-36.pyc,, +bs4/__pycache__/testing.cpython-36.pyc,, +bs4/builder/__init__.py,sha256=dJcyrx6CdAuKQhg5KfpKTAEQeDKvIW3PWHo-0aKDaLg,19777 +bs4/builder/__pycache__/__init__.cpython-36.pyc,, +bs4/builder/__pycache__/_html5lib.cpython-36.pyc,, +bs4/builder/__pycache__/_htmlparser.cpython-36.pyc,, +bs4/builder/__pycache__/_lxml.cpython-36.pyc,, +bs4/builder/_html5lib.py,sha256=hDxlzVrAku_eU7zEt4gZ-sAXzG58GvkLfMz6P4zUqoA,18748 +bs4/builder/_htmlparser.py,sha256=80-Nb3QXS3PrSa-ReEBTh1X8-C2HkLFLLAbC0fSKuoU,18405 +bs4/builder/_lxml.py,sha256=e4w91RZi3NII_QYe2e1-EiN_BxQtgJPSRwQ8Xgz41ZA,12234 +bs4/dammit.py,sha256=k_XPB3kbZsHM01ckf9BxCUB2Eu2dIQ3d3DDt7UEv9RA,34130 +bs4/diagnose.py,sha256=WOzytCTkvqh_fGhqYlMyaYVjtH50w4jbdf1Fd0iundE,7755 +bs4/element.py,sha256=DbavvJfetuG3GWM_mOsNPVk7WuyQtJQ6qti8FFzwZvE,81650 +bs4/formatter.py,sha256=Wayv1d6fUc9BSCa2k9uhvWwm89xCukdtJhyi9Sxvkuc,5654 +bs4/testing.py,sha256=8C72bkPqP_zPzpP-f9i1qtlNuKonHD-VtamNxUKwIBE,45930 +bs4/tests/__init__.py,sha256=bdUBDE750n7qNEfue7-3a1fBaUxJlvZMkvJvZa-lbYs,27 +bs4/tests/__pycache__/__init__.cpython-36.pyc,, +bs4/tests/__pycache__/test_builder_registry.cpython-36.pyc,, +bs4/tests/__pycache__/test_docs.cpython-36.pyc,, +bs4/tests/__pycache__/test_html5lib.cpython-36.pyc,, +bs4/tests/__pycache__/test_htmlparser.cpython-36.pyc,, +bs4/tests/__pycache__/test_lxml.cpython-36.pyc,, +bs4/tests/__pycache__/test_soup.cpython-36.pyc,, +bs4/tests/__pycache__/test_tree.cpython-36.pyc,, +bs4/tests/test_builder_registry.py,sha256=pllfRpArh9TYhjjRUiu1wITr9Ryyv4hiaAtRjij-k4E,5582 +bs4/tests/test_docs.py,sha256=FXfz2bGL4Xe0q6duwpmg9hmFiZuU4DVJPNZ0hTb6aH4,1067 +bs4/tests/test_html5lib.py,sha256=eWnLGHek_RO_TMq0Ixpb1RF3BEDrvhenMf2eaEBjjsg,6754 +bs4/tests/test_htmlparser.py,sha256=3294XvFbWVe0AYoTlnLPEDW_a0Om0BKRcsrwlJbxUaI,3941 +bs4/tests/test_lxml.py,sha256=xJr8eDrtHSb_vQw88lYEKyfdM1Hel4-dBaz14vQq78M,4105 +bs4/tests/test_soup.py,sha256=EhE1dhHKyctNu0y2l0ql6FOHg9qliEt8Kh7jfCx1lDw,29303 +bs4/tests/test_tree.py,sha256=UsXGvnlTBKixno0sbnFllZo6pm-7EiSvOY1dOeWcqFg,89437 diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/WHEEL b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/WHEEL new file mode 100644 index 0000000..b552003 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.34.2) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/top_level.txt b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/top_level.txt new file mode 100644 index 0000000..1315442 --- /dev/null +++ b/venv/lib/python3.6/site-packages/beautifulsoup4-4.9.3.dist-info/top_level.txt @@ -0,0 +1 @@ +bs4 diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/PKG-INFO b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/PKG-INFO new file mode 100644 index 0000000..60137e4 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/PKG-INFO @@ -0,0 +1,21 @@ +Metadata-Version: 1.1 +Name: bs4 +Version: 0.0.1 +Summary: Screen-scraping library +Home-page: https://pypi.python.org/pypi/beautifulsoup4 +Author: Leonard Richardson +Author-email: leonardr@segfault.org +License: MIT +Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/ +Description: Use `beautifulsoup4 `_ instead. +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Text Processing :: Markup :: XML +Classifier: Topic :: Text Processing :: Markup :: SGML +Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/SOURCES.txt b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/SOURCES.txt new file mode 100644 index 0000000..b46d1c5 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/SOURCES.txt @@ -0,0 +1,7 @@ +setup.cfg +setup.py +bs4.egg-info/PKG-INFO +bs4.egg-info/SOURCES.txt +bs4.egg-info/dependency_links.txt +bs4.egg-info/requires.txt +bs4.egg-info/top_level.txt \ No newline at end of file diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/dependency_links.txt b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/installed-files.txt b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/installed-files.txt new file mode 100644 index 0000000..62cfadb --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/installed-files.txt @@ -0,0 +1,5 @@ +PKG-INFO +SOURCES.txt +dependency_links.txt +requires.txt +top_level.txt diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/requires.txt b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/requires.txt new file mode 100644 index 0000000..c1f5f71 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/requires.txt @@ -0,0 +1 @@ +beautifulsoup4 diff --git a/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/top_level.txt b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4-0.0.1-py3.6.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/venv/lib/python3.6/site-packages/bs4/__init__.py b/venv/lib/python3.6/site-packages/bs4/__init__.py new file mode 100644 index 0000000..74cab3f --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/__init__.py @@ -0,0 +1,791 @@ +"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend". + +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup uses a pluggable XML or HTML parser to parse a +(possibly invalid) document into a tree representation. Beautiful Soup +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. + +Beautiful Soup works with Python 2.7 and up. It works better if lxml +and/or html5lib is installed. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ +""" + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "4.9.3" +__copyright__ = "Copyright (c) 2004-2020 Leonard Richardson" +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +__all__ = ['BeautifulSoup'] + +from collections import Counter +import os +import re +import sys +import traceback +import warnings + +from .builder import builder_registry, ParserRejectedMarkup +from .dammit import UnicodeDammit +from .element import ( + CData, + Comment, + DEFAULT_OUTPUT_ENCODING, + Declaration, + Doctype, + NavigableString, + PageElement, + ProcessingInstruction, + PYTHON_SPECIFIC_ENCODINGS, + ResultSet, + Script, + Stylesheet, + SoupStrainer, + Tag, + TemplateString, + ) + +# The very first thing we do is give a useful error if someone is +# running this code under Python 3 without converting it. +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' + +# Define some custom warnings. +class GuessedAtParserWarning(UserWarning): + """The warning issued when BeautifulSoup has to guess what parser to + use -- probably because no parser was specified in the constructor. + """ + +class MarkupResemblesLocatorWarning(UserWarning): + """The warning issued when BeautifulSoup is given 'markup' that + actually looks like a resource locator -- a URL or a path to a file + on disk. + """ + + +class BeautifulSoup(Tag): + """A data structure representing a parsed HTML or XML document. + + Most of the methods you'll call on a BeautifulSoup object are inherited from + PageElement or Tag. + + Internally, this class defines the basic interface called by the + tree builders when converting an HTML/XML document into a data + structure. The interface abstracts away the differences between + parsers. To write a new tree builder, you'll need to understand + these methods as a whole. + + These methods will be called by the BeautifulSoup constructor: + * reset() + * feed(markup) + + The tree builder may call these methods from its feed() implementation: + * handle_starttag(name, attrs) # See note about return value + * handle_endtag(name) + * handle_data(data) # Appends to the current data node + * endData(containerClass) # Ends the current data node + + No matter how complicated the underlying parser is, you should be + able to build a tree using 'start tag' events, 'end tag' events, + 'data' events, and "done with data" events. + + If you encounter an empty-element tag (aka a self-closing tag, + like HTML's
tag), call handle_starttag and then + handle_endtag. + """ + + # Since BeautifulSoup subclasses Tag, it's possible to treat it as + # a Tag with a .name. This name makes it clear the BeautifulSoup + # object isn't a real markup tag. + ROOT_TAG_NAME = '[document]' + + # If the end-user gives no indication which tree builder they + # want, look for one with these features. + DEFAULT_BUILDER_FEATURES = ['html', 'fast'] + + # A string containing all ASCII whitespace characters, used in + # endData() to detect data chunks that seem 'empty'. + ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' + + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" + + def __init__(self, markup="", features=None, builder=None, + parse_only=None, from_encoding=None, exclude_encodings=None, + element_classes=None, **kwargs): + """Constructor. + + :param markup: A string or a file-like object representing + markup to be parsed. + + :param features: Desirable features of the parser to be + used. This may be the name of a specific parser ("lxml", + "lxml-xml", "html.parser", or "html5lib") or it may be the + type of markup to be used ("html", "html5", "xml"). It's + recommended that you name a specific parser, so that + Beautiful Soup gives you the same results across platforms + and virtual environments. + + :param builder: A TreeBuilder subclass to instantiate (or + instance to use) instead of looking one up based on + `features`. You only need to use this if you've implemented a + custom TreeBuilder. + + :param parse_only: A SoupStrainer. Only parts of the document + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. + + :param from_encoding: A string indicating the encoding of the + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. + + :param exclude_encodings: A list of strings indicating + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. + + :param element_classes: A dictionary mapping BeautifulSoup + classes like Tag and NavigableString, to other classes you'd + like to be instantiated instead as the parse tree is + built. This is useful for subclassing Tag or NavigableString + to modify default behavior. + + :param kwargs: For backwards compatibility purposes, the + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4; they will result in a warning and then be + ignored. + + Apart from this, any keyword arguments passed into the + BeautifulSoup constructor are propagated to the TreeBuilder + constructor. This makes it possible to configure a + TreeBuilder by passing in arguments, not just by saying which + one to use. + """ + if 'convertEntities' in kwargs: + del kwargs['convertEntities'] + warnings.warn( + "BS4 does not respect the convertEntities argument to the " + "BeautifulSoup constructor. Entities are always converted " + "to Unicode characters.") + + if 'markupMassage' in kwargs: + del kwargs['markupMassage'] + warnings.warn( + "BS4 does not respect the markupMassage argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for any necessary markup massage.") + + if 'smartQuotesTo' in kwargs: + del kwargs['smartQuotesTo'] + warnings.warn( + "BS4 does not respect the smartQuotesTo argument to the " + "BeautifulSoup constructor. Smart quotes are always converted " + "to Unicode characters.") + + if 'selfClosingTags' in kwargs: + del kwargs['selfClosingTags'] + warnings.warn( + "BS4 does not respect the selfClosingTags argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for understanding self-closing tags.") + + if 'isHTML' in kwargs: + del kwargs['isHTML'] + warnings.warn( + "BS4 does not respect the isHTML argument to the " + "BeautifulSoup constructor. Suggest you use " + "features='lxml' for HTML and features='lxml-xml' for " + "XML.") + + def deprecated_argument(old_name, new_name): + if old_name in kwargs: + warnings.warn( + 'The "%s" argument to the BeautifulSoup constructor ' + 'has been renamed to "%s."' % (old_name, new_name)) + value = kwargs[old_name] + del kwargs[old_name] + return value + return None + + parse_only = parse_only or deprecated_argument( + "parseOnlyThese", "parse_only") + + from_encoding = from_encoding or deprecated_argument( + "fromEncoding", "from_encoding") + + if from_encoding and isinstance(markup, str): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + + self.element_classes = element_classes or dict() + + # We need this information to track whether or not the builder + # was specified well enough that we can omit the 'you need to + # specify a parser' warning. + original_builder = builder + original_features = features + + if isinstance(builder, type): + # A builder class was passed in; it needs to be instantiated. + builder_class = builder + builder = None + elif builder is None: + if isinstance(features, str): + features = [features] + if features is None or len(features) == 0: + features = self.DEFAULT_BUILDER_FEATURES + builder_class = builder_registry.lookup(*features) + if builder_class is None: + raise FeatureNotFound( + "Couldn't find a tree builder with the features you " + "requested: %s. Do you need to install a parser library?" + % ",".join(features)) + + # At this point either we have a TreeBuilder instance in + # builder, or we have a builder_class that we can instantiate + # with the remaining **kwargs. + if builder is None: + builder = builder_class(**kwargs) + if not original_builder and not ( + original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES + ) and markup: + # The user did not tell us which TreeBuilder to use, + # and we had to guess. Issue a warning. + if builder.is_xml: + markup_type = "XML" + else: + markup_type = "HTML" + + # This code adapted from warnings.py so that we get the same line + # of code as our warnings.warn() call gets, even if the answer is wrong + # (as it may be in a multithreading situation). + caller = None + try: + caller = sys._getframe(1) + except ValueError: + pass + if caller: + globals = caller.f_globals + line_number = caller.f_lineno + else: + globals = sys.__dict__ + line_number= 1 + filename = globals.get('__file__') + if filename: + fnl = filename.lower() + if fnl.endswith((".pyc", ".pyo")): + filename = filename[:-1] + if filename: + # If there is no filename at all, the user is most likely in a REPL, + # and the warning is not necessary. + values = dict( + filename=filename, + line_number=line_number, + parser=builder.NAME, + markup_type=markup_type + ) + warnings.warn( + self.NO_PARSER_SPECIFIED_WARNING % values, + GuessedAtParserWarning, stacklevel=2 + ) + else: + if kwargs: + warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") + + self.builder = builder + self.is_xml = builder.is_xml + self.known_xml = self.is_xml + self._namespaces = dict() + self.parse_only = parse_only + + self.builder.initialize_soup(self) + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, str) and not '<' in markup) + ): + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. + if (isinstance(markup, str) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: + possible_filename = markup + is_file = False + try: + is_file = os.path.exists(possible_filename) + except Exception as e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. + pass + if is_file: + warnings.warn( + '"%s" looks like a filename, not markup. You should' + ' probably open this file and pass the filehandle into' + ' Beautiful Soup.' % self._decode_markup(markup), + MarkupResemblesLocatorWarning + ) + self._check_markup_is_url(markup) + + rejections = [] + success = False + for (self.markup, self.original_encoding, self.declared_html_encoding, + self.contains_replacement_characters) in ( + self.builder.prepare_markup( + markup, from_encoding, exclude_encodings=exclude_encodings)): + self.reset() + try: + self._feed() + success = True + break + except ParserRejectedMarkup as e: + rejections.append(e) + pass + + if not success: + other_exceptions = [str(e) for e in rejections] + raise ParserRejectedMarkup( + "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) + ) + + # Clear out the markup and remove the builder's circular + # reference to this object. + self.markup = None + self.builder.soup = None + + def __copy__(self): + """Copy a BeautifulSoup object by converting the document to a string and parsing it again.""" + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy + + def __getstate__(self): + # Frequently a tree builder can't be pickled. + d = dict(self.__dict__) + if 'builder' in d and not self.builder.picklable: + d['builder'] = None + return d + + @classmethod + def _decode_markup(cls, markup): + """Ensure `markup` is bytes so it's safe to send into warnings.warn. + + TODO: warnings.warn had this problem back in 2010 but it might not + anymore. + """ + if isinstance(markup, bytes): + decoded = markup.decode('utf-8', 'replace') + else: + decoded = markup + return decoded + + @classmethod + def _check_markup_is_url(cls, markup): + """Error-handling method to raise a warning if incoming markup looks + like a URL. + + :param markup: A string. + """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, str): + space = ' ' + cant_start_with = ("http:", "https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % cls._decode_markup( + markup + ), + MarkupResemblesLocatorWarning + ) + + def _feed(self): + """Internal method that parses previously set markup, creating a large + number of Tag and NavigableString objects. + """ + # Convert the document to Unicode. + self.builder.reset() + + self.builder.feed(self.markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def reset(self): + """Reset this object to a state as though it had never parsed any + markup. + """ + Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.current_data = [] + self.currentTag = None + self.tagStack = [] + self.open_tag_counter = Counter() + self.preserve_whitespace_tag_stack = [] + self.string_container_stack = [] + self.pushTag(self) + + def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, + sourceline=None, sourcepos=None, **kwattrs): + """Create a new Tag associated with this BeautifulSoup object. + + :param name: The name of the new Tag. + :param namespace: The URI of the new Tag's XML namespace, if any. + :param prefix: The prefix for the new Tag's XML namespace, if any. + :param attrs: A dictionary of this Tag's attribute values; can + be used instead of `kwattrs` for attributes like 'class' + that are reserved words in Python. + :param sourceline: The line number where this tag was + (purportedly) found in its source document. + :param sourcepos: The character position within `sourceline` where this + tag was (purportedly) found. + :param kwattrs: Keyword arguments for the new Tag's attribute values. + + """ + kwattrs.update(attrs) + return self.element_classes.get(Tag, Tag)( + None, self.builder, name, namespace, nsprefix, kwattrs, + sourceline=sourceline, sourcepos=sourcepos + ) + + def string_container(self, base_class=None): + container = base_class or NavigableString + + # There may be a general override of NavigableString. + container = self.element_classes.get( + container, container + ) + + # On top of that, we may be inside a tag that needs a special + # container class. + if self.string_container_stack: + container = self.builder.string_containers.get( + self.string_container_stack[-1].name, container + ) + return container + + def new_string(self, s, subclass=None): + """Create a new NavigableString associated with this BeautifulSoup + object. + """ + container = self.string_container(subclass) + return container(s) + + def insert_before(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") + + def insert_after(self, *args): + """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement + it because there is nothing before or after it in the parse tree. + """ + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") + + def popTag(self): + """Internal method called by _popToTag when a tag is closed.""" + tag = self.tagStack.pop() + if tag.name in self.open_tag_counter: + self.open_tag_counter[tag.name] -= 1 + if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: + self.preserve_whitespace_tag_stack.pop() + if self.string_container_stack and tag == self.string_container_stack[-1]: + self.string_container_stack.pop() + #print("Pop", tag.name) + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + """Internal method called by handle_starttag when a tag is opened.""" + #print("Push", tag.name) + if self.currentTag is not None: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + if tag.name != self.ROOT_TAG_NAME: + self.open_tag_counter[tag.name] += 1 + if tag.name in self.builder.preserve_whitespace_tags: + self.preserve_whitespace_tag_stack.append(tag) + if tag.name in self.builder.string_containers: + self.string_container_stack.append(tag) + + def endData(self, containerClass=None): + """Method called by the TreeBuilder when the end of a data segment + occurs. + """ + containerClass = self.string_container(containerClass) + + if self.current_data: + current_data = ''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. + if not self.preserve_whitespace_tag_stack: + strippable = True + for i in current_data: + if i not in self.ASCII_SPACES: + strippable = False + break + if strippable: + if '\n' in current_data: + current_data = '\n' + else: + current_data = ' ' + + # Reset the data collector. + self.current_data = [] + + # Should we add this string to the tree at all? + if self.parse_only and len(self.tagStack) <= 1 and \ + (not self.parse_only.text or \ + not self.parse_only.search(current_data)): + return + + o = containerClass(current_data) + self.object_was_parsed(o) + + def object_was_parsed(self, o, parent=None, most_recent_element=None): + """Method called by the TreeBuilder to integrate an object into the parse tree.""" + if parent is None: + parent = self.currentTag + if most_recent_element is not None: + previous_element = most_recent_element + else: + previous_element = self._most_recent_element + + next_element = previous_sibling = next_sibling = None + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + if previous_element is None: + previous_element = o.previous_element + + fix = parent.next_element is not None + + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) + + self._most_recent_element = o + parent.contents.append(o) + + # Check if we are inserting into an already parsed node. + if fix: + self._linkage_fixer(parent) + + def _linkage_fixer(self, el): + """Make sure linkage of this fragment is sound.""" + + first = el.contents[0] + child = el.contents[-1] + descendant = child + + if child is first and el.parent is not None: + # Parent should be linked to first child + el.next_element = child + # We are no longer linked to whatever this element is + prev_el = child.previous_element + if prev_el is not None and prev_el is not el: + prev_el.next_element = None + # First child should be linked to the parent, and no previous siblings. + child.previous_element = el + child.previous_sibling = None + + # We have no sibling as we've been appended as the last. + child.next_sibling = None + + # This index is a tag, dig deeper for a "last descendant" + if isinstance(child, Tag) and child.contents: + descendant = child._last_descendant(False) + + # As the final step, link last descendant. It should be linked + # to the parent's next sibling (if found), else walk up the chain + # and find a parent with a sibling. It should have no next sibling. + descendant.next_element = None + descendant.next_sibling = None + target = el + while True: + if target is None: + break + elif target.next_sibling is not None: + descendant.next_element = target.next_sibling + target.next_sibling.previous_element = child + break + target = target.parent + + def _popToTag(self, name, nsprefix=None, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. + + If there are no open tags with the given name, nothing will be + popped. + + :param name: Pop up to the most recent tag with this name. + :param nsprefix: The namespace prefix that goes with `name`. + :param inclusivePop: It this is false, pops the tag stack up + to but *not* including the most recent instqance of the + given tag. + + """ + #print("Popping to %s" % name) + if name == self.ROOT_TAG_NAME: + # The BeautifulSoup object itself can never be popped. + return + + most_recently_popped = None + + stack_size = len(self.tagStack) + for i in range(stack_size - 1, 0, -1): + if not self.open_tag_counter.get(name): + break + t = self.tagStack[i] + if (name == t.name and nsprefix == t.prefix): + if inclusivePop: + most_recently_popped = self.popTag() + break + most_recently_popped = self.popTag() + + return most_recently_popped + + def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None, + sourcepos=None): + """Called by the tree builder when a new tag is encountered. + + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + :param attrs: A dictionary of attribute values. + :param sourceline: The line number where this tag was found in its + source document. + :param sourcepos: The character position within `sourceline` where this + tag was found. + + If this method returns None, the tag was rejected by an active + SoupStrainer. You should proceed as if the tag had not occurred + in the document. For instance, if this was a self-closing tag, + don't call handle_endtag. + """ + # print("Start tag %s: %s" % (name, attrs)) + self.endData() + + if (self.parse_only and len(self.tagStack) <= 1 + and (self.parse_only.text + or not self.parse_only.search_tag(name, attrs))): + return None + + tag = self.element_classes.get(Tag, Tag)( + self, self.builder, name, namespace, nsprefix, attrs, + self.currentTag, self._most_recent_element, + sourceline=sourceline, sourcepos=sourcepos + ) + if tag is None: + return tag + if self._most_recent_element is not None: + self._most_recent_element.next_element = tag + self._most_recent_element = tag + self.pushTag(tag) + return tag + + def handle_endtag(self, name, nsprefix=None): + """Called by the tree builder when an ending tag is encountered. + + :param name: Name of the tag. + :param nsprefix: Namespace prefix for the tag. + """ + #print("End tag: " + name) + self.endData() + self._popToTag(name, nsprefix) + + def handle_data(self, data): + """Called by the tree builder when a chunk of textual data is encountered.""" + self.current_data.append(data) + + def decode(self, pretty_print=False, + eventual_encoding=DEFAULT_OUTPUT_ENCODING, + formatter="minimal"): + """Returns a string or Unicode representation of the parse tree + as an HTML or XML document. + + :param pretty_print: If this is True, indentation will be used to + make the document more readable. + :param eventual_encoding: The encoding of the final document. + If this is None, the document will be a Unicode string. + """ + if self.is_xml: + # Print the XML declaration + encoding_part = '' + if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS: + # This is a special Python encoding; it can't actually + # go into an XML document because it means nothing + # outside of Python. + eventual_encoding = None + if eventual_encoding != None: + encoding_part = ' encoding="%s"' % eventual_encoding + prefix = '\n' % encoding_part + else: + prefix = '' + if not pretty_print: + indent_level = None + else: + indent_level = 0 + return prefix + super(BeautifulSoup, self).decode( + indent_level, eventual_encoding, formatter) + +# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' +_s = BeautifulSoup +_soup = BeautifulSoup + +class BeautifulStoneSoup(BeautifulSoup): + """Deprecated interface to an XML parser.""" + + def __init__(self, *args, **kwargs): + kwargs['features'] = 'xml' + warnings.warn( + 'The BeautifulStoneSoup class is deprecated. Instead of using ' + 'it, pass features="xml" into the BeautifulSoup constructor.') + super(BeautifulStoneSoup, self).__init__(*args, **kwargs) + + +class StopParsing(Exception): + """Exception raised by a TreeBuilder if it's unable to continue parsing.""" + pass + +class FeatureNotFound(ValueError): + """Exception raised by the BeautifulSoup constructor if no parser with the + requested features is found. + """ + pass + + +#If this file is run as a script, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print((soup.prettify())) diff --git a/venv/lib/python3.6/site-packages/bs4/builder/__init__.py b/venv/lib/python3.6/site-packages/bs4/builder/__init__.py new file mode 100644 index 0000000..03fbd6a --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/builder/__init__.py @@ -0,0 +1,519 @@ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +from collections import defaultdict +import itertools +import sys +from bs4.element import ( + CharsetMetaAttributeValue, + ContentMetaAttributeValue, + Stylesheet, + Script, + TemplateString, + nonwhitespace_re +) + +__all__ = [ + 'HTMLTreeBuilder', + 'SAXTreeBuilder', + 'TreeBuilder', + 'TreeBuilderRegistry', + ] + +# Some useful features for a TreeBuilder to have. +FAST = 'fast' +PERMISSIVE = 'permissive' +STRICT = 'strict' +XML = 'xml' +HTML = 'html' +HTML_5 = 'html5' + + +class TreeBuilderRegistry(object): + """A way of looking up TreeBuilder subclasses by their name or by desired + features. + """ + + def __init__(self): + self.builders_for_feature = defaultdict(list) + self.builders = [] + + def register(self, treebuilder_class): + """Register a treebuilder based on its advertised features. + + :param treebuilder_class: A subclass of Treebuilder. its .features + attribute should list its features. + """ + for feature in treebuilder_class.features: + self.builders_for_feature[feature].insert(0, treebuilder_class) + self.builders.insert(0, treebuilder_class) + + def lookup(self, *features): + """Look up a TreeBuilder subclass with the desired features. + + :param features: A list of features to look for. If none are + provided, the most recently registered TreeBuilder subclass + will be used. + :return: A TreeBuilder subclass, or None if there's no + registered subclass with all the requested features. + """ + if len(self.builders) == 0: + # There are no builders at all. + return None + + if len(features) == 0: + # They didn't ask for any features. Give them the most + # recently registered builder. + return self.builders[0] + + # Go down the list of features in order, and eliminate any builders + # that don't match every feature. + features = list(features) + features.reverse() + candidates = None + candidate_set = None + while len(features) > 0: + feature = features.pop() + we_have_the_feature = self.builders_for_feature.get(feature, []) + if len(we_have_the_feature) > 0: + if candidates is None: + candidates = we_have_the_feature + candidate_set = set(candidates) + else: + # Eliminate any candidates that don't have this feature. + candidate_set = candidate_set.intersection( + set(we_have_the_feature)) + + # The only valid candidates are the ones in candidate_set. + # Go through the original list of candidates and pick the first one + # that's in candidate_set. + if candidate_set is None: + return None + for candidate in candidates: + if candidate in candidate_set: + return candidate + return None + +# The BeautifulSoup class will take feature lists from developers and use them +# to look up builders in this registry. +builder_registry = TreeBuilderRegistry() + +class TreeBuilder(object): + """Turn a textual document into a Beautiful Soup object tree.""" + + NAME = "[Unknown tree builder]" + ALTERNATE_NAMES = [] + features = [] + + is_xml = False + picklable = False + empty_element_tags = None # A tag will be considered an empty-element + # tag when and only when it has no contents. + + # A value for these tag/attribute combinations is a space- or + # comma-separated list of CDATA, rather than a single CDATA. + DEFAULT_CDATA_LIST_ATTRIBUTES = {} + + # Whitespace should be preserved inside these tags. + DEFAULT_PRESERVE_WHITESPACE_TAGS = set() + + # The textual contents of tags with these names should be + # instantiated with some class other than NavigableString. + DEFAULT_STRING_CONTAINERS = {} + + USE_DEFAULT = object() + + # Most parsers don't keep track of line numbers. + TRACKS_LINE_NUMBERS = False + + def __init__(self, multi_valued_attributes=USE_DEFAULT, + preserve_whitespace_tags=USE_DEFAULT, + store_line_numbers=USE_DEFAULT, + string_containers=USE_DEFAULT, + ): + """Constructor. + + :param multi_valued_attributes: If this is set to None, the + TreeBuilder will not turn any values for attributes like + 'class' into lists. Setting this to a dictionary will + customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES + for an example. + + Internally, these are called "CDATA list attributes", but that + probably doesn't make sense to an end-user, so the argument name + is `multi_valued_attributes`. + + :param preserve_whitespace_tags: A list of tags to treat + the way
 tags are treated in HTML. Tags in this list
+         are immune from pretty-printing; their contents will always be
+         output as-is.
+
+        :param string_containers: A dictionary mapping tag names to
+        the classes that should be instantiated to contain the textual
+        contents of those tags. The default is to use NavigableString
+        for every tag, no matter what the name. You can override the
+        default by changing DEFAULT_STRING_CONTAINERS.
+
+        :param store_line_numbers: If the parser keeps track of the
+         line numbers and positions of the original markup, that
+         information will, by default, be stored in each corresponding
+         `Tag` object. You can turn this off by passing
+         store_line_numbers=False. If the parser you're using doesn't 
+         keep track of this information, then setting store_line_numbers=True
+         will do nothing.
+        """
+        self.soup = None
+        if multi_valued_attributes is self.USE_DEFAULT:
+            multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES
+        self.cdata_list_attributes = multi_valued_attributes
+        if preserve_whitespace_tags is self.USE_DEFAULT:
+            preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS
+        self.preserve_whitespace_tags = preserve_whitespace_tags
+        if store_line_numbers == self.USE_DEFAULT:
+            store_line_numbers = self.TRACKS_LINE_NUMBERS
+        self.store_line_numbers = store_line_numbers 
+        if string_containers == self.USE_DEFAULT:
+            string_containers = self.DEFAULT_STRING_CONTAINERS
+        self.string_containers = string_containers
+        
+    def initialize_soup(self, soup):
+        """The BeautifulSoup object has been initialized and is now
+        being associated with the TreeBuilder.
+
+        :param soup: A BeautifulSoup object.
+        """
+        self.soup = soup
+        
+    def reset(self):
+        """Do any work necessary to reset the underlying parser
+        for a new document.
+
+        By default, this does nothing.
+        """
+        pass
+
+    def can_be_empty_element(self, tag_name):
+        """Might a tag with this name be an empty-element tag?
+
+        The final markup may or may not actually present this tag as
+        self-closing.
+
+        For instance: an HTMLBuilder does not consider a 

tag to be + an empty-element tag (it's not in + HTMLBuilder.empty_element_tags). This means an empty

tag + will be presented as "

", not "

" or "

". + + The default implementation has no opinion about which tags are + empty-element tags, so a tag will be presented as an + empty-element tag if and only if it has no children. + "" will become "", and "bar" will + be left alone. + + :param tag_name: The name of a markup tag. + """ + if self.empty_element_tags is None: + return True + return tag_name in self.empty_element_tags + + def feed(self, markup): + """Run some incoming markup through some parsing process, + populating the `BeautifulSoup` object in self.soup. + + This method is not implemented in TreeBuilder; it must be + implemented in subclasses. + + :return: None. + """ + raise NotImplementedError() + + def prepare_markup(self, markup, user_specified_encoding=None, + document_declared_encoding=None, exclude_encodings=None): + """Run any preliminary steps necessary to make incoming markup + acceptable to the parser. + + :param markup: Some markup -- probably a bytestring. + :param user_specified_encoding: The user asked to try this encoding. + :param document_declared_encoding: The markup itself claims to be + in this encoding. + :param exclude_encodings: The user asked _not_ to try any of + these encodings. + + :yield: A series of 4-tuples: + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for converting the + document to Unicode and parsing it. Each strategy will be tried + in turn. + + By default, the only strategy is to parse the markup + as-is. See `LXMLTreeBuilderForXML` and + `HTMLParserTreeBuilder` for implementations that take into + account the quirks of particular parsers. + """ + yield markup, None, None, False + + def test_fragment_to_document(self, fragment): + """Wrap an HTML fragment to make it look like a document. + + Different parsers do this differently. For instance, lxml + introduces an empty tag, and html5lib + doesn't. Abstracting this away lets us write simple tests + which run HTML fragments through the parser and compare the + results against other HTML fragments. + + This method should not be used outside of tests. + + :param fragment: A string -- fragment of HTML. + :return: A string -- a full HTML document. + """ + return fragment + + def set_up_substitutions(self, tag): + """Set up any substitutions that will need to be performed on + a `Tag` when it's output as a string. + + By default, this does nothing. See `HTMLTreeBuilder` for a + case where this is used. + + :param tag: A `Tag` + :return: Whether or not a substitution was performed. + """ + return False + + def _replace_cdata_list_attribute_values(self, tag_name, attrs): + """When an attribute value is associated with a tag that can + have multiple values for that attribute, convert the string + value to a list of strings. + + Basically, replaces class="foo bar" with class=["foo", "bar"] + + NOTE: This method modifies its input in place. + + :param tag_name: The name of a tag. + :param attrs: A dictionary containing the tag's attributes. + Any appropriate attribute values will be modified in place. + """ + if not attrs: + return attrs + if self.cdata_list_attributes: + universal = self.cdata_list_attributes.get('*', []) + tag_specific = self.cdata_list_attributes.get( + tag_name.lower(), None) + for attr in list(attrs.keys()): + if attr in universal or (tag_specific and attr in tag_specific): + # We have a "class"-type attribute whose string + # value is a whitespace-separated list of + # values. Split it into a list. + value = attrs[attr] + if isinstance(value, str): + values = nonwhitespace_re.findall(value) + else: + # html5lib sometimes calls setAttributes twice + # for the same tag when rearranging the parse + # tree. On the second call the attribute value + # here is already a list. If this happens, + # leave the value alone rather than trying to + # split it again. + values = value + attrs[attr] = values + return attrs + +class SAXTreeBuilder(TreeBuilder): + """A Beautiful Soup treebuilder that listens for SAX events. + + This is not currently used for anything, but it demonstrates + how a simple TreeBuilder would work. + """ + + def feed(self, markup): + raise NotImplementedError() + + def close(self): + pass + + def startElement(self, name, attrs): + attrs = dict((key[1], value) for key, value in list(attrs.items())) + #print("Start %s, %r" % (name, attrs)) + self.soup.handle_starttag(name, attrs) + + def endElement(self, name): + #print("End %s" % name) + self.soup.handle_endtag(name) + + def startElementNS(self, nsTuple, nodeName, attrs): + # Throw away (ns, nodeName) for now. + self.startElement(nodeName, attrs) + + def endElementNS(self, nsTuple, nodeName): + # Throw away (ns, nodeName) for now. + self.endElement(nodeName) + #handler.endElementNS((ns, node.nodeName), node.nodeName) + + def startPrefixMapping(self, prefix, nodeValue): + # Ignore the prefix for now. + pass + + def endPrefixMapping(self, prefix): + # Ignore the prefix for now. + # handler.endPrefixMapping(prefix) + pass + + def characters(self, content): + self.soup.handle_data(content) + + def startDocument(self): + pass + + def endDocument(self): + pass + + +class HTMLTreeBuilder(TreeBuilder): + """This TreeBuilder knows facts about HTML. + + Such as which tags are empty-element tags. + """ + + empty_element_tags = set([ + # These are from HTML5. + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + + # These are from earlier versions of HTML and are removed in HTML5. + 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' + ]) + + # The HTML standard defines these as block-level elements. Beautiful + # Soup does not treat these elements differently from other elements, + # but it may do so eventually, and this information is available if + # you need to use it. + block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) + + # The HTML standard defines an unusual content model for these tags. + # We represent this by using a string class other than NavigableString + # inside these tags. + # + # I made this list by going through the HTML spec + # (https://html.spec.whatwg.org/#metadata-content) and looking for + # "metadata content" elements that can contain strings. + # + # TODO: Arguably

as a +# string. +# +# XXX This code can be removed once most Python 3 users are on 3.2.3. +if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT: + import re + attrfind_tolerant = re.compile( + r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*' + r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?') + HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant + + locatestarttagend = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:\s+ # whitespace before attribute name + (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name + (?:\s*=\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |\"[^\"]*\" # LIT-enclosed value + |[^'\">\s]+ # bare value + ) + )? + ) + )* + \s* # trailing whitespace +""", re.VERBOSE) + BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend + + from html.parser import tagfind, attrfind + + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = tagfind.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = rawdata[i+1:k].lower() + while k < endpos: + if self.strict: + m = attrfind.match(rawdata, k) + else: + m = attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = self.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + else: + offset = offset + len(self.__starttag_text) + if self.strict: + self.error("junk characters in start tag: %r" + % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + else: + self.handle_starttag(tag, attrs) + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + return endpos + + def set_cdata_mode(self, elem): + self.cdata_elem = elem.lower() + self.interesting = re.compile(r'' % self.cdata_elem, re.I) + + BeautifulSoupHTMLParser.parse_starttag = parse_starttag + BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode + + CONSTRUCTOR_TAKES_STRICT = True diff --git a/venv/lib/python3.6/site-packages/bs4/builder/_lxml.py b/venv/lib/python3.6/site-packages/bs4/builder/_lxml.py new file mode 100644 index 0000000..432a2c8 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/builder/_lxml.py @@ -0,0 +1,332 @@ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +__all__ = [ + 'LXMLTreeBuilderForXML', + 'LXMLTreeBuilder', + ] + +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable + +from io import BytesIO +from io import StringIO +from lxml import etree +from bs4.element import ( + Comment, + Doctype, + NamespacedAttribute, + ProcessingInstruction, + XMLProcessingInstruction, +) +from bs4.builder import ( + FAST, + HTML, + HTMLTreeBuilder, + PERMISSIVE, + ParserRejectedMarkup, + TreeBuilder, + XML) +from bs4.dammit import EncodingDetector + +LXML = 'lxml' + +def _invert(d): + "Invert a dictionary." + return dict((v,k) for k, v in list(d.items())) + +class LXMLTreeBuilderForXML(TreeBuilder): + DEFAULT_PARSER_CLASS = etree.XMLParser + + is_xml = True + processing_instruction_class = XMLProcessingInstruction + + NAME = "lxml-xml" + ALTERNATE_NAMES = ["xml"] + + # Well, it's permissive by XML parser standards. + features = [NAME, LXML, XML, FAST, PERMISSIVE] + + CHUNK_SIZE = 512 + + # This namespace mapping is specified in the XML Namespace + # standard. + DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace') + + DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS) + + # NOTE: If we parsed Element objects and looked at .sourceline, + # we'd be able to see the line numbers from the original document. + # But instead we build an XMLParser or HTMLParser object to serve + # as the target of parse messages, and those messages don't include + # line numbers. + # See: https://bugs.launchpad.net/lxml/+bug/1846906 + + def initialize_soup(self, soup): + """Let the BeautifulSoup object know about the standard namespace + mapping. + + :param soup: A `BeautifulSoup`. + """ + super(LXMLTreeBuilderForXML, self).initialize_soup(soup) + self._register_namespaces(self.DEFAULT_NSMAPS) + + def _register_namespaces(self, mapping): + """Let the BeautifulSoup object know about namespaces encountered + while parsing the document. + + This might be useful later on when creating CSS selectors. + + :param mapping: A dictionary mapping namespace prefixes to URIs. + """ + for key, value in list(mapping.items()): + if key and key not in self.soup._namespaces: + # Let the BeautifulSoup object know about a new namespace. + # If there are multiple namespaces defined with the same + # prefix, the first one in the document takes precedence. + self.soup._namespaces[key] = value + + def default_parser(self, encoding): + """Find the default parser for the given encoding. + + :param encoding: A string. + :return: Either a parser object or a class, which + will be instantiated with default arguments. + """ + if self._default_parser is not None: + return self._default_parser + return etree.XMLParser( + target=self, strip_cdata=False, recover=True, encoding=encoding) + + def parser_for(self, encoding): + """Instantiate an appropriate parser for the given encoding. + + :param encoding: A string. + :return: A parser object such as an `etree.XMLParser`. + """ + # Use the default parser. + parser = self.default_parser(encoding) + + if isinstance(parser, Callable): + # Instantiate the parser with default arguments + parser = parser( + target=self, strip_cdata=False, recover=True, encoding=encoding + ) + return parser + + def __init__(self, parser=None, empty_element_tags=None, **kwargs): + # TODO: Issue a warning if parser is present but not a + # callable, since that means there's no way to create new + # parsers for different encodings. + self._default_parser = parser + if empty_element_tags is not None: + self.empty_element_tags = set(empty_element_tags) + self.soup = None + self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] + super(LXMLTreeBuilderForXML, self).__init__(**kwargs) + + def _getNsTag(self, tag): + # Split the namespace URL out of a fully-qualified lxml tag + # name. Copied from lxml's src/lxml/sax.py. + if tag[0] == '{': + return tuple(tag[1:].split('}', 1)) + else: + return (None, tag) + + def prepare_markup(self, markup, user_specified_encoding=None, + exclude_encodings=None, + document_declared_encoding=None): + """Run any preliminary steps necessary to make incoming markup + acceptable to the parser. + + lxml really wants to get a bytestring and convert it to + Unicode itself. So instead of using UnicodeDammit to convert + the bytestring to Unicode using different encodings, this + implementation uses EncodingDetector to iterate over the + encodings, and tell lxml to try to parse the document as each + one in turn. + + :param markup: Some markup -- hopefully a bytestring. + :param user_specified_encoding: The user asked to try this encoding. + :param document_declared_encoding: The markup itself claims to be + in this encoding. + :param exclude_encodings: The user asked _not_ to try any of + these encodings. + + :yield: A series of 4-tuples: + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for converting the + document to Unicode and parsing it. Each strategy will be tried + in turn. + """ + is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + + if isinstance(markup, str): + # We were given Unicode. Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + + if isinstance(markup, str): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", + document_declared_encoding, False) + + try_encodings = [user_specified_encoding, document_declared_encoding] + detector = EncodingDetector( + markup, try_encodings, is_html, exclude_encodings) + for encoding in detector.encodings: + yield (detector.markup, encoding, document_declared_encoding, False) + + def feed(self, markup): + if isinstance(markup, bytes): + markup = BytesIO(markup) + elif isinstance(markup, str): + markup = StringIO(markup) + + # Call feed() at least once, even if the markup is empty, + # or the parser won't be initialized. + data = markup.read(self.CHUNK_SIZE) + try: + self.parser = self.parser_for(self.soup.original_encoding) + self.parser.feed(data) + while len(data) != 0: + # Now call feed() on the rest of the data, chunk by chunk. + data = markup.read(self.CHUNK_SIZE) + if len(data) != 0: + self.parser.feed(data) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) + + def close(self): + self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] + + def start(self, name, attrs, nsmap={}): + # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. + attrs = dict(attrs) + nsprefix = None + # Invert each namespace map as it comes in. + if len(nsmap) == 0 and len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. + self.nsmaps.append(None) + elif len(nsmap) > 0: + # A new namespace mapping has come into play. + + # First, Let the BeautifulSoup object know about it. + self._register_namespaces(nsmap) + + # Then, add it to our running list of inverted namespace + # mappings. + self.nsmaps.append(_invert(nsmap)) + + # Also treat the namespace mapping as a set of attributes on the + # tag, so we can recreate it later. + attrs = attrs.copy() + for prefix, namespace in list(nsmap.items()): + attribute = NamespacedAttribute( + "xmlns", prefix, "http://www.w3.org/2000/xmlns/") + attrs[attribute] = namespace + + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} + for attr, value in list(attrs.items()): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + + namespace, name = self._getNsTag(name) + nsprefix = self._prefix_for_namespace(namespace) + self.soup.handle_starttag(name, namespace, nsprefix, attrs) + + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + return None + + def end(self, name): + self.soup.endData() + completed_tag = self.soup.tagStack[-1] + namespace, name = self._getNsTag(name) + nsprefix = None + if namespace is not None: + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + nsprefix = inverted_nsmap[namespace] + break + self.soup.handle_endtag(name, nsprefix) + if len(self.nsmaps) > 1: + # This tag, or one of its parents, introduced a namespace + # mapping, so pop it off the stack. + self.nsmaps.pop() + + def pi(self, target, data): + self.soup.endData() + self.soup.handle_data(target + ' ' + data) + self.soup.endData(self.processing_instruction_class) + + def data(self, content): + self.soup.handle_data(content) + + def doctype(self, name, pubid, system): + self.soup.endData() + doctype = Doctype.for_name_and_ids(name, pubid, system) + self.soup.object_was_parsed(doctype) + + def comment(self, content): + "Handle comments as Comment objects." + self.soup.endData() + self.soup.handle_data(content) + self.soup.endData(Comment) + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '\n%s' % fragment + + +class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): + + NAME = LXML + ALTERNATE_NAMES = ["lxml-html"] + + features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] + is_xml = False + processing_instruction_class = ProcessingInstruction + + def default_parser(self, encoding): + return etree.HTMLParser + + def feed(self, markup): + encoding = self.soup.original_encoding + try: + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(e) + + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '%s' % fragment diff --git a/venv/lib/python3.6/site-packages/bs4/dammit.py b/venv/lib/python3.6/site-packages/bs4/dammit.py new file mode 100644 index 0000000..ee3708f --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/dammit.py @@ -0,0 +1,939 @@ +# -*- coding: utf-8 -*- +"""Beautiful Soup bonus library: Unicode, Dammit + +This library converts a bytestream to Unicode through any means +necessary. It is heavily based on code from Mark Pilgrim's Universal +Feed Parser. It works best on XML and HTML, but it does not rewrite the +XML or HTML to reflect a new encoding; that's the tree builder's job. +""" +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +import codecs +from html.entities import codepoint2name +import re +import logging +import string + +# Import a library to autodetect character encodings. +chardet_type = None +try: + # First try the fast C implementation. + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + if isinstance(s, str): + return None + return cchardet.detect(s)['encoding'] +except ImportError: + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + if isinstance(s, str): + return None + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None + +# Available from http://cjkpython.i18n.org/. +# +# TODO: This doesn't work anymore and the closest thing, iconv_codecs, +# is GPL-licensed. Check whether this is still necessary. +try: + import iconv_codec +except ImportError: + pass + +# Build bytestring and Unicode versions of regular expressions for finding +# a declared encoding inside an XML or HTML document. +xml_encoding = '^\\s*<\\?.*encoding=[\'"](.*?)[\'"].*\\?>' +html_meta = '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]' +encoding_res = dict() +encoding_res[bytes] = { + 'html' : re.compile(html_meta.encode("ascii"), re.I), + 'xml' : re.compile(xml_encoding.encode("ascii"), re.I), +} +encoding_res[str] = { + 'html' : re.compile(html_meta, re.I), + 'xml' : re.compile(xml_encoding, re.I) +} + +class EntitySubstitution(object): + """The ability to substitute XML or HTML entities for certain characters.""" + + def _populate_class_variables(): + lookup = {} + reverse_lookup = {} + characters_for_re = [] + + # &apos is an XHTML entity and an HTML 5, but not an HTML 4 + # entity. We don't want to use it, but we want to recognize it on the way in. + # + # TODO: Ideally we would be able to recognize all HTML 5 named + # entities, but that's a little tricky. + extra = [(39, 'apos')] + for codepoint, name in list(codepoint2name.items()) + extra: + character = chr(codepoint) + if codepoint not in (34, 39): + # There's no point in turning the quotation mark into + # " or the single quote into ', unless it + # happens within an attribute value, which is handled + # elsewhere. + characters_for_re.append(character) + lookup[character] = name + # But we do want to recognize those entities on the way in and + # convert them to Unicode characters. + reverse_lookup[name] = character + re_definition = "[%s]" % "".join(characters_for_re) + return lookup, reverse_lookup, re.compile(re_definition) + (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, + CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() + + CHARACTER_TO_XML_ENTITY = { + "'": "apos", + '"': "quot", + "&": "amp", + "<": "lt", + ">": "gt", + } + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)" + ")") + + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + + @classmethod + def _substitute_html_entity(cls, matchobj): + """Used with a regular expression to substitute the + appropriate HTML entity for a special character.""" + entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) + return "&%s;" % entity + + @classmethod + def _substitute_xml_entity(cls, matchobj): + """Used with a regular expression to substitute the + appropriate XML entity for a special character.""" + entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] + return "&%s;" % entity + + @classmethod + def quoted_attribute_value(self, value): + """Make a value into a quoted XML attribute, possibly escaping it. + + Most strings will be quoted using double quotes. + + Bob's Bar -> "Bob's Bar" + + If a string contains double quotes, it will be quoted using + single quotes. + + Welcome to "my bar" -> 'Welcome to "my bar"' + + If a string contains both single and double quotes, the + double quotes will be escaped, and the string will be quoted + using double quotes. + + Welcome to "Bob's Bar" -> "Welcome to "Bob's bar" + """ + quote_with = '"' + if '"' in value: + if "'" in value: + # The string contains both single and double + # quotes. Turn the double quotes into + # entities. We quote the double quotes rather than + # the single quotes because the entity name is + # """ whether this is HTML or XML. If we + # quoted the single quotes, we'd have to decide + # between ' and &squot;. + replace_with = """ + value = value.replace('"', replace_with) + else: + # There are double quotes but no single quotes. + # We can use single quotes to quote the attribute. + quote_with = "'" + return quote_with + value + quote_with + + @classmethod + def substitute_xml(cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign will + become <, the greater-than sign will become >, and any + ampersands that are not part of an entity defition will + become &. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets, and ampersands that aren't part of + # entities. + value = cls.BARE_AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_html(cls, s): + """Replace certain Unicode characters with named HTML entities. + + This differs from data.encode(encoding, 'xmlcharrefreplace') + in that the goal is to make the result more readable (to those + with ASCII displays) rather than to recover from + errors. There's absolutely nothing wrong with a UTF-8 string + containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that + character with "é" will make it more readable to some + people. + + :param s: A Unicode string. + """ + return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( + cls._substitute_html_entity, s) + + +class EncodingDetector: + """Suggests a number of possible encodings for a bytestring. + + Order of precedence: + + 1. Encodings you specifically tell EncodingDetector to try first + (the override_encodings argument to the constructor). + + 2. An encoding declared within the bytestring itself, either in an + XML declaration (if the bytestring is to be interpreted as an XML + document), or in a tag (if the bytestring is to be + interpreted as an HTML document.) + + 3. An encoding detected through textual analysis by chardet, + cchardet, or a similar external library. + + 4. UTF-8. + + 5. Windows-1252. + """ + def __init__(self, markup, override_encodings=None, is_html=False, + exclude_encodings=None): + """Constructor. + + :param markup: Some markup in an unknown encoding. + :param override_encodings: These encodings will be tried first. + :param is_html: If True, this markup is considered to be HTML. Otherwise + it's assumed to be XML. + :param exclude_encodings: These encodings will not be tried, even + if they otherwise would be. + """ + self.override_encodings = override_encodings or [] + exclude_encodings = exclude_encodings or [] + self.exclude_encodings = set([x.lower() for x in exclude_encodings]) + self.chardet_encoding = None + self.is_html = is_html + self.declared_encoding = None + + # First order of business: strip a byte-order mark. + self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) + + def _usable(self, encoding, tried): + """Should we even bother to try this encoding? + + :param encoding: Name of an encoding. + :param tried: Encodings that have already been tried. This will be modified + as a side effect. + """ + if encoding is not None: + encoding = encoding.lower() + if encoding in self.exclude_encodings: + return False + if encoding not in tried: + tried.add(encoding) + return True + return False + + @property + def encodings(self): + """Yield a number of encodings that might work for this markup. + + :yield: A sequence of strings. + """ + tried = set() + for e in self.override_encodings: + if self._usable(e, tried): + yield e + + # Did the document originally start with a byte-order mark + # that indicated its encoding? + if self._usable(self.sniffed_encoding, tried): + yield self.sniffed_encoding + + # Look within the document for an XML or HTML encoding + # declaration. + if self.declared_encoding is None: + self.declared_encoding = self.find_declared_encoding( + self.markup, self.is_html) + if self._usable(self.declared_encoding, tried): + yield self.declared_encoding + + # Use third-party character set detection to guess at the + # encoding. + if self.chardet_encoding is None: + self.chardet_encoding = chardet_dammit(self.markup) + if self._usable(self.chardet_encoding, tried): + yield self.chardet_encoding + + # As a last-ditch effort, try utf-8 and windows-1252. + for e in ('utf-8', 'windows-1252'): + if self._usable(e, tried): + yield e + + @classmethod + def strip_byte_order_mark(cls, data): + """If a byte-order mark is present, strip it and return the encoding it implies. + + :param data: Some markup. + :return: A 2-tuple (modified data, implied encoding) + """ + encoding = None + if isinstance(data, str): + # Unicode data cannot have a byte-order mark. + return data, encoding + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == b'\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == b'\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == b'\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + return data, encoding + + @classmethod + def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): + """Given a document, tries to find its declared encoding. + + An XML encoding is declared at the beginning of the document. + + An HTML encoding is declared in a tag, hopefully near the + beginning of the document. + + :param markup: Some markup. + :param is_html: If True, this markup is considered to be HTML. Otherwise + it's assumed to be XML. + :param search_entire_document: Since an encoding is supposed to declared near the beginning + of the document, most of the time it's only necessary to search a few kilobytes of data. + Set this to True to force this method to search the entire document. + """ + if search_entire_document: + xml_endpos = html_endpos = len(markup) + else: + xml_endpos = 1024 + html_endpos = max(2048, int(len(markup) * 0.05)) + + if isinstance(markup, bytes): + res = encoding_res[bytes] + else: + res = encoding_res[str] + + xml_re = res['xml'] + html_re = res['html'] + declared_encoding = None + declared_encoding_match = xml_re.search(markup, endpos=xml_endpos) + if not declared_encoding_match and is_html: + declared_encoding_match = html_re.search(markup, endpos=html_endpos) + if declared_encoding_match is not None: + declared_encoding = declared_encoding_match.groups()[0] + if declared_encoding: + if isinstance(declared_encoding, bytes): + declared_encoding = declared_encoding.decode('ascii', 'replace') + return declared_encoding.lower() + return None + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = {"macintosh": "mac-roman", + "x-sjis": "shift-jis"} + + ENCODINGS_WITH_SMART_QUOTES = [ + "windows-1252", + "iso-8859-1", + "iso-8859-2", + ] + + def __init__(self, markup, override_encodings=[], + smart_quotes_to=None, is_html=False, exclude_encodings=[]): + """Constructor. + + :param markup: A bytestring representing markup in an unknown encoding. + :param override_encodings: These encodings will be tried first, + before any sniffing code is run. + + :param smart_quotes_to: By default, Microsoft smart quotes will, like all other characters, be converted + to Unicode characters. Setting this to 'ascii' will convert them to ASCII quotes instead. + Setting it to 'xml' will convert them to XML entity references, and setting it to 'html' + will convert them to HTML entity references. + :param is_html: If True, this markup is considered to be HTML. Otherwise + it's assumed to be XML. + :param exclude_encodings: These encodings will not be considered, even + if the sniffing code thinks they might make sense. + """ + self.smart_quotes_to = smart_quotes_to + self.tried_encodings = [] + self.contains_replacement_characters = False + self.is_html = is_html + self.log = logging.getLogger(__name__) + self.detector = EncodingDetector( + markup, override_encodings, is_html, exclude_encodings) + + # Short-circuit if the data is in Unicode to begin with. + if isinstance(markup, str) or markup == '': + self.markup = markup + self.unicode_markup = str(markup) + self.original_encoding = None + return + + # The encoding detector may have stripped a byte-order mark. + # Use the stripped markup from this point on. + self.markup = self.detector.markup + + u = None + for encoding in self.detector.encodings: + markup = self.detector.markup + u = self._convert_from(encoding) + if u is not None: + break + + if not u: + # None of the encodings worked. As an absolute last resort, + # try them again with character replacement. + + for encoding in self.detector.encodings: + if encoding != "ascii": + u = self._convert_from(encoding, "replace") + if u is not None: + self.log.warning( + "Some characters could not be decoded, and were " + "replaced with REPLACEMENT CHARACTER." + ) + self.contains_replacement_characters = True + break + + # If none of that worked, we could at this point force it to + # ASCII, but that would destroy so much data that I think + # giving up is better. + self.unicode_markup = u + if not u: + self.original_encoding = None + + def _sub_ms_char(self, match): + """Changes a MS smart quote character to an XML or HTML + entity, or an ASCII character.""" + orig = match.group(1) + if self.smart_quotes_to == 'ascii': + sub = self.MS_CHARS_TO_ASCII.get(orig).encode() + else: + sub = self.MS_CHARS.get(orig) + if type(sub) == tuple: + if self.smart_quotes_to == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: + sub = '&'.encode() + sub[0].encode() + ';'.encode() + else: + sub = sub.encode() + return sub + + def _convert_from(self, proposed, errors="strict"): + """Attempt to convert the markup to the proposed encoding. + + :param proposed: The name of a character encoding. + """ + proposed = self.find_codec(proposed) + if not proposed or (proposed, errors) in self.tried_encodings: + return None + self.tried_encodings.append((proposed, errors)) + markup = self.markup + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if (self.smart_quotes_to is not None + and proposed in self.ENCODINGS_WITH_SMART_QUOTES): + smart_quotes_re = b"([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) + + try: + #print("Trying to convert document to %s (errors=%s)" % ( + # proposed, errors)) + u = self._to_unicode(markup, proposed, errors) + self.markup = u + self.original_encoding = proposed + except Exception as e: + #print("That didn't work!") + #print(e) + return None + #print("Correct encoding: %s" % proposed) + return self.markup + + def _to_unicode(self, data, encoding, errors="strict"): + """Given a string and its encoding, decodes the string into Unicode. + + :param encoding: The name of an encoding. + """ + return str(data, encoding, errors) + + @property + def declared_html_encoding(self): + """If the markup is an HTML document, returns the encoding declared _within_ + the document. + """ + if not self.is_html: + return None + return self.detector.declared_encoding + + def find_codec(self, charset): + """Convert the name of a character set to a codec name. + + :param charset: The name of a character set. + :return: The name of a codec. + """ + value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) + or (charset and self._codec(charset.replace("-", ""))) + or (charset and self._codec(charset.replace("-", "_"))) + or (charset and charset.lower()) + or charset + ) + if value: + return value.lower() + return None + + def _codec(self, charset): + if not charset: + return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + + # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. + MS_CHARS = {b'\x80': ('euro', '20AC'), + b'\x81': ' ', + b'\x82': ('sbquo', '201A'), + b'\x83': ('fnof', '192'), + b'\x84': ('bdquo', '201E'), + b'\x85': ('hellip', '2026'), + b'\x86': ('dagger', '2020'), + b'\x87': ('Dagger', '2021'), + b'\x88': ('circ', '2C6'), + b'\x89': ('permil', '2030'), + b'\x8A': ('Scaron', '160'), + b'\x8B': ('lsaquo', '2039'), + b'\x8C': ('OElig', '152'), + b'\x8D': '?', + b'\x8E': ('#x17D', '17D'), + b'\x8F': '?', + b'\x90': '?', + b'\x91': ('lsquo', '2018'), + b'\x92': ('rsquo', '2019'), + b'\x93': ('ldquo', '201C'), + b'\x94': ('rdquo', '201D'), + b'\x95': ('bull', '2022'), + b'\x96': ('ndash', '2013'), + b'\x97': ('mdash', '2014'), + b'\x98': ('tilde', '2DC'), + b'\x99': ('trade', '2122'), + b'\x9a': ('scaron', '161'), + b'\x9b': ('rsaquo', '203A'), + b'\x9c': ('oelig', '153'), + b'\x9d': '?', + b'\x9e': ('#x17E', '17E'), + b'\x9f': ('Yuml', ''),} + + # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains + # horrors like stripping diacritical marks to turn á into a, but also + # contains non-horrors like turning “ into ". + MS_CHARS_TO_ASCII = { + b'\x80' : 'EUR', + b'\x81' : ' ', + b'\x82' : ',', + b'\x83' : 'f', + b'\x84' : ',,', + b'\x85' : '...', + b'\x86' : '+', + b'\x87' : '++', + b'\x88' : '^', + b'\x89' : '%', + b'\x8a' : 'S', + b'\x8b' : '<', + b'\x8c' : 'OE', + b'\x8d' : '?', + b'\x8e' : 'Z', + b'\x8f' : '?', + b'\x90' : '?', + b'\x91' : "'", + b'\x92' : "'", + b'\x93' : '"', + b'\x94' : '"', + b'\x95' : '*', + b'\x96' : '-', + b'\x97' : '--', + b'\x98' : '~', + b'\x99' : '(TM)', + b'\x9a' : 's', + b'\x9b' : '>', + b'\x9c' : 'oe', + b'\x9d' : '?', + b'\x9e' : 'z', + b'\x9f' : 'Y', + b'\xa0' : ' ', + b'\xa1' : '!', + b'\xa2' : 'c', + b'\xa3' : 'GBP', + b'\xa4' : '$', #This approximation is especially parochial--this is the + #generic currency symbol. + b'\xa5' : 'YEN', + b'\xa6' : '|', + b'\xa7' : 'S', + b'\xa8' : '..', + b'\xa9' : '', + b'\xaa' : '(th)', + b'\xab' : '<<', + b'\xac' : '!', + b'\xad' : ' ', + b'\xae' : '(R)', + b'\xaf' : '-', + b'\xb0' : 'o', + b'\xb1' : '+-', + b'\xb2' : '2', + b'\xb3' : '3', + b'\xb4' : ("'", 'acute'), + b'\xb5' : 'u', + b'\xb6' : 'P', + b'\xb7' : '*', + b'\xb8' : ',', + b'\xb9' : '1', + b'\xba' : '(th)', + b'\xbb' : '>>', + b'\xbc' : '1/4', + b'\xbd' : '1/2', + b'\xbe' : '3/4', + b'\xbf' : '?', + b'\xc0' : 'A', + b'\xc1' : 'A', + b'\xc2' : 'A', + b'\xc3' : 'A', + b'\xc4' : 'A', + b'\xc5' : 'A', + b'\xc6' : 'AE', + b'\xc7' : 'C', + b'\xc8' : 'E', + b'\xc9' : 'E', + b'\xca' : 'E', + b'\xcb' : 'E', + b'\xcc' : 'I', + b'\xcd' : 'I', + b'\xce' : 'I', + b'\xcf' : 'I', + b'\xd0' : 'D', + b'\xd1' : 'N', + b'\xd2' : 'O', + b'\xd3' : 'O', + b'\xd4' : 'O', + b'\xd5' : 'O', + b'\xd6' : 'O', + b'\xd7' : '*', + b'\xd8' : 'O', + b'\xd9' : 'U', + b'\xda' : 'U', + b'\xdb' : 'U', + b'\xdc' : 'U', + b'\xdd' : 'Y', + b'\xde' : 'b', + b'\xdf' : 'B', + b'\xe0' : 'a', + b'\xe1' : 'a', + b'\xe2' : 'a', + b'\xe3' : 'a', + b'\xe4' : 'a', + b'\xe5' : 'a', + b'\xe6' : 'ae', + b'\xe7' : 'c', + b'\xe8' : 'e', + b'\xe9' : 'e', + b'\xea' : 'e', + b'\xeb' : 'e', + b'\xec' : 'i', + b'\xed' : 'i', + b'\xee' : 'i', + b'\xef' : 'i', + b'\xf0' : 'o', + b'\xf1' : 'n', + b'\xf2' : 'o', + b'\xf3' : 'o', + b'\xf4' : 'o', + b'\xf5' : 'o', + b'\xf6' : 'o', + b'\xf7' : '/', + b'\xf8' : 'o', + b'\xf9' : 'u', + b'\xfa' : 'u', + b'\xfb' : 'u', + b'\xfc' : 'u', + b'\xfd' : 'y', + b'\xfe' : 'b', + b'\xff' : 'y', + } + + # A map used when removing rogue Windows-1252/ISO-8859-1 + # characters in otherwise UTF-8 documents. + # + # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in + # Windows-1252. + WINDOWS_1252_TO_UTF8 = { + 0x80 : b'\xe2\x82\xac', # € + 0x82 : b'\xe2\x80\x9a', # ‚ + 0x83 : b'\xc6\x92', # ƒ + 0x84 : b'\xe2\x80\x9e', # „ + 0x85 : b'\xe2\x80\xa6', # … + 0x86 : b'\xe2\x80\xa0', # † + 0x87 : b'\xe2\x80\xa1', # ‡ + 0x88 : b'\xcb\x86', # ˆ + 0x89 : b'\xe2\x80\xb0', # ‰ + 0x8a : b'\xc5\xa0', # Š + 0x8b : b'\xe2\x80\xb9', # ‹ + 0x8c : b'\xc5\x92', # Œ + 0x8e : b'\xc5\xbd', # Ž + 0x91 : b'\xe2\x80\x98', # ‘ + 0x92 : b'\xe2\x80\x99', # ’ + 0x93 : b'\xe2\x80\x9c', # “ + 0x94 : b'\xe2\x80\x9d', # ” + 0x95 : b'\xe2\x80\xa2', # • + 0x96 : b'\xe2\x80\x93', # – + 0x97 : b'\xe2\x80\x94', # — + 0x98 : b'\xcb\x9c', # ˜ + 0x99 : b'\xe2\x84\xa2', # ™ + 0x9a : b'\xc5\xa1', # š + 0x9b : b'\xe2\x80\xba', # › + 0x9c : b'\xc5\x93', # œ + 0x9e : b'\xc5\xbe', # ž + 0x9f : b'\xc5\xb8', # Ÿ + 0xa0 : b'\xc2\xa0', #   + 0xa1 : b'\xc2\xa1', # ¡ + 0xa2 : b'\xc2\xa2', # ¢ + 0xa3 : b'\xc2\xa3', # £ + 0xa4 : b'\xc2\xa4', # ¤ + 0xa5 : b'\xc2\xa5', # ¥ + 0xa6 : b'\xc2\xa6', # ¦ + 0xa7 : b'\xc2\xa7', # § + 0xa8 : b'\xc2\xa8', # ¨ + 0xa9 : b'\xc2\xa9', # © + 0xaa : b'\xc2\xaa', # ª + 0xab : b'\xc2\xab', # « + 0xac : b'\xc2\xac', # ¬ + 0xad : b'\xc2\xad', # ­ + 0xae : b'\xc2\xae', # ® + 0xaf : b'\xc2\xaf', # ¯ + 0xb0 : b'\xc2\xb0', # ° + 0xb1 : b'\xc2\xb1', # ± + 0xb2 : b'\xc2\xb2', # ² + 0xb3 : b'\xc2\xb3', # ³ + 0xb4 : b'\xc2\xb4', # ´ + 0xb5 : b'\xc2\xb5', # µ + 0xb6 : b'\xc2\xb6', # ¶ + 0xb7 : b'\xc2\xb7', # · + 0xb8 : b'\xc2\xb8', # ¸ + 0xb9 : b'\xc2\xb9', # ¹ + 0xba : b'\xc2\xba', # º + 0xbb : b'\xc2\xbb', # » + 0xbc : b'\xc2\xbc', # ¼ + 0xbd : b'\xc2\xbd', # ½ + 0xbe : b'\xc2\xbe', # ¾ + 0xbf : b'\xc2\xbf', # ¿ + 0xc0 : b'\xc3\x80', # À + 0xc1 : b'\xc3\x81', # Á + 0xc2 : b'\xc3\x82', #  + 0xc3 : b'\xc3\x83', # à + 0xc4 : b'\xc3\x84', # Ä + 0xc5 : b'\xc3\x85', # Å + 0xc6 : b'\xc3\x86', # Æ + 0xc7 : b'\xc3\x87', # Ç + 0xc8 : b'\xc3\x88', # È + 0xc9 : b'\xc3\x89', # É + 0xca : b'\xc3\x8a', # Ê + 0xcb : b'\xc3\x8b', # Ë + 0xcc : b'\xc3\x8c', # Ì + 0xcd : b'\xc3\x8d', # Í + 0xce : b'\xc3\x8e', # Î + 0xcf : b'\xc3\x8f', # Ï + 0xd0 : b'\xc3\x90', # Ð + 0xd1 : b'\xc3\x91', # Ñ + 0xd2 : b'\xc3\x92', # Ò + 0xd3 : b'\xc3\x93', # Ó + 0xd4 : b'\xc3\x94', # Ô + 0xd5 : b'\xc3\x95', # Õ + 0xd6 : b'\xc3\x96', # Ö + 0xd7 : b'\xc3\x97', # × + 0xd8 : b'\xc3\x98', # Ø + 0xd9 : b'\xc3\x99', # Ù + 0xda : b'\xc3\x9a', # Ú + 0xdb : b'\xc3\x9b', # Û + 0xdc : b'\xc3\x9c', # Ü + 0xdd : b'\xc3\x9d', # Ý + 0xde : b'\xc3\x9e', # Þ + 0xdf : b'\xc3\x9f', # ß + 0xe0 : b'\xc3\xa0', # à + 0xe1 : b'\xa1', # á + 0xe2 : b'\xc3\xa2', # â + 0xe3 : b'\xc3\xa3', # ã + 0xe4 : b'\xc3\xa4', # ä + 0xe5 : b'\xc3\xa5', # å + 0xe6 : b'\xc3\xa6', # æ + 0xe7 : b'\xc3\xa7', # ç + 0xe8 : b'\xc3\xa8', # è + 0xe9 : b'\xc3\xa9', # é + 0xea : b'\xc3\xaa', # ê + 0xeb : b'\xc3\xab', # ë + 0xec : b'\xc3\xac', # ì + 0xed : b'\xc3\xad', # í + 0xee : b'\xc3\xae', # î + 0xef : b'\xc3\xaf', # ï + 0xf0 : b'\xc3\xb0', # ð + 0xf1 : b'\xc3\xb1', # ñ + 0xf2 : b'\xc3\xb2', # ò + 0xf3 : b'\xc3\xb3', # ó + 0xf4 : b'\xc3\xb4', # ô + 0xf5 : b'\xc3\xb5', # õ + 0xf6 : b'\xc3\xb6', # ö + 0xf7 : b'\xc3\xb7', # ÷ + 0xf8 : b'\xc3\xb8', # ø + 0xf9 : b'\xc3\xb9', # ù + 0xfa : b'\xc3\xba', # ú + 0xfb : b'\xc3\xbb', # û + 0xfc : b'\xc3\xbc', # ü + 0xfd : b'\xc3\xbd', # ý + 0xfe : b'\xc3\xbe', # þ + } + + MULTIBYTE_MARKERS_AND_SIZES = [ + (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF + (0xe0, 0xef, 3), # 3-byte characters start with E0-EF + (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4 + ] + + FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0] + LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1] + + @classmethod + def detwingle(cls, in_bytes, main_encoding="utf8", + embedded_encoding="windows-1252"): + """Fix characters from one encoding embedded in some other encoding. + + Currently the only situation supported is Windows-1252 (or its + subset ISO-8859-1), embedded in UTF-8. + + :param in_bytes: A bytestring that you suspect contains + characters from multiple encodings. Note that this _must_ + be a bytestring. If you've already converted the document + to Unicode, you're too late. + :param main_encoding: The primary encoding of `in_bytes`. + :param embedded_encoding: The encoding that was used to embed characters + in the main document. + :return: A bytestring in which `embedded_encoding` + characters have been converted to their `main_encoding` + equivalents. + """ + if embedded_encoding.replace('_', '-').lower() not in ( + 'windows-1252', 'windows_1252'): + raise NotImplementedError( + "Windows-1252 and ISO-8859-1 are the only currently supported " + "embedded encodings.") + + if main_encoding.lower() not in ('utf8', 'utf-8'): + raise NotImplementedError( + "UTF-8 is the only currently supported main encoding.") + + byte_chunks = [] + + chunk_start = 0 + pos = 0 + while pos < len(in_bytes): + byte = in_bytes[pos] + if not isinstance(byte, int): + # Python 2.x + byte = ord(byte) + if (byte >= cls.FIRST_MULTIBYTE_MARKER + and byte <= cls.LAST_MULTIBYTE_MARKER): + # This is the start of a UTF-8 multibyte character. Skip + # to the end. + for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES: + if byte >= start and byte <= end: + pos += size + break + elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8: + # We found a Windows-1252 character! + # Save the string up to this point as a chunk. + byte_chunks.append(in_bytes[chunk_start:pos]) + + # Now translate the Windows-1252 character into UTF-8 + # and add it as another, one-byte chunk. + byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte]) + pos += 1 + chunk_start = pos + else: + # Go on to the next character. + pos += 1 + if chunk_start == 0: + # The string is unchanged. + return in_bytes + else: + # Store the final chunk. + byte_chunks.append(in_bytes[chunk_start:]) + return b''.join(byte_chunks) + diff --git a/venv/lib/python3.6/site-packages/bs4/diagnose.py b/venv/lib/python3.6/site-packages/bs4/diagnose.py new file mode 100644 index 0000000..500e92d --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/diagnose.py @@ -0,0 +1,242 @@ +"""Diagnostic functions, mainly for use when doing tech support.""" + +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +import cProfile +from io import StringIO +from html.parser import HTMLParser +import bs4 +from bs4 import BeautifulSoup, __version__ +from bs4.builder import builder_registry + +import os +import pstats +import random +import tempfile +import time +import traceback +import sys +import cProfile + +def diagnose(data): + """Diagnostic suite for isolating common problems. + + :param data: A string containing markup that needs to be explained. + :return: None; diagnostics are printed to standard output. + """ + print(("Diagnostic running on Beautiful Soup %s" % __version__)) + print(("Python version %s" % sys.version)) + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: + for builder in builder_registry.builders: + if name in builder.features: + break + else: + basic_parsers.remove(name) + print(( + "I noticed that %s is not installed. Installing it may help." % + name)) + + if 'lxml' in basic_parsers: + basic_parsers.append("lxml-xml") + try: + from lxml import etree + print(("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))) + except ImportError as e: + print( + "lxml is not installed or couldn't be imported.") + + + if 'html5lib' in basic_parsers: + try: + import html5lib + print(("Found html5lib version %s" % html5lib.__version__)) + except ImportError as e: + print( + "html5lib is not installed or couldn't be imported.") + + if hasattr(data, 'read'): + data = data.read() + elif data.startswith("http:") or data.startswith("https:"): + print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)) + print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") + return + else: + try: + if os.path.exists(data): + print(('"%s" looks like a filename. Reading data from the file.' % data)) + with open(data) as fp: + data = fp.read() + except ValueError: + # This can happen on some platforms when the 'filename' is + # too long. Assume it's data and not a filename. + pass + print("") + + for parser in basic_parsers: + print(("Trying to parse your markup with %s" % parser)) + success = False + try: + soup = BeautifulSoup(data, features=parser) + success = True + except Exception as e: + print(("%s could not parse the markup." % parser)) + traceback.print_exc() + if success: + print(("Here's what %s did with the markup:" % parser)) + print((soup.prettify())) + + print(("-" * 80)) + +def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. + + This lets you see how lxml parses a document when no Beautiful + Soup code is running. You can use this to determine whether + an lxml-specific problem is in Beautiful Soup's lxml tree builders + or in lxml itself. + + :param data: Some markup. + :param html: If True, markup will be parsed with lxml's HTML parser. + if False, lxml's XML parser will be used. + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + print(("%s, %4s, %s" % (event, element.tag, element.text))) + +class AnnouncingParser(HTMLParser): + """Subclass of HTMLParser that announces parse events, without doing + anything else. + + You can use this to get a picture of how html.parser sees a given + document. The easiest way to do this is to call `htmlparser_trace`. + """ + + def _p(self, s): + print(s) + + def handle_starttag(self, name, attrs): + self._p("%s START" % name) + + def handle_endtag(self, name): + self._p("%s END" % name) + + def handle_data(self, data): + self._p("%s DATA" % data) + + def handle_charref(self, name): + self._p("%s CHARREF" % name) + + def handle_entityref(self, name): + self._p("%s ENTITYREF" % name) + + def handle_comment(self, data): + self._p("%s COMMENT" % data) + + def handle_decl(self, data): + self._p("%s DECL" % data) + + def unknown_decl(self, data): + self._p("%s UNKNOWN-DECL" % data) + + def handle_pi(self, data): + self._p("%s PI" % data) + +def htmlparser_trace(data): + """Print out the HTMLParser events that occur during parsing. + + This lets you see how HTMLParser parses a document when no + Beautiful Soup code is running. + + :param data: Some markup. + """ + parser = AnnouncingParser() + parser.feed(data) + +_vowels = "aeiou" +_consonants = "bcdfghjklmnpqrstvwxyz" + +def rword(length=5): + "Generate a random word-like string." + s = '' + for i in range(length): + if i % 2 == 0: + t = _consonants + else: + t = _vowels + s += random.choice(t) + return s + +def rsentence(length=4): + "Generate a random sentence-like string." + return " ".join(rword(random.randint(4,9)) for i in range(length)) + +def rdoc(num_elements=1000): + """Randomly generate an invalid HTML document.""" + tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] + elements = [] + for i in range(num_elements): + choice = random.randint(0,3) + if choice == 0: + # New tag. + tag_name = random.choice(tag_names) + elements.append("<%s>" % tag_name) + elif choice == 1: + elements.append(rsentence(random.randint(1,4))) + elif choice == 2: + # Close a tag. + tag_name = random.choice(tag_names) + elements.append("" % tag_name) + return "" + "\n".join(elements) + "" + +def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" + print(("Comparative parser benchmark on Beautiful Soup %s" % __version__)) + data = rdoc(num_elements) + print(("Generated a large invalid HTML document (%d bytes)." % len(data))) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False + try: + a = time.time() + soup = BeautifulSoup(data, parser) + b = time.time() + success = True + except Exception as e: + print(("%s could not parse the markup." % parser)) + traceback.print_exc() + if success: + print(("BS4+%s parsed the markup in %.2fs." % (parser, b-a))) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() + print(("Raw lxml parsed the markup in %.2fs." % (b-a))) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() + print(("Raw html5lib parsed the markup in %.2fs." % (b-a))) + +def profile(num_elements=100000, parser="lxml"): + """Use Python's profiler on a randomly generated document.""" + filehandle = tempfile.NamedTemporaryFile() + filename = filehandle.name + + data = rdoc(num_elements) + vars = dict(bs4=bs4, data=data, parser=parser) + cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) + + stats = pstats.Stats(filename) + # stats.strip_dirs() + stats.sort_stats("cumulative") + stats.print_stats('_html5lib|bs4', 50) + +# If this file is run as a script, standard input is diagnosed. +if __name__ == '__main__': + diagnose(sys.stdin.read()) diff --git a/venv/lib/python3.6/site-packages/bs4/element.py b/venv/lib/python3.6/site-packages/bs4/element.py new file mode 100644 index 0000000..81d9db9 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/element.py @@ -0,0 +1,2175 @@ +# Use of this source code is governed by the MIT license. +__license__ = "MIT" + +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable +import re +import sys +import warnings +try: + import soupsieve +except ImportError as e: + soupsieve = None + warnings.warn( + 'The soupsieve package is not installed. CSS selectors cannot be used.' + ) + +from bs4.formatter import ( + Formatter, + HTMLFormatter, + XMLFormatter, +) + +DEFAULT_OUTPUT_ENCODING = "utf-8" +PY3K = (sys.version_info[0] > 2) + +nonwhitespace_re = re.compile(r"\S+") + +# NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on +# the off chance someone imported it for their own use. +whitespace_re = re.compile(r"\s+") + +def _alias(attr): + """Alias one attribute name to another for backward compatibility""" + @property + def alias(self): + return getattr(self, attr) + + @alias.setter + def alias(self): + return setattr(self, attr) + return alias + + +# These encodings are recognized by Python (so PageElement.encode +# could theoretically support them) but XML and HTML don't recognize +# them (so they should not show up in an XML or HTML document as that +# document's encoding). +# +# If an XML document is encoded in one of these encodings, no encoding +# will be mentioned in the XML declaration. If an HTML document is +# encoded in one of these encodings, and the HTML document has a +# tag that mentions an encoding, the encoding will be given as +# the empty string. +# +# Source: +# https://docs.python.org/3/library/codecs.html#python-specific-encodings +PYTHON_SPECIFIC_ENCODINGS = set([ + "idna", + "mbcs", + "oem", + "palmos", + "punycode", + "raw_unicode_escape", + "undefined", + "unicode_escape", + "raw-unicode-escape", + "unicode-escape", + "string-escape", + "string_escape", +]) + + +class NamespacedAttribute(str): + """A namespaced string (e.g. 'xml:lang') that remembers the namespace + ('xml') and the name ('lang') that were used to create it. + """ + + def __new__(cls, prefix, name=None, namespace=None): + if not name: + # This is the default namespace. Its name "has no value" + # per https://www.w3.org/TR/xml-names/#defaulting + name = None + + if name is None: + obj = str.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. + obj = str.__new__(cls, name) + else: + obj = str.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj + +class AttributeValueWithCharsetSubstitution(str): + """A stand-in object for a character encoding specified in HTML.""" + +class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'charset' attribute. + + When Beautiful Soup parses the markup '', the + value of the 'charset' attribute will be one of these objects. + """ + + def __new__(cls, original_value): + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + """When an HTML document is being encoded to a given encoding, the + value of a meta tag's 'charset' is the name of the encoding. + """ + if encoding in PYTHON_SPECIFIC_ENCODINGS: + return '' + return encoding + + +class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'content' attribute. + + When Beautiful Soup parses the markup: + + + The value of the 'content' attribute will be one of these objects. + """ + + CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) + + def __new__(cls, original_value): + match = cls.CHARSET_RE.search(original_value) + if match is None: + # No substitution necessary. + return str.__new__(str, original_value) + + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + if encoding in PYTHON_SPECIFIC_ENCODINGS: + return '' + def rewrite(match): + return match.group(1) + encoding + return self.CHARSET_RE.sub(rewrite, self.original_value) + + +class PageElement(object): + """Contains the navigational information for some part of the page: + that is, its current location in the parse tree. + + NavigableString, Tag, etc. are all subclasses of PageElement. + """ + + def setup(self, parent=None, previous_element=None, next_element=None, + previous_sibling=None, next_sibling=None): + """Sets up the initial relations between this element and + other elements. + + :param parent: The parent of this element. + + :param previous_element: The element parsed immediately before + this one. + + :param next_element: The element parsed immediately before + this one. + + :param previous_sibling: The most recently encountered element + on the same level of the parse tree as this one. + + :param previous_sibling: The next element to be encountered + on the same level of the parse tree as this one. + """ + self.parent = parent + + self.previous_element = previous_element + if previous_element is not None: + self.previous_element.next_element = self + + self.next_element = next_element + if self.next_element is not None: + self.next_element.previous_element = self + + self.next_sibling = next_sibling + if self.next_sibling is not None: + self.next_sibling.previous_sibling = self + + if (previous_sibling is None + and self.parent is not None and self.parent.contents): + previous_sibling = self.parent.contents[-1] + + self.previous_sibling = previous_sibling + if previous_sibling is not None: + self.previous_sibling.next_sibling = self + + def format_string(self, s, formatter): + """Format the given string using the given formatter. + + :param s: A string. + :param formatter: A Formatter object, or a string naming one of the standard formatters. + """ + if formatter is None: + return s + if not isinstance(formatter, Formatter): + formatter = self.formatter_for_name(formatter) + output = formatter.substitute(s) + return output + + def formatter_for_name(self, formatter): + """Look up or create a Formatter for the given identifier, + if necessary. + + :param formatter: Can be a Formatter object (used as-is), a + function (used as the entity substitution hook for an + XMLFormatter or HTMLFormatter), or a string (used to look + up an XMLFormatter or HTMLFormatter in the appropriate + registry. + """ + if isinstance(formatter, Formatter): + return formatter + if self._is_xml: + c = XMLFormatter + else: + c = HTMLFormatter + if isinstance(formatter, Callable): + return c(entity_substitution=formatter) + return c.REGISTRY[formatter] + + @property + def _is_xml(self): + """Is this element part of an XML tree or an HTML tree? + + This is used in formatter_for_name, when deciding whether an + XMLFormatter or HTMLFormatter is more appropriate. It can be + inefficient, but it should be called very rarely. + """ + if self.known_xml is not None: + # Most of the time we will have determined this when the + # document is parsed. + return self.known_xml + + # Otherwise, it's likely that this element was created by + # direct invocation of the constructor from within the user's + # Python code. + if self.parent is None: + # This is the top-level object. It should have .known_xml set + # from tree creation. If not, take a guess--BS is usually + # used on HTML markup. + return getattr(self, 'is_xml', False) + return self.parent._is_xml + + nextSibling = _alias("next_sibling") # BS3 + previousSibling = _alias("previous_sibling") # BS3 + + def replace_with(self, replace_with): + """Replace this PageElement with another one, keeping the rest of the + tree the same. + + :param replace_with: A PageElement. + :return: `self`, no longer part of the tree. + """ + if self.parent is None: + raise ValueError( + "Cannot replace one element with another when the " + "element to be replaced is not part of a tree.") + if replace_with is self: + return + if replace_with is self.parent: + raise ValueError("Cannot replace a Tag with its parent.") + old_parent = self.parent + my_index = self.parent.index(self) + self.extract(_self_index=my_index) + old_parent.insert(my_index, replace_with) + return self + replaceWith = replace_with # BS3 + + def unwrap(self): + """Replace this PageElement with its contents. + + :return: `self`, no longer part of the tree. + """ + my_parent = self.parent + if self.parent is None: + raise ValueError( + "Cannot replace an element with its contents when that" + "element is not part of a tree.") + my_index = self.parent.index(self) + self.extract(_self_index=my_index) + for child in reversed(self.contents[:]): + my_parent.insert(my_index, child) + return self + replace_with_children = unwrap + replaceWithChildren = unwrap # BS3 + + def wrap(self, wrap_inside): + """Wrap this PageElement inside another one. + + :param wrap_inside: A PageElement. + :return: `wrap_inside`, occupying the position in the tree that used + to be occupied by `self`, and with `self` inside it. + """ + me = self.replace_with(wrap_inside) + wrap_inside.append(me) + return wrap_inside + + def extract(self, _self_index=None): + """Destructively rips this element out of the tree. + + :param _self_index: The location of this element in its parent's + .contents, if known. Passing this in allows for a performance + optimization. + + :return: `self`, no longer part of the tree. + """ + if self.parent is not None: + if _self_index is None: + _self_index = self.parent.index(self) + del self.parent.contents[_self_index] + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + last_child = self._last_descendant() + next_element = last_child.next_element + + if (self.previous_element is not None and + self.previous_element is not next_element): + self.previous_element.next_element = next_element + if next_element is not None and next_element is not self.previous_element: + next_element.previous_element = self.previous_element + self.previous_element = None + last_child.next_element = None + + self.parent = None + if (self.previous_sibling is not None + and self.previous_sibling is not self.next_sibling): + self.previous_sibling.next_sibling = self.next_sibling + if (self.next_sibling is not None + and self.next_sibling is not self.previous_sibling): + self.next_sibling.previous_sibling = self.previous_sibling + self.previous_sibling = self.next_sibling = None + return self + + def _last_descendant(self, is_initialized=True, accept_self=True): + """Finds the last element beneath this object to be parsed. + + :param is_initialized: Has `setup` been called on this PageElement + yet? + :param accept_self: Is `self` an acceptable answer to the question? + """ + if is_initialized and self.next_sibling is not None: + last_child = self.next_sibling.previous_element + else: + last_child = self + while isinstance(last_child, Tag) and last_child.contents: + last_child = last_child.contents[-1] + if not accept_self and last_child is self: + last_child = None + return last_child + # BS3: Not part of the API! + _lastRecursiveChild = _last_descendant + + def insert(self, position, new_child): + """Insert a new PageElement in the list of this PageElement's children. + + This works the same way as `list.insert`. + + :param position: The numeric position that should be occupied + in `self.children` by the new PageElement. + :param new_child: A PageElement. + """ + if new_child is None: + raise ValueError("Cannot insert None into a tag.") + if new_child is self: + raise ValueError("Cannot insert a tag into itself.") + if (isinstance(new_child, str) + and not isinstance(new_child, NavigableString)): + new_child = NavigableString(new_child) + + from bs4 import BeautifulSoup + if isinstance(new_child, BeautifulSoup): + # We don't want to end up with a situation where one BeautifulSoup + # object contains another. Insert the children one at a time. + for subchild in list(new_child.contents): + self.insert(position, subchild) + position += 1 + return + position = min(position, len(self.contents)) + if hasattr(new_child, 'parent') and new_child.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if new_child.parent is self: + current_index = self.index(new_child) + if current_index < position: + # We're moving this element further down the list + # of this object's children. That means that when + # we extract this element, our target index will + # jump down one. + position -= 1 + new_child.extract() + + new_child.parent = self + previous_child = None + if position == 0: + new_child.previous_sibling = None + new_child.previous_element = self + else: + previous_child = self.contents[position - 1] + new_child.previous_sibling = previous_child + new_child.previous_sibling.next_sibling = new_child + new_child.previous_element = previous_child._last_descendant(False) + if new_child.previous_element is not None: + new_child.previous_element.next_element = new_child + + new_childs_last_element = new_child._last_descendant(False) + + if position >= len(self.contents): + new_child.next_sibling = None + + parent = self + parents_next_sibling = None + while parents_next_sibling is None and parent is not None: + parents_next_sibling = parent.next_sibling + parent = parent.parent + if parents_next_sibling is not None: + # We found the element that comes next in the document. + break + if parents_next_sibling is not None: + new_childs_last_element.next_element = parents_next_sibling + else: + # The last element of this tag is the last element in + # the document. + new_childs_last_element.next_element = None + else: + next_child = self.contents[position] + new_child.next_sibling = next_child + if new_child.next_sibling is not None: + new_child.next_sibling.previous_sibling = new_child + new_childs_last_element.next_element = next_child + + if new_childs_last_element.next_element is not None: + new_childs_last_element.next_element.previous_element = new_childs_last_element + self.contents.insert(position, new_child) + + def append(self, tag): + """Appends the given PageElement to the contents of this one. + + :param tag: A PageElement. + """ + self.insert(len(self.contents), tag) + + def extend(self, tags): + """Appends the given PageElements to this one's contents. + + :param tags: A list of PageElements. + """ + if isinstance(tags, Tag): + # Calling self.append() on another tag's contents will change + # the list we're iterating over. Make a list that won't + # change. + tags = list(tags.contents) + for tag in tags: + self.append(tag) + + def insert_before(self, *args): + """Makes the given element(s) the immediate predecessor of this one. + + All the elements will have the same parent, and the given elements + will be immediately before this one. + + :param args: One or more PageElements. + """ + parent = self.parent + if parent is None: + raise ValueError( + "Element has no parent, so 'before' has no meaning.") + if any(x is self for x in args): + raise ValueError("Can't insert an element before itself.") + for predecessor in args: + # Extract first so that the index won't be screwed up if they + # are siblings. + if isinstance(predecessor, PageElement): + predecessor.extract() + index = parent.index(self) + parent.insert(index, predecessor) + + def insert_after(self, *args): + """Makes the given element(s) the immediate successor of this one. + + The elements will have the same parent, and the given elements + will be immediately after this one. + + :param args: One or more PageElements. + """ + # Do all error checking before modifying the tree. + parent = self.parent + if parent is None: + raise ValueError( + "Element has no parent, so 'after' has no meaning.") + if any(x is self for x in args): + raise ValueError("Can't insert an element after itself.") + + offset = 0 + for successor in args: + # Extract first so that the index won't be screwed up if they + # are siblings. + if isinstance(successor, PageElement): + successor.extract() + index = parent.index(self) + parent.insert(index+1+offset, successor) + offset += 1 + + def find_next(self, name=None, attrs={}, text=None, **kwargs): + """Find the first PageElement that matches the given criteria and + appears later in the document than this PageElement. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :kwargs: A dictionary of filters on attribute values. + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self._find_one(self.find_all_next, name, attrs, text, **kwargs) + findNext = find_next # BS3 + + def find_all_next(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Find all PageElements that match the given criteria and appear + later in the document than this PageElement. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :param limit: Stop looking after finding this many results. + :kwargs: A dictionary of filters on attribute values. + :return: A ResultSet containing PageElements. + """ + return self._find_all(name, attrs, text, limit, self.next_elements, + **kwargs) + findAllNext = find_all_next # BS3 + + def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs): + """Find the closest sibling to this PageElement that matches the + given criteria and appears later in the document. + + All find_* methods take a common set of arguments. See the + online documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :kwargs: A dictionary of filters on attribute values. + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self._find_one(self.find_next_siblings, name, attrs, text, + **kwargs) + findNextSibling = find_next_sibling # BS3 + + def find_next_siblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Find all siblings of this PageElement that match the given criteria + and appear later in the document. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :param limit: Stop looking after finding this many results. + :kwargs: A dictionary of filters on attribute values. + :return: A ResultSet of PageElements. + :rtype: bs4.element.ResultSet + """ + return self._find_all(name, attrs, text, limit, + self.next_siblings, **kwargs) + findNextSiblings = find_next_siblings # BS3 + fetchNextSiblings = find_next_siblings # BS2 + + def find_previous(self, name=None, attrs={}, text=None, **kwargs): + """Look backwards in the document from this PageElement and find the + first PageElement that matches the given criteria. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :kwargs: A dictionary of filters on attribute values. + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self._find_one( + self.find_all_previous, name, attrs, text, **kwargs) + findPrevious = find_previous # BS3 + + def find_all_previous(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Look backwards in the document from this PageElement and find all + PageElements that match the given criteria. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :param limit: Stop looking after finding this many results. + :kwargs: A dictionary of filters on attribute values. + :return: A ResultSet of PageElements. + :rtype: bs4.element.ResultSet + """ + return self._find_all(name, attrs, text, limit, self.previous_elements, + **kwargs) + findAllPrevious = find_all_previous # BS3 + fetchPrevious = find_all_previous # BS2 + + def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this PageElement that matches the + given criteria and appears earlier in the document. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :kwargs: A dictionary of filters on attribute values. + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self._find_one(self.find_previous_siblings, name, attrs, text, + **kwargs) + findPreviousSibling = find_previous_sibling # BS3 + + def find_previous_siblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns all siblings to this PageElement that match the + given criteria and appear earlier in the document. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param text: A filter for a NavigableString with specific text. + :param limit: Stop looking after finding this many results. + :kwargs: A dictionary of filters on attribute values. + :return: A ResultSet of PageElements. + :rtype: bs4.element.ResultSet + """ + return self._find_all(name, attrs, text, limit, + self.previous_siblings, **kwargs) + findPreviousSiblings = find_previous_siblings # BS3 + fetchPreviousSiblings = find_previous_siblings # BS2 + + def find_parent(self, name=None, attrs={}, **kwargs): + """Find the closest parent of this PageElement that matches the given + criteria. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :kwargs: A dictionary of filters on attribute values. + + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + # NOTE: We can't use _find_one because findParents takes a different + # set of arguments. + r = None + l = self.find_parents(name, attrs, 1, **kwargs) + if l: + r = l[0] + return r + findParent = find_parent # BS3 + + def find_parents(self, name=None, attrs={}, limit=None, **kwargs): + """Find all parents of this PageElement that match the given criteria. + + All find_* methods take a common set of arguments. See the online + documentation for detailed explanations. + + :param name: A filter on tag name. + :param attrs: A dictionary of filters on attribute values. + :param limit: Stop looking after finding this many results. + :kwargs: A dictionary of filters on attribute values. + + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self._find_all(name, attrs, None, limit, self.parents, + **kwargs) + findParents = find_parents # BS3 + fetchParents = find_parents # BS2 + + @property + def next(self): + """The PageElement, if any, that was parsed just after this one. + + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self.next_element + + @property + def previous(self): + """The PageElement, if any, that was parsed just before this one. + + :return: A PageElement. + :rtype: bs4.element.Tag | bs4.element.NavigableString + """ + return self.previous_element + + #These methods do the real heavy lifting. + + def _find_one(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _find_all(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if text is None and 'string' in kwargs: + text = kwargs['string'] + del kwargs['string'] + + if isinstance(name, SoupStrainer): + strainer = name + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + + if text is None and not limit and not attrs and not kwargs: + if name is True or name is None: + # Optimization to find all tags. + result = (element for element in generator + if isinstance(element, Tag)) + return ResultSet(strainer, result) + elif isinstance(name, str): + # Optimization to find all tags with a given name. + if name.count(':') == 1: + # This is a name with a prefix. If this is a namespace-aware document, + # we need to match the local name against tag.name. If not, + # we need to match the fully-qualified name against tag.name. + prefix, local_name = name.split(':', 1) + else: + prefix = None + local_name = name + result = (element for element in generator + if isinstance(element, Tag) + and ( + element.name == name + ) or ( + element.name == local_name + and (prefix is None or element.prefix == prefix) + ) + ) + return ResultSet(strainer, result) + results = ResultSet(strainer) + while True: + try: + i = next(generator) + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These generators can be used to navigate starting from both + #NavigableStrings and Tags. + @property + def next_elements(self): + """All PageElements that were parsed after this one. + + :yield: A sequence of PageElements. + """ + i = self.next_element + while i is not None: + yield i + i = i.next_element + + @property + def next_siblings(self): + """All PageElements that are siblings of this one but were parsed + later. + + :yield: A sequence of PageElements. + """ + i = self.next_sibling + while i is not None: + yield i + i = i.next_sibling + + @property + def previous_elements(self): + """All PageElements that were parsed before this one. + + :yield: A sequence of PageElements. + """ + i = self.previous_element + while i is not None: + yield i + i = i.previous_element + + @property + def previous_siblings(self): + """All PageElements that are siblings of this one but were parsed + earlier. + + :yield: A sequence of PageElements. + """ + i = self.previous_sibling + while i is not None: + yield i + i = i.previous_sibling + + @property + def parents(self): + """All PageElements that are parents of this PageElement. + + :yield: A sequence of PageElements. + """ + i = self.parent + while i is not None: + yield i + i = i.parent + + @property + def decomposed(self): + """Check whether a PageElement has been decomposed. + + :rtype: bool + """ + return getattr(self, '_decomposed', False) or False + + # Old non-property versions of the generators, for backwards + # compatibility with BS3. + def nextGenerator(self): + return self.next_elements + + def nextSiblingGenerator(self): + return self.next_siblings + + def previousGenerator(self): + return self.previous_elements + + def previousSiblingGenerator(self): + return self.previous_siblings + + def parentGenerator(self): + return self.parents + + +class NavigableString(str, PageElement): + """A Python Unicode string that is part of a parse tree. + + When Beautiful Soup parses the markup penguin, it will + create a NavigableString for the string "penguin". + """ + + PREFIX = '' + SUFFIX = '' + + # We can't tell just by looking at a string whether it's contained + # in an XML document or an HTML document. + + known_xml = None + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, str): + u = str.__new__(cls, value) + else: + u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u.setup() + return u + + def __copy__(self): + """A copy of a NavigableString has the same contents and class + as the original, but it is not connected to the parse tree. + """ + return type(self)(self) + + def __getnewargs__(self): + return (str(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError( + "'%s' object has no attribute '%s'" % ( + self.__class__.__name__, attr)) + + def output_ready(self, formatter="minimal"): + """Run the string through the provided formatter. + + :param formatter: A Formatter object, or a string naming one of the standard formatters. + """ + output = self.format_string(self, formatter) + return self.PREFIX + output + self.SUFFIX + + @property + def name(self): + """Since a NavigableString is not a Tag, it has no .name. + + This property is implemented so that code like this doesn't crash + when run on a mixture of Tag and NavigableString objects: + [x.name for x in tag.children] + """ + return None + + @name.setter + def name(self, name): + """Prevent NavigableString.name from ever being set.""" + raise AttributeError("A NavigableString cannot be given a name.") + + +class PreformattedString(NavigableString): + """A NavigableString not subject to the normal formatting rules. + + This is an abstract class used for special kinds of strings such + as comments (the Comment class) and CDATA blocks (the CData + class). + """ + + PREFIX = '' + SUFFIX = '' + + def output_ready(self, formatter=None): + """Make this string ready for output by adding any subclass-specific + prefix or suffix. + + :param formatter: A Formatter object, or a string naming one + of the standard formatters. The string will be passed into the + Formatter, but only to trigger any side effects: the return + value is ignored. + + :return: The string, with any subclass-specific prefix and + suffix added on. + """ + if formatter is not None: + ignore = self.format_string(self, formatter) + return self.PREFIX + self + self.SUFFIX + +class CData(PreformattedString): + """A CDATA block.""" + PREFIX = '' + +class ProcessingInstruction(PreformattedString): + """A SGML processing instruction.""" + + PREFIX = '' + +class XMLProcessingInstruction(ProcessingInstruction): + """An XML processing instruction.""" + PREFIX = '' + +class Comment(PreformattedString): + """An HTML or XML comment.""" + PREFIX = '' + + +class Declaration(PreformattedString): + """An XML declaration.""" + PREFIX = '' + + +class Doctype(PreformattedString): + """A document type declaration.""" + @classmethod + def for_name_and_ids(cls, name, pub_id, system_id): + """Generate an appropriate document type declaration for a given + public ID and system ID. + + :param name: The name of the document's root element, e.g. 'html'. + :param pub_id: The Formal Public Identifier for this document type, + e.g. '-//W3C//DTD XHTML 1.1//EN' + :param system_id: The system identifier for this document type, + e.g. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' + + :return: A Doctype. + """ + value = name or '' + if pub_id is not None: + value += ' PUBLIC "%s"' % pub_id + if system_id is not None: + value += ' "%s"' % system_id + elif system_id is not None: + value += ' SYSTEM "%s"' % system_id + + return Doctype(value) + + PREFIX = '\n' + + +class Stylesheet(NavigableString): + """A NavigableString representing an stylesheet (probably + CSS). + + Used to distinguish embedded stylesheets from textual content. + """ + pass + + +class Script(NavigableString): + """A NavigableString representing an executable script (probably + Javascript). + + Used to distinguish executable code from textual content. + """ + pass + + +class TemplateString(NavigableString): + """A NavigableString representing a string found inside an HTML + template embedded in a larger document. + + Used to distinguish such strings from the main body of the document. + """ + pass + + +class Tag(PageElement): + """Represents an HTML or XML tag that is part of a parse tree, along + with its attributes and contents. + + When Beautiful Soup parses the markup penguin, it will + create a Tag object representing the tag. + """ + + def __init__(self, parser=None, builder=None, name=None, namespace=None, + prefix=None, attrs=None, parent=None, previous=None, + is_xml=None, sourceline=None, sourcepos=None, + can_be_empty_element=None, cdata_list_attributes=None, + preserve_whitespace_tags=None + ): + """Basic constructor. + + :param parser: A BeautifulSoup object. + :param builder: A TreeBuilder. + :param name: The name of the tag. + :param namespace: The URI of this Tag's XML namespace, if any. + :param prefix: The prefix for this Tag's XML namespace, if any. + :param attrs: A dictionary of this Tag's attribute values. + :param parent: The PageElement to use as this Tag's parent. + :param previous: The PageElement that was parsed immediately before + this tag. + :param is_xml: If True, this is an XML tag. Otherwise, this is an + HTML tag. + :param sourceline: The line number where this tag was found in its + source document. + :param sourcepos: The character position within `sourceline` where this + tag was found. + :param can_be_empty_element: If True, this tag should be + represented as . If False, this tag should be represented + as . + :param cdata_list_attributes: A list of attributes whose values should + be treated as CDATA if they ever show up on this tag. + :param preserve_whitespace_tags: A list of tag names whose contents + should have their whitespace preserved. + """ + if parser is None: + self.parser_class = None + else: + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected. + self.parser_class = parser.__class__ + if name is None: + raise ValueError("No value provided for new tag's name.") + self.name = name + self.namespace = namespace + self.prefix = prefix + if ((not builder or builder.store_line_numbers) + and (sourceline is not None or sourcepos is not None)): + self.sourceline = sourceline + self.sourcepos = sourcepos + if attrs is None: + attrs = {} + elif attrs: + if builder is not None and builder.cdata_list_attributes: + attrs = builder._replace_cdata_list_attribute_values( + self.name, attrs) + else: + attrs = dict(attrs) + else: + attrs = dict(attrs) + + # If possible, determine ahead of time whether this tag is an + # XML tag. + if builder: + self.known_xml = builder.is_xml + else: + self.known_xml = is_xml + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + + if builder is None: + # In the absence of a TreeBuilder, use whatever values were + # passed in here. They're probably None, unless this is a copy of some + # other tag. + self.can_be_empty_element = can_be_empty_element + self.cdata_list_attributes = cdata_list_attributes + self.preserve_whitespace_tags = preserve_whitespace_tags + else: + # Set up any substitutions for this tag, such as the charset in a META tag. + builder.set_up_substitutions(self) + + # Ask the TreeBuilder whether this tag might be an empty-element tag. + self.can_be_empty_element = builder.can_be_empty_element(name) + + # Keep track of the list of attributes of this tag that + # might need to be treated as a list. + # + # For performance reasons, we store the whole data structure + # rather than asking the question of every tag. Asking would + # require building a new data structure every time, and + # (unlike can_be_empty_element), we almost never need + # to check this. + self.cdata_list_attributes = builder.cdata_list_attributes + + # Keep track of the names that might cause this tag to be treated as a + # whitespace-preserved tag. + self.preserve_whitespace_tags = builder.preserve_whitespace_tags + + parserClass = _alias("parser_class") # BS3 + + def __copy__(self): + """A copy of a Tag is a new Tag, unconnected to the parse tree. + Its contents are a copy of the old Tag's contents. + """ + clone = type(self)( + None, self.builder, self.name, self.namespace, + self.prefix, self.attrs, is_xml=self._is_xml, + sourceline=self.sourceline, sourcepos=self.sourcepos, + can_be_empty_element=self.can_be_empty_element, + cdata_list_attributes=self.cdata_list_attributes, + preserve_whitespace_tags=self.preserve_whitespace_tags + ) + for attr in ('can_be_empty_element', 'hidden'): + setattr(clone, attr, getattr(self, attr)) + for child in self.contents: + clone.append(child.__copy__()) + return clone + + @property + def is_empty_element(self): + """Is this tag an empty-element tag? (aka a self-closing tag) + + A tag that has contents is never an empty-element tag. + + A tag that has no contents may or may not be an empty-element + tag. It depends on the builder used to create the tag. If the + builder has a designated list of empty-element tags, then only + a tag whose name shows up in that list is considered an + empty-element tag. + + If the builder has no designated list of empty-element tags, + then any tag with no contents is an empty-element tag. + """ + return len(self.contents) == 0 and self.can_be_empty_element + isSelfClosing = is_empty_element # BS3 + + @property + def string(self): + """Convenience property to get the single string within this + PageElement. + + TODO It might make sense to have NavigableString.string return + itself. + + :return: If this element has a single string child, return + value is that string. If this element has one child tag, + return value is the 'string' attribute of the child tag, + recursively. If this element is itself a string, has no + children, or has more than one child, return value is None. + """ + if len(self.contents) != 1: + return None + child = self.contents[0] + if isinstance(child, NavigableString): + return child + return child.string + + @string.setter + def string(self, string): + """Replace this PageElement's contents with `string`.""" + self.clear() + self.append(string.__class__(string)) + + def _all_strings(self, strip=False, types=(NavigableString, CData)): + """Yield all strings of certain classes, possibly stripping them. + + :param strip: If True, all strings will be stripped before being + yielded. + + :types: A tuple of NavigableString subclasses. Any strings of + a subclass not found in this list will be ignored. By + default, this means only NavigableString and CData objects + will be considered. So no comments, processing instructions, + etc. + + :yield: A sequence of strings. + """ + for descendant in self.descendants: + if ( + (types is None and not isinstance(descendant, NavigableString)) + or + (types is not None and type(descendant) not in types)): + continue + if strip: + descendant = descendant.strip() + if len(descendant) == 0: + continue + yield descendant + + strings = property(_all_strings) + + @property + def stripped_strings(self): + """Yield all strings in the document, stripping them first. + + :yield: A sequence of stripped strings. + """ + for string in self._all_strings(True): + yield string + + def get_text(self, separator="", strip=False, + types=(NavigableString, CData)): + """Get all child strings, concatenated using the given separator. + + :param separator: Strings will be concatenated using this separator. + + :param strip: If True, strings will be stripped before being + concatenated. + + :types: A tuple of NavigableString subclasses. Any strings of + a subclass not found in this list will be ignored. By + default, this means only NavigableString and CData objects + will be considered. So no comments, processing instructions, + stylesheets, etc. + + :return: A string. + """ + return separator.join([s for s in self._all_strings( + strip, types=types)]) + getText = get_text + text = property(get_text) + + def decompose(self): + """Recursively destroys this PageElement and its children. + + This element will be removed from the tree and wiped out; so + will everything beneath it. + + The behavior of a decomposed PageElement is undefined and you + should never use one for anything, but if you need to _check_ + whether an element has been decomposed, you can use the + `decomposed` property. + """ + self.extract() + i = self + while i is not None: + n = i.next_element + i.__dict__.clear() + i.contents = [] + i._decomposed = True + i = n + + def clear(self, decompose=False): + """Wipe out all children of this PageElement by calling extract() + on them. + + :param decompose: If this is True, decompose() (a more + destructive method) will be called instead of extract(). + """ + if decompose: + for element in self.contents[:]: + if isinstance(element, Tag): + element.decompose() + else: + element.extract() + else: + for element in self.contents[:]: + element.extract() + + def smooth(self): + """Smooth out this element's children by consolidating consecutive + strings. + + This makes pretty-printed output look more natural following a + lot of operations that modified the tree. + """ + # Mark the first position of every pair of children that need + # to be consolidated. Do this rather than making a copy of + # self.contents, since in most cases very few strings will be + # affected. + marked = [] + for i, a in enumerate(self.contents): + if isinstance(a, Tag): + # Recursively smooth children. + a.smooth() + if i == len(self.contents)-1: + # This is the last item in .contents, and it's not a + # tag. There's no chance it needs any work. + continue + b = self.contents[i+1] + if (isinstance(a, NavigableString) + and isinstance(b, NavigableString) + and not isinstance(a, PreformattedString) + and not isinstance(b, PreformattedString) + ): + marked.append(i) + + # Go over the marked positions in reverse order, so that + # removing items from .contents won't affect the remaining + # positions. + for i in reversed(marked): + a = self.contents[i] + b = self.contents[i+1] + b.extract() + n = NavigableString(a+b) + a.replace_with(n) + + def index(self, element): + """Find the index of a child by identity, not value. + + Avoids issues with tag.contents.index(element) getting the + index of equal elements. + + :param element: Look for this PageElement in `self.contents`. + """ + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self.attrs.get(key, default) + + def get_attribute_list(self, key, default=None): + """The same as get(), but always returns a list. + + :param key: The attribute to look for. + :param default: Use this value if the attribute is not present + on this PageElement. + :return: A list of values, probably containing only a single + value. + """ + value = self.get(key, default) + if not isinstance(value, list): + value = [value] + return value + + def has_attr(self, key): + """Does this PageElement have an attribute with the given name?""" + return key in self.attrs + + def __hash__(self): + return str(self).__hash__() + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the Tag, + and throws an exception if it's not there.""" + return self.attrs[key] + + def __iter__(self): + "Iterating over a Tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a Tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __bool__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self.attrs[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + self.attrs.pop(key, None) + + def __call__(self, *args, **kwargs): + """Calling a Tag like a function is the same as calling its + find_all() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return self.find_all(*args, **kwargs) + + def __getattr__(self, tag): + """Calling tag.subtag is the same as calling tag.find(name="subtag")""" + #print("Getattr %s.%s" % (self.__class__, tag)) + if len(tag) > 3 and tag.endswith('Tag'): + # BS3: soup.aTag -> "soup.find("a") + tag_name = tag[:-3] + warnings.warn( + '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict( + name=tag_name + ) + ) + return self.find(tag_name) + # We special case contents to avoid recursion. + elif not tag.startswith("__") and not tag == "contents": + return self.find(tag) + raise AttributeError( + "'%s' object has no attribute '%s'" % (self.__class__, tag)) + + def __eq__(self, other): + """Returns true iff this Tag has the same name, the same attributes, + and the same contents (recursively) as `other`.""" + if self is other: + return True + if (not hasattr(other, 'name') or + not hasattr(other, 'attrs') or + not hasattr(other, 'contents') or + self.name != other.name or + self.attrs != other.attrs or + len(self) != len(other)): + return False + for i, my_child in enumerate(self.contents): + if my_child != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this Tag is not identical to `other`, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding="unicode-escape"): + """Renders this PageElement as a string. + + :param encoding: The encoding to use (Python 2 only). + :return: Under Python 2, a bytestring; under Python 3, + a Unicode string. + """ + if PY3K: + # "The return value must be a string object", i.e. Unicode + return self.decode() + else: + # "The return value must be a string object", i.e. a bytestring. + # By convention, the return value of __repr__ should also be + # an ASCII string. + return self.encode(encoding) + + def __unicode__(self): + """Renders this PageElement as a Unicode string.""" + return self.decode() + + def __str__(self): + """Renders this PageElement as a generic string. + + :return: Under Python 2, a UTF-8 bytestring; under Python 3, + a Unicode string. + """ + if PY3K: + return self.decode() + else: + return self.encode() + + if PY3K: + __str__ = __repr__ = __unicode__ + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, + indent_level=None, formatter="minimal", + errors="xmlcharrefreplace"): + """Render a bytestring representation of this PageElement and its + contents. + + :param encoding: The destination encoding. + :param indent_level: Each line of the rendering will be + indented this many spaces. Used internally in + recursive calls while pretty-printing. + :param formatter: A Formatter object, or a string naming one of + the standard formatters. + :param errors: An error handling strategy such as + 'xmlcharrefreplace'. This value is passed along into + encode() and its value should be one of the constants + defined by Python. + :return: A bytestring. + + """ + # Turn the data structure into Unicode, then encode the + # Unicode. + u = self.decode(indent_level, encoding, formatter) + return u.encode(encoding, errors) + + def decode(self, indent_level=None, + eventual_encoding=DEFAULT_OUTPUT_ENCODING, + formatter="minimal"): + """Render a Unicode representation of this PageElement and its + contents. + + :param indent_level: Each line of the rendering will be + indented this many spaces. Used internally in + recursive calls while pretty-printing. + :param eventual_encoding: The tag is destined to be + encoded into this encoding. This method is _not_ + responsible for performing that encoding. This information + is passed in so that it can be substituted in if the + document contains a tag that mentions the document's + encoding. + :param formatter: A Formatter object, or a string naming one of + the standard formatters. + """ + + # First off, turn a non-Formatter `formatter` into a Formatter + # object. This will stop the lookup from happening over and + # over again. + if not isinstance(formatter, Formatter): + formatter = self.formatter_for_name(formatter) + attributes = formatter.attributes(self) + attrs = [] + for key, val in attributes: + if val is None: + decoded = key + else: + if isinstance(val, list) or isinstance(val, tuple): + val = ' '.join(val) + elif not isinstance(val, str): + val = str(val) + elif ( + isinstance(val, AttributeValueWithCharsetSubstitution) + and eventual_encoding is not None + ): + val = val.encode(eventual_encoding) + + text = formatter.attribute_value(val) + decoded = ( + str(key) + '=' + + formatter.quoted_attribute_value(text)) + attrs.append(decoded) + close = '' + closeTag = '' + + prefix = '' + if self.prefix: + prefix = self.prefix + ":" + + if self.is_empty_element: + close = formatter.void_element_close_prefix or '' + else: + closeTag = '' % (prefix, self.name) + + pretty_print = self._should_pretty_print(indent_level) + space = '' + indent_space = '' + if indent_level is not None: + indent_space = (' ' * (indent_level - 1)) + if pretty_print: + space = indent_space + indent_contents = indent_level + 1 + else: + indent_contents = None + contents = self.decode_contents( + indent_contents, eventual_encoding, formatter + ) + + if self.hidden: + # This is the 'document root' object. + s = contents + else: + s = [] + attribute_string = '' + if attrs: + attribute_string = ' ' + ' '.join(attrs) + if indent_level is not None: + # Even if this particular tag is not pretty-printed, + # we should indent up to the start of the tag. + s.append(indent_space) + s.append('<%s%s%s%s>' % ( + prefix, self.name, attribute_string, close)) + if pretty_print: + s.append("\n") + s.append(contents) + if pretty_print and contents and contents[-1] != "\n": + s.append("\n") + if pretty_print and closeTag: + s.append(space) + s.append(closeTag) + if indent_level is not None and closeTag and self.next_sibling: + # Even if this particular tag is not pretty-printed, + # we're now done with the tag, and we should add a + # newline if appropriate. + s.append("\n") + s = ''.join(s) + return s + + def _should_pretty_print(self, indent_level): + """Should this tag be pretty-printed? + + Most of them should, but some (such as
 in HTML
+        documents) should not.
+        """
+        return (
+            indent_level is not None
+            and (
+                not self.preserve_whitespace_tags
+                or self.name not in self.preserve_whitespace_tags
+            )
+        )
+
+    def prettify(self, encoding=None, formatter="minimal"):
+        """Pretty-print this PageElement as a string.
+
+        :param encoding: The eventual encoding of the string. If this is None,
+            a Unicode string will be returned.
+        :param formatter: A Formatter object, or a string naming one of
+            the standard formatters.
+        :return: A Unicode string (if encoding==None) or a bytestring 
+            (otherwise).
+        """
+        if encoding is None:
+            return self.decode(True, formatter=formatter)
+        else:
+            return self.encode(encoding, True, formatter=formatter)
+
+    def decode_contents(self, indent_level=None,
+                       eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                       formatter="minimal"):
+        """Renders the contents of this tag as a Unicode string.
+
+        :param indent_level: Each line of the rendering will be
+           indented this many spaces. Used internally in
+           recursive calls while pretty-printing.
+
+        :param eventual_encoding: The tag is destined to be
+           encoded into this encoding. decode_contents() is _not_
+           responsible for performing that encoding. This information
+           is passed in so that it can be substituted in if the
+           document contains a  tag that mentions the document's
+           encoding.
+
+        :param formatter: A Formatter object, or a string naming one of
+            the standard Formatters.
+        """
+        # First off, turn a string formatter into a Formatter object. This
+        # will stop the lookup from happening over and over again.
+        if not isinstance(formatter, Formatter):
+            formatter = self.formatter_for_name(formatter)
+
+        pretty_print = (indent_level is not None)
+        s = []
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.output_ready(formatter)
+            elif isinstance(c, Tag):
+                s.append(c.decode(indent_level, eventual_encoding,
+                                  formatter))
+            preserve_whitespace = (
+                self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
+            )
+            if text and indent_level and not preserve_whitespace:
+                text = text.strip()
+            if text:
+                if pretty_print and not preserve_whitespace:
+                    s.append(" " * (indent_level - 1))
+                s.append(text)
+                if pretty_print and not preserve_whitespace:
+                    s.append("\n")
+        return ''.join(s)
+       
+    def encode_contents(
+        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
+        formatter="minimal"):
+        """Renders the contents of this PageElement as a bytestring.
+
+        :param indent_level: Each line of the rendering will be
+           indented this many spaces. Used internally in
+           recursive calls while pretty-printing.
+
+        :param eventual_encoding: The bytestring will be in this encoding.
+
+        :param formatter: A Formatter object, or a string naming one of
+            the standard Formatters.
+
+        :return: A bytestring.
+        """
+        contents = self.decode_contents(indent_level, encoding, formatter)
+        return contents.encode(encoding)
+
+    # Old method for BS3 compatibility
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Deprecated method for BS3 compatibility."""
+        if not prettyPrint:
+            indentLevel = None
+        return self.encode_contents(
+            indent_level=indentLevel, encoding=encoding)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Look in the children of this PageElement and find the first
+        PageElement that matches the given criteria.
+
+        All find_* methods take a common set of arguments. See the online
+        documentation for detailed explanations.
+
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param recursive: If this is True, find() will perform a
+            recursive search of this PageElement's children. Otherwise,
+            only the direct children will be considered.
+        :param limit: Stop looking after finding this many results.
+        :kwargs: A dictionary of filters on attribute values.
+        :return: A PageElement.
+        :rtype: bs4.element.Tag | bs4.element.NavigableString
+        """
+        r = None
+        l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find #BS2
+
+    def find_all(self, name=None, attrs={}, recursive=True, text=None,
+                 limit=None, **kwargs):
+        """Look in the children of this PageElement and find all
+        PageElements that match the given criteria.
+
+        All find_* methods take a common set of arguments. See the online
+        documentation for detailed explanations.
+
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param recursive: If this is True, find_all() will perform a
+            recursive search of this PageElement's children. Otherwise,
+            only the direct children will be considered.
+        :param limit: Stop looking after finding this many results.
+        :kwargs: A dictionary of filters on attribute values.
+        :return: A ResultSet of PageElements.
+        :rtype: bs4.element.ResultSet
+        """
+        generator = self.descendants
+        if not recursive:
+            generator = self.children
+        return self._find_all(name, attrs, text, limit, generator, **kwargs)
+    findAll = find_all       # BS3
+    findChildren = find_all  # BS2
+
+    #Generator methods
+    @property
+    def children(self):
+        """Iterate over all direct children of this PageElement.
+
+        :yield: A sequence of PageElements.
+        """
+        # return iter() to make the purpose of the method clear
+        return iter(self.contents)  # XXX This seems to be untested.
+
+    @property
+    def descendants(self):
+        """Iterate over all children of this PageElement in a
+        breadth-first sequence.
+
+        :yield: A sequence of PageElements.
+        """
+        if not len(self.contents):
+            return
+        stopNode = self._last_descendant().next_element
+        current = self.contents[0]
+        while current is not stopNode:
+            yield current
+            current = current.next_element
+
+    # CSS selector code
+    def select_one(self, selector, namespaces=None, **kwargs):
+        """Perform a CSS selection operation on the current element.
+
+        :param selector: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+           soupsieve.select() method.
+
+        :return: A Tag.
+        :rtype: bs4.element.Tag
+        """
+        value = self.select(selector, namespaces, 1, **kwargs)
+        if value:
+            return value[0]
+        return None
+
+    def select(self, selector, namespaces=None, limit=None, **kwargs):
+        """Perform a CSS selection operation on the current element.
+
+        This uses the SoupSieve library.
+
+        :param selector: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
+
+        :param limit: After finding this number of results, stop looking.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+           soupsieve.select() method.
+
+        :return: A ResultSet of Tags.
+        :rtype: bs4.element.ResultSet
+        """
+        if namespaces is None:
+            namespaces = self._namespaces
+        
+        if limit is None:
+            limit = 0
+        if soupsieve is None:
+            raise NotImplementedError(
+                "Cannot execute CSS selectors because the soupsieve package is not installed."
+            )
+            
+        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
+
+        # We do this because it's more consistent and because
+        # ResultSet.__getattr__ has a helpful error message.
+        return ResultSet(None, results)
+
+    # Old names for backwards compatibility
+    def childGenerator(self):
+        """Deprecated generator."""
+        return self.children
+
+    def recursiveChildGenerator(self):
+        """Deprecated generator."""
+        return self.descendants
+
+    def has_key(self, key):
+        """Deprecated method. This was kind of misleading because has_key()
+        (attributes) was different from __in__ (contents).
+
+        has_key() is gone in Python 3, anyway.
+        """
+        warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % (
+                key))
+        return self.has_attr(key)
+
+# Next, a couple classes to represent queries and their results.
+class SoupStrainer(object):
+    """Encapsulates a number of ways of matching a markup element (tag or
+    string).
+
+    This is primarily used to underpin the find_* methods, but you can
+    create one yourself and pass it in as `parse_only` to the
+    `BeautifulSoup` constructor, to parse a subset of a large
+    document.
+    """
+
+    def __init__(self, name=None, attrs={}, text=None, **kwargs):
+        """Constructor.
+
+        The SoupStrainer constructor takes the same arguments passed
+        into the find_* methods. See the online documentation for
+        detailed explanations.
+
+        :param name: A filter on tag name.
+        :param attrs: A dictionary of filters on attribute values.
+        :param text: A filter for a NavigableString with specific text.
+        :kwargs: A dictionary of filters on attribute values.
+        """        
+        self.name = self._normalize_search_value(name)
+        if not isinstance(attrs, dict):
+            # Treat a non-dict value for attrs as a search for the 'class'
+            # attribute.
+            kwargs['class'] = attrs
+            attrs = None
+
+        if 'class_' in kwargs:
+            # Treat class_="foo" as a search for the 'class'
+            # attribute, overriding any non-dict value for attrs.
+            kwargs['class'] = kwargs['class_']
+            del kwargs['class_']
+
+        if kwargs:
+            if attrs:
+                attrs = attrs.copy()
+                attrs.update(kwargs)
+            else:
+                attrs = kwargs
+        normalized_attrs = {}
+        for key, value in list(attrs.items()):
+            normalized_attrs[key] = self._normalize_search_value(value)
+
+        self.attrs = normalized_attrs
+        self.text = self._normalize_search_value(text)
+
+    def _normalize_search_value(self, value):
+        # Leave it alone if it's a Unicode string, a callable, a
+        # regular expression, a boolean, or None.
+        if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match')
+            or isinstance(value, bool) or value is None):
+            return value
+
+        # If it's a bytestring, convert it to Unicode, treating it as UTF-8.
+        if isinstance(value, bytes):
+            return value.decode("utf8")
+
+        # If it's listlike, convert it into a list of strings.
+        if hasattr(value, '__iter__'):
+            new_value = []
+            for v in value:
+                if (hasattr(v, '__iter__') and not isinstance(v, bytes)
+                    and not isinstance(v, str)):
+                    # This is almost certainly the user's mistake. In the
+                    # interests of avoiding infinite loops, we'll let
+                    # it through as-is rather than doing a recursive call.
+                    new_value.append(v)
+                else:
+                    new_value.append(self._normalize_search_value(v))
+            return new_value
+
+        # Otherwise, convert it into a Unicode string.
+        # The unicode(str()) thing is so this will do the same thing on Python 2
+        # and Python 3.
+        return str(str(value))
+
+    def __str__(self):
+        """A human-readable representation of this SoupStrainer."""
+        if self.text:
+            return self.text
+        else:
+            return "%s|%s" % (self.name, self.attrs)
+
+    def search_tag(self, markup_name=None, markup_attrs={}):
+        """Check whether a Tag with the given name and attributes would
+        match this SoupStrainer.
+
+        Used prospectively to decide whether to even bother creating a Tag
+        object.
+
+        :param markup_name: A tag name as found in some markup.
+        :param markup_attrs: A dictionary of attributes as found in some markup.
+
+        :return: True if the prospective tag would match this SoupStrainer;
+            False otherwise.
+        """
+        found = None
+        markup = None
+        if isinstance(markup_name, Tag):
+            markup = markup_name
+            markup_attrs = markup
+
+        if isinstance(self.name, str):
+            # Optimization for a very common case where the user is
+            # searching for a tag with one specific name, and we're
+            # looking at a tag with a different name.
+            if markup and not markup.prefix and self.name != markup.name:
+                 return False
+            
+        call_function_with_tag_data = (
+            isinstance(self.name, Callable)
+            and not isinstance(markup_name, Tag))
+
+        if ((not self.name)
+            or call_function_with_tag_data
+            or (markup and self._matches(markup, self.name))
+            or (not markup and self._matches(markup_name, self.name))):
+            if call_function_with_tag_data:
+                match = self.name(markup_name, markup_attrs)
+            else:
+                match = True
+                markup_attr_map = None
+                for attr, match_against in list(self.attrs.items()):
+                    if not markup_attr_map:
+                        if hasattr(markup_attrs, 'get'):
+                            markup_attr_map = markup_attrs
+                        else:
+                            markup_attr_map = {}
+                            for k, v in markup_attrs:
+                                markup_attr_map[k] = v
+                    attr_value = markup_attr_map.get(attr)
+                    if not self._matches(attr_value, match_against):
+                        match = False
+                        break
+            if match:
+                if markup:
+                    found = markup
+                else:
+                    found = markup_name
+        if found and self.text and not self._matches(found.string, self.text):
+            found = None
+        return found
+
+    # For BS3 compatibility.
+    searchTag = search_tag
+
+    def search(self, markup):
+        """Find all items in `markup` that match this SoupStrainer.
+
+        Used by the core _find_all() method, which is ultimately
+        called by all find_* methods.
+
+        :param markup: A PageElement or a list of them.
+        """
+        # print('looking for %s in %s' % (self, markup))
+        found = None
+        # If given a list of items, scan it for a text element that
+        # matches.
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
+            for element in markup:
+                if isinstance(element, NavigableString) \
+                       and self.search(element):
+                    found = element
+                    break
+        # If it's a Tag, make sure its name or attributes match.
+        # Don't bother with Tags if we're searching for text.
+        elif isinstance(markup, Tag):
+            if not self.text or self.name or self.attrs:
+                found = self.search_tag(markup)
+        # If it's text, make sure the text matches.
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, str):
+            if not self.name and not self.attrs and self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception(
+                "I don't know how to match against a %s" % markup.__class__)
+        return found
+
+    def _matches(self, markup, match_against, already_tried=None):
+        # print(u"Matching %s against %s" % (markup, match_against))
+        result = False
+        if isinstance(markup, list) or isinstance(markup, tuple):
+            # This should only happen when searching a multi-valued attribute
+            # like 'class'.
+            for item in markup:
+                if self._matches(item, match_against):
+                    return True
+            # We didn't match any particular value of the multivalue
+            # attribute, but maybe we match the attribute value when
+            # considered as a string.
+            if self._matches(' '.join(markup), match_against):
+                return True
+            return False
+        
+        if match_against is True:
+            # True matches any non-None value.
+            return markup is not None
+
+        if isinstance(match_against, Callable):
+            return match_against(markup)
+
+        # Custom callables take the tag as an argument, but all
+        # other ways of matching match the tag name as a string.
+        original_markup = markup
+        if isinstance(markup, Tag):
+            markup = markup.name
+
+        # Ensure that `markup` is either a Unicode string, or None.
+        markup = self._normalize_search_value(markup)
+
+        if markup is None:
+            # None matches None, False, an empty string, an empty list, and so on.
+            return not match_against
+
+        if (hasattr(match_against, '__iter__')
+            and not isinstance(match_against, str)):
+            # We're asked to match against an iterable of items.
+            # The markup must be match at least one item in the
+            # iterable. We'll try each one in turn.
+            #
+            # To avoid infinite recursion we need to keep track of
+            # items we've already seen.
+            if not already_tried:
+                already_tried = set()
+            for item in match_against:
+                if item.__hash__:
+                    key = item
+                else:
+                    key = id(item)
+                if key in already_tried:
+                    continue
+                else:
+                    already_tried.add(key)
+                    if self._matches(original_markup, item, already_tried):
+                        return True
+            else:
+                return False
+        
+        # Beyond this point we might need to run the test twice: once against
+        # the tag's name and once against its prefixed name.
+        match = False
+        
+        if not match and isinstance(match_against, str):
+            # Exact string match
+            match = markup == match_against
+
+        if not match and hasattr(match_against, 'search'):
+            # Regexp match
+            return match_against.search(markup)
+
+        if (not match
+            and isinstance(original_markup, Tag)
+            and original_markup.prefix):
+            # Try the whole thing again with the prefixed tag name.
+            return self._matches(
+                original_markup.prefix + ':' + original_markup.name, match_against
+            )
+
+        return match
+
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source, result=()):
+        """Constructor.
+
+        :param source: A SoupStrainer.
+        :param result: A list of PageElements.
+        """
+        super(ResultSet, self).__init__(result)
+        self.source = source
+
+    def __getattr__(self, key):
+        """Raise a helpful exception to explain a common code fix."""
+        raise AttributeError(
+            "ResultSet object has no attribute '%s'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?" % key
+        )
diff --git a/venv/lib/python3.6/site-packages/bs4/formatter.py b/venv/lib/python3.6/site-packages/bs4/formatter.py
new file mode 100644
index 0000000..2cbab4c
--- /dev/null
+++ b/venv/lib/python3.6/site-packages/bs4/formatter.py
@@ -0,0 +1,152 @@
+from bs4.dammit import EntitySubstitution
+
+class Formatter(EntitySubstitution):
+    """Describes a strategy to use when outputting a parse tree to a string.
+
+    Some parts of this strategy come from the distinction between
+    HTML4, HTML5, and XML. Others are configurable by the user.
+
+    Formatters are passed in as the `formatter` argument to methods
+    like `PageElement.encode`. Most people won't need to think about
+    formatters, and most people who need to think about them can pass
+    in one of these predefined strings as `formatter` rather than
+    making a new Formatter object:
+
+    For HTML documents:
+     * 'html' - HTML entity substitution for generic HTML documents. (default)
+     * 'html5' - HTML entity substitution for HTML5 documents.
+     * 'minimal' - Only make the substitutions necessary to guarantee
+                   valid HTML.
+     * None - Do not perform any substitution. This will be faster
+              but may result in invalid markup.
+
+    For XML documents:
+     * 'html' - Entity substitution for XHTML documents.
+     * 'minimal' - Only make the substitutions necessary to guarantee
+                   valid XML. (default)
+     * None - Do not perform any substitution. This will be faster
+              but may result in invalid markup.
+    """
+    # Registries of XML and HTML formatters.
+    XML_FORMATTERS = {}
+    HTML_FORMATTERS = {}
+
+    HTML = 'html'
+    XML = 'xml'
+
+    HTML_DEFAULTS = dict(
+        cdata_containing_tags=set(["script", "style"]),
+    )
+
+    def _default(self, language, value, kwarg):
+        if value is not None:
+            return value
+        if language == self.XML:
+            return set()
+        return self.HTML_DEFAULTS[kwarg]
+
+    def __init__(
+            self, language=None, entity_substitution=None,
+            void_element_close_prefix='/', cdata_containing_tags=None,
+    ):
+        """Constructor.
+
+        :param language: This should be Formatter.XML if you are formatting
+           XML markup and Formatter.HTML if you are formatting HTML markup.
+
+        :param entity_substitution: A function to call to replace special
+           characters with XML/HTML entities. For examples, see 
+           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
+        :param void_element_close_prefix: By default, void elements
+           are represented as  (XML rules) rather than 
+           (HTML rules). To get , pass in the empty string.
+        :param cdata_containing_tags: The list of tags that are defined
+           as containing CDATA in this dialect. For example, in HTML,
+           
+
This numeric entity is missing the final semicolon:
+ +
a
+
This document contains (do you see it?)
+
This document ends with That attribute value was bogus
+The doctype is invalid because it contains extra whitespace +
That boolean attribute had no value
+
Here's a nonexistent entity: &#foo; (do you see it?)
+
This document ends before the entity finishes: > +

Paragraphs shouldn't contain block display elements, but this one does:

you see?

+Multiple values for the same attribute. +
Here's a table
+
+
This tag contains nothing but whitespace:
+

This p tag is cut off by

the end of the blockquote tag
+
Here's a nested table:
foo
This table contains bare markup
+ +
This document contains a surprise doctype
+ +
Tag name contains Unicode characters
+ + +""" + + +class SoupTest(unittest.TestCase): + + @property + def default_builder(self): + return default_builder + + def soup(self, markup, **kwargs): + """Build a Beautiful Soup object from markup.""" + builder = kwargs.pop('builder', self.default_builder) + return BeautifulSoup(markup, builder=builder, **kwargs) + + def document_for(self, markup, **kwargs): + """Turn an HTML fragment into a document. + + The details depend on the builder. + """ + return self.default_builder(**kwargs).test_fragment_to_document(markup) + + def assertSoupEquals(self, to_parse, compare_parsed_to=None): + builder = self.default_builder + obj = BeautifulSoup(to_parse, builder=builder) + if compare_parsed_to is None: + compare_parsed_to = to_parse + + # Verify that the documents come out the same. + self.assertEqual(obj.decode(), self.document_for(compare_parsed_to)) + + # Also run some checks on the BeautifulSoup object itself: + + # Verify that every tag that was opened was eventually closed. + + # There are no tags in the open tag counter. + assert all(v==0 for v in list(obj.open_tag_counter.values())) + + # The only tag in the tag stack is the one for the root + # document. + self.assertEqual( + [obj.ROOT_TAG_NAME], [x.name for x in obj.tagStack] + ) + + def assertConnectedness(self, element): + """Ensure that next_element and previous_element are properly + set for all descendants of the given element. + """ + earlier = None + for e in element.descendants: + if earlier: + self.assertEqual(e, earlier.next_element) + self.assertEqual(earlier, e.previous_element) + earlier = e + + def linkage_validator(self, el, _recursive_call=False): + """Ensure proper linkage throughout the document.""" + descendant = None + # Document element should have no previous element or previous sibling. + # It also shouldn't have a next sibling. + if el.parent is None: + assert el.previous_element is None,\ + "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + el, el.previous_element, None + ) + assert el.previous_sibling is None,\ + "Bad previous_sibling\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + el, el.previous_sibling, None + ) + assert el.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( + el, el.next_sibling, None + ) + + idx = 0 + child = None + last_child = None + last_idx = len(el.contents) - 1 + for child in el.contents: + descendant = None + + # Parent should link next element to their first child + # That child should have no previous sibling + if idx == 0: + if el.parent is not None: + assert el.next_element is child,\ + "Bad next_element\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( + el, el.next_element, child + ) + assert child.previous_element is el,\ + "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( + child, child.previous_element, el + ) + assert child.previous_sibling is None,\ + "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED: {}".format( + child, child.previous_sibling, None + ) + + # If not the first child, previous index should link as sibling to this index + # Previous element should match the last index or the last bubbled up descendant + else: + assert child.previous_sibling is el.contents[idx - 1],\ + "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED {}".format( + child, child.previous_sibling, el.contents[idx - 1] + ) + assert el.contents[idx - 1].next_sibling is child,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + el.contents[idx - 1], el.contents[idx - 1].next_sibling, child + ) + + if last_child is not None: + assert child.previous_element is last_child,\ + "Bad previous_element\nNODE: {}\nPREV {}\nEXPECTED {}\nCONTENTS {}".format( + child, child.previous_element, last_child, child.parent.contents + ) + assert last_child.next_element is child,\ + "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + last_child, last_child.next_element, child + ) + + if isinstance(child, Tag) and child.contents: + descendant = self.linkage_validator(child, True) + # A bubbled up descendant should have no next siblings + assert descendant.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + descendant, descendant.next_sibling, None + ) + + # Mark last child as either the bubbled up descendant or the current child + if descendant is not None: + last_child = descendant + else: + last_child = child + + # If last child, there are non next siblings + if idx == last_idx: + assert child.next_sibling is None,\ + "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_sibling, None + ) + idx += 1 + + child = descendant if descendant is not None else child + if child is None: + child = el + + if not _recursive_call and child is not None: + target = el + while True: + if target is None: + assert child.next_element is None, \ + "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_element, None + ) + break + elif target.next_sibling is not None: + assert child.next_element is target.next_sibling, \ + "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( + child, child.next_element, target.next_sibling + ) + break + target = target.parent + + # We are done, so nothing to return + return None + else: + # Return the child to the recursive caller + return child + + +class HTMLTreeBuilderSmokeTest(object): + + """A basic test of a treebuilder's competence. + + Any HTML treebuilder, present or future, should be able to pass + these tests. With invalid markup, there's room for interpretation, + and different parsers can handle it differently. But with the + markup in these tests, there's not much room for interpretation. + """ + + def test_empty_element_tags(self): + """Verify that all HTML4 and HTML5 empty element (aka void element) tags + are handled correctly. + """ + for name in [ + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + 'spacer', 'frame' + ]: + soup = self.soup("") + new_tag = soup.new_tag(name) + self.assertEqual(True, new_tag.is_empty_element) + + def test_special_string_containers(self): + soup = self.soup( + "" + ) + assert isinstance(soup.style.string, Stylesheet) + assert isinstance(soup.script.string, Script) + + soup = self.soup( + "" + ) + assert isinstance(soup.style.string, Stylesheet) + # The contents of the style tag resemble an HTML comment, but + # it's not treated as a comment. + self.assertEqual("", soup.style.string) + assert isinstance(soup.style.string, Stylesheet) + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + + def assertDoctypeHandled(self, doctype_fragment): + """Assert that a given doctype string is handled correctly.""" + doctype_str, soup = self._document_with_doctype(doctype_fragment) + + # Make sure a Doctype object was created. + doctype = soup.contents[0] + self.assertEqual(doctype.__class__, Doctype) + self.assertEqual(doctype, doctype_fragment) + self.assertEqual( + soup.encode("utf8")[:len(doctype_str)], + doctype_str + ) + + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. + self.assertEqual(soup.p.contents[0], 'foo') + + def _document_with_doctype(self, doctype_fragment, doctype_string="DOCTYPE"): + """Generate and parse a document with the given doctype.""" + doctype = '' % (doctype_string, doctype_fragment) + markup = doctype + '\n

foo

' + soup = self.soup(markup) + return doctype.encode("utf8"), soup + + def test_normal_doctypes(self): + """Make sure normal, everyday HTML doctypes are handled correctly.""" + self.assertDoctypeHandled("html") + self.assertDoctypeHandled( + 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') + + def test_empty_doctype(self): + soup = self.soup("") + doctype = soup.contents[0] + self.assertEqual("", doctype.strip()) + + def test_mixed_case_doctype(self): + # A lowercase or mixed-case doctype becomes a Doctype. + for doctype_fragment in ("doctype", "DocType"): + doctype_str, soup = self._document_with_doctype( + "html", doctype_fragment + ) + + # Make sure a Doctype object was created and that the DOCTYPE + # is uppercase. + doctype = soup.contents[0] + self.assertEqual(doctype.__class__, Doctype) + self.assertEqual(doctype, "html") + self.assertEqual( + soup.encode("utf8")[:len(doctype_str)], + b"" + ) + + # Make sure that the doctype was correctly associated with the + # parse tree and that the rest of the document parsed. + self.assertEqual(soup.p.contents[0], 'foo') + + def test_public_doctype_with_url(self): + doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"' + self.assertDoctypeHandled(doctype) + + def test_system_doctype(self): + self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"') + + def test_namespaced_system_doctype(self): + # We can handle a namespaced doctype with a system ID. + self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"') + + def test_namespaced_public_doctype(self): + # Test a namespaced doctype with a public id. + self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"') + + def test_real_xhtml_document(self): + """A real XHTML document should come out more or less the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. +""" + soup = self.soup(markup) + self.assertEqual( + soup.encode("utf-8").replace(b"\n", b""), + markup.replace(b"\n", b"")) + + def test_namespaced_html(self): + """When a namespaced XML document is parsed as HTML it should + be treated as HTML with weird tag names. + """ + markup = b"""content""" + soup = self.soup(markup) + self.assertEqual(2, len(soup.find_all("ns1:foo"))) + + def test_processing_instruction(self): + # We test both Unicode and bytestring to verify that + # process_markup correctly sets processing_instruction_class + # even when the markup is already Unicode and there is no + # need to process anything. + markup = """""" + soup = self.soup(markup) + self.assertEqual(markup, soup.decode()) + + markup = b"""""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_deepcopy(self): + """Make sure you can copy the tree builder. + + This is important because the builder is part of a + BeautifulSoup object, and we want to be able to copy that. + """ + copy.deepcopy(self.default_builder) + + def test_p_tag_is_never_empty_element(self): + """A

tag is never designated as an empty-element tag. + + Even if the markup shows it as an empty-element tag, it + shouldn't be presented that way. + """ + soup = self.soup("

") + self.assertFalse(soup.p.is_empty_element) + self.assertEqual(str(soup.p), "

") + + def test_unclosed_tags_get_closed(self): + """A tag that's not closed by the end of the document should be closed. + + This applies to all tags except empty-element tags. + """ + self.assertSoupEquals("

", "

") + self.assertSoupEquals("", "") + + self.assertSoupEquals("
", "
") + + def test_br_is_always_empty_element_tag(self): + """A
tag is designated as an empty-element tag. + + Some parsers treat

as one
tag, some parsers as + two tags, but it should always be an empty-element tag. + """ + soup = self.soup("

") + self.assertTrue(soup.br.is_empty_element) + self.assertEqual(str(soup.br), "
") + + def test_nested_formatting_elements(self): + self.assertSoupEquals("") + + def test_double_head(self): + html = ''' + + +Ordinary HEAD element test + + + +Hello, world! + + +''' + soup = self.soup(html) + self.assertEqual("text/javascript", soup.find('script')['type']) + + def test_comment(self): + # Comments are represented as Comment objects. + markup = "

foobaz

" + self.assertSoupEquals(markup) + + soup = self.soup(markup) + comment = soup.find(text="foobar") + self.assertEqual(comment.__class__, Comment) + + # The comment is properly integrated into the tree. + foo = soup.find(text="foo") + self.assertEqual(comment, foo.next_element) + baz = soup.find(text="baz") + self.assertEqual(comment, baz.previous_element) + + def test_preserved_whitespace_in_pre_and_textarea(self): + """Whitespace must be preserved in
 and "
+        self.assertSoupEquals(pre_markup)
+        self.assertSoupEquals(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        self.assertEqual(soup.pre.prettify(), pre_markup)
+
+        soup = self.soup(textarea_markup)
+        self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+        soup = self.soup("")
+        self.assertEqual(soup.textarea.prettify(), "")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "Inside a B tag"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + """Block elements can be nested.""" + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + def test_correctly_nested_tables(self): + """One table can go inside another one.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_multivalued_attribute_with_whitespace(self): + # Whitespace separating the values of a multi-valued attribute + # should be ignored. + + markup = '
' + soup = self.soup(markup) + self.assertEqual(['foo', 'bar'], soup.div['class']) + + # If you search by the literal name of the class it's like the whitespace + # wasn't there. + self.assertEqual(soup.div, soup.find('div', class_="foo bar")) + + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + self.assertEqual(["a", "b"], soup.html['class']) + + def test_angle_brackets_in_attribute_values_are_escaped(self): + self.assertSoupEquals('', '') + + def test_strings_resembling_character_entity_references(self): + # "&T" and "&p" look like incomplete character entities, but they are + # not. + self.assertSoupEquals( + "

• AT&T is in the s&p 500

", + "

\u2022 AT&T is in the s&p 500

" + ) + + def test_apos_entity(self): + self.assertSoupEquals( + "

Bob's Bar

", + "

Bob's Bar

", + ) + + def test_entities_in_foreign_document_encoding(self): + # “ and ” are invalid numeric entities referencing + # Windows-1252 characters. - references a character common + # to Windows-1252 and Unicode, and ☃ references a + # character only found in Unicode. + # + # All of these entities should be converted to Unicode + # characters. + markup = "

“Hello” -☃

" + soup = self.soup(markup) + self.assertEqual("“Hello” -☃", soup.p.string) + + def test_entities_in_attributes_converted_to_unicode(self): + expect = '

' + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + + def test_entities_in_text_converted_to_unicode(self): + expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + + def test_quot_entity_converted_to_quotation_mark(self): + self.assertSoupEquals("

I said "good day!"

", + '

I said "good day!"

') + + def test_out_of_range_entity(self): + expect = "\N{REPLACEMENT CHARACTER}" + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." + soup = self.soup("

\nfoo

") + self.assertEqual("p", soup.h2.string.next_element.name) + self.assertEqual("p", soup.p.name) + self.assertConnectedness(soup) + + def test_empty_element_tags(self): + """Verify consistent handling of empty-element tags, + no matter how they come in through the markup. + """ + self.assertSoupEquals('


', "


") + self.assertSoupEquals('


', "


") + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + self.assertNotEqual(None, soup.html.body) + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) + + def test_basic_namespaces(self): + """Parsers don't need to *understand* namespaces, but at the + very least they should not choke on namespaces or lose + data.""" + + markup = b'4' + soup = self.soup(markup) + self.assertEqual(markup, soup.encode()) + html = soup.html + self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) + self.assertEqual( + 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) + self.assertEqual( + 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) + + def test_multivalued_attribute_value_becomes_list(self): + markup = b'' + soup = self.soup(markup) + self.assertEqual(['foo', 'bar'], soup.a['class']) + + # + # Generally speaking, tests below this point are more tests of + # Beautiful Soup than tests of the tree builders. But parsers are + # weird, so we run these tests separately for every tree builder + # to detect any differences between them. + # + + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.body.string) + + def test_soupstrainer(self): + """Parsers should be able to work with SoupStrainers.""" + strainer = SoupStrainer("b") + soup = self.soup("A bold statement", + parse_only=strainer) + self.assertEqual(soup.decode(), "bold") + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assertSoupEquals("", + '') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """a""" + self.assertSoupEquals(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """a""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assertSoupEquals( + soup.foo.decode(), + """a""") + + def test_ampersand_in_attribute_value_gets_escaped(self): + self.assertSoupEquals('', + '') + + self.assertSoupEquals( + 'foo', + 'foo') + + def test_escaped_ampersand_in_attribute_value_is_left_alone(self): + self.assertSoupEquals('') + + def test_entities_in_strings_converted_during_parsing(self): + # Both XML and HTML entities are converted to Unicode characters + # during parsing. + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" + self.assertSoupEquals(text, expected) + + def test_smart_quotes_converted_on_the_way_in(self): + # Microsoft smart quotes are converted to Unicode characters during + # parsing. + quote = b"

\x91Foo\x92

" + soup = self.soup(quote) + self.assertEqual( + soup.p.string, + "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + + def test_non_breaking_spaces_converted_on_the_way_in(self): + soup = self.soup("  ") + self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) + + def test_entities_converted_on_the_way_out(self): + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") + soup = self.soup(text) + self.assertEqual(soup.p.encode("utf-8"), expected) + + def test_real_iso_latin_document(self): + # Smoke test of interrelated functionality, using an + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. + unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' + + # That's because we're going to encode it into ISO-Latin-1, and use + # that to test. + iso_latin_html = unicode_html.encode("iso-8859-1") + + # Parse the ISO-Latin-1 HTML. + soup = self.soup(iso_latin_html) + # Encode it to UTF-8. + result = soup.encode("utf-8") + + # What do we expect the result to look like? Well, it would + # look like unicode_html, except that the META tag would say + # UTF-8 instead of ISO-Latin-1. + expected = unicode_html.replace("ISO-Latin-1", "utf-8") + + # And, of course, it would be in UTF-8, not Unicode. + expected = expected.encode("utf-8") + + # Ta-da! + self.assertEqual(result, expected) + + def test_real_shift_jis_document(self): + # Smoke test to make sure the parser can handle a document in + # Shift-JIS encoding, without choking. + shift_jis_html = ( + b'
'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'
') + unicode_html = shift_jis_html.decode("shift-jis") + soup = self.soup(unicode_html) + + # Make sure the parse tree is correctly encoded to various + # encodings. + self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) + self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-9 (a + # Hebrew encoding) to UTF-8. + hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' + soup = self.soup( + hebrew_document, from_encoding="iso8859-8") + # Some tree builders call it iso8859-8, others call it iso-8859-9. + # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') + self.assertEqual( + soup.encode('utf-8'), + hebrew_document.decode("iso8859-8").encode("utf-8")) + + def test_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) + content = parsed_meta['content'] + self.assertEqual('text/html; charset=x-sjis', content) + + # But that value is actually a ContentMetaAttributeValue object. + self.assertTrue(isinstance(content, ContentMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('text/html; charset=utf8', content.encode("utf8")) + + # For the rest of the story, see TestSubstitutions in + # test_tree.py. + + def test_html5_style_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', id="encoding") + charset = parsed_meta['charset'] + self.assertEqual('x-sjis', charset) + + # But that value is actually a CharsetMetaAttributeValue object. + self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('utf8', charset.encode("utf8")) + + def test_python_specific_encodings_not_used_in_charset(self): + # You can encode an HTML document using a Python-specific + # encoding, but that encoding won't be mentioned _inside_ the + # resulting document. Instead, the document will appear to + # have no encoding. + for markup in [ + b'' + b'' + ]: + soup = self.soup(markup) + for encoding in PYTHON_SPECIFIC_ENCODINGS: + if encoding in ( + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' + ): + # For one reason or another, these will raise an + # exception if we actually try to use them, so don't + # bother. + continue + encoded = soup.encode(encoding) + assert b'meta charset=""' in encoded + assert encoding.encode("ascii") not in encoded + + def test_tag_with_no_attributes_can_have_attributes_added(self): + data = self.soup("text") + data.a['foo'] = 'bar' + self.assertEqual('text', data.a.decode()) + + def test_closing_tag_with_no_opening_tag(self): + # Without BeautifulSoup.open_tag_counter, the tag will + # cause _popToTag to be called over and over again as we look + # for a tag that wasn't there. The result is that 'text2' + # will show up outside the body of the document. + soup = self.soup("

text1

text2
") + self.assertEqual( + "

text1

text2
", soup.body.decode() + ) + + def test_worst_case(self): + """Test the worst case (currently) for linking issues.""" + + soup = self.soup(BAD_DOCUMENT) + self.linkage_validator(soup) + + +class XMLTreeBuilderSmokeTest(object): + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + + def test_docstring_generated(self): + soup = self.soup("") + self.assertEqual( + soup.encode(), b'\n') + + def test_xml_declaration(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_python_specific_encodings_not_used_in_xml_declaration(self): + # You can encode an XML document using a Python-specific + # encoding, but that encoding won't be mentioned _inside_ the + # resulting document. + markup = b"""\n""" + soup = self.soup(markup) + for encoding in PYTHON_SPECIFIC_ENCODINGS: + if encoding in ( + 'idna', 'mbcs', 'oem', 'undefined', + 'string_escape', 'string-escape' + ): + # For one reason or another, these will raise an + # exception if we actually try to use them, so don't + # bother. + continue + encoded = soup.encode(encoding) + assert b'' in encoded + assert encoding.encode("ascii") not in encoded + + def test_processing_instruction(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_real_xhtml_document(self): + """A real XHTML document should come out *exactly* the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. +""" + soup = self.soup(markup) + self.assertEqual( + soup.encode("utf-8"), markup) + + def test_nested_namespaces(self): + doc = b""" + + + + + +""" + soup = self.soup(doc) + self.assertEqual(doc, soup.encode()) + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + +""" + soup = BeautifulSoup(doc, "lxml-xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' + soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + str(soup.rss), markup) + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("") + self.assertEqual( + soup.encode("latin1"), + b'\n') + + def test_large_xml_document(self): + """A large XML document should come out the same as it went in.""" + markup = (b'\n' + + b'0' * (2**12) + + b'') + soup = self.soup(markup) + self.assertEqual(soup.encode("utf-8"), markup) + + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assertSoupEquals("

", "

") + self.assertSoupEquals("

foo

") + + def test_namespaces_are_preserved(self): + markup = 'This tag is in the a namespaceThis tag is in the b namespace' + soup = self.soup(markup) + root = soup.root + self.assertEqual("http://example.com/", root['xmlns:a']) + self.assertEqual("http://example.net/", root['xmlns:b']) + + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(str(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_find_by_prefixed_name(self): + doc = """ +foo + bar + baz + +""" + soup = self.soup(doc) + + # There are three tags. + self.assertEqual(3, len(soup.find_all('tag'))) + + # But two of them are ns1:tag and one of them is ns2:tag. + self.assertEqual(2, len(soup.find_all('ns1:tag'))) + self.assertEqual(1, len(soup.find_all('ns2:tag'))) + + self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) + self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) + + def test_copy_tag_preserves_namespace(self): + xml = """ +""" + + soup = self.soup(xml) + tag = soup.document + duplicate = copy.copy(tag) + + # The two tags have the same namespace prefix. + self.assertEqual(tag.prefix, duplicate.prefix) + + def test_worst_case(self): + """Test the worst case (currently) for linking issues.""" + + soup = self.soup(BAD_DOCUMENT) + self.linkage_validator(soup) + + +class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): + """Smoke test for a tree builder that supports HTML5.""" + + def test_real_xhtml_document(self): + # Since XHTML is not HTML5, HTML5 parsers are not tested to handle + # XHTML documents in any particular way. + pass + + def test_html_tags_have_namespace(self): + markup = "" + soup = self.soup(markup) + self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) + + def test_svg_tags_have_namespace(self): + markup = '' + soup = self.soup(markup) + namespace = "http://www.w3.org/2000/svg" + self.assertEqual(namespace, soup.svg.namespace) + self.assertEqual(namespace, soup.circle.namespace) + + + def test_mathml_tags_have_namespace(self): + markup = '5' + soup = self.soup(markup) + namespace = 'http://www.w3.org/1998/Math/MathML' + self.assertEqual(namespace, soup.math.namespace) + self.assertEqual(namespace, soup.msqrt.namespace) + + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) + +def skipIf(condition, reason): + def nothing(test, *args, **kwargs): + return None + + def decorator(test_item): + if condition: + return nothing + else: + return test_item + + return decorator diff --git a/venv/lib/python3.6/site-packages/bs4/tests/__init__.py b/venv/lib/python3.6/site-packages/bs4/tests/__init__.py new file mode 100644 index 0000000..142c8cc --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/tests/__init__.py @@ -0,0 +1 @@ +"The beautifulsoup tests." diff --git a/venv/lib/python3.6/site-packages/bs4/tests/test_builder_registry.py b/venv/lib/python3.6/site-packages/bs4/tests/test_builder_registry.py new file mode 100644 index 0000000..90cad82 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/tests/test_builder_registry.py @@ -0,0 +1,147 @@ +"""Tests of the builder registry.""" + +import unittest +import warnings + +from bs4 import BeautifulSoup +from bs4.builder import ( + builder_registry as registry, + HTMLParserTreeBuilder, + TreeBuilderRegistry, +) + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError: + HTML5LIB_PRESENT = False + +try: + from bs4.builder import ( + LXMLTreeBuilderForXML, + LXMLTreeBuilder, + ) + LXML_PRESENT = True +except ImportError: + LXML_PRESENT = False + + +class BuiltInRegistryTest(unittest.TestCase): + """Test the built-in registry with the default builders registered.""" + + def test_combination(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('fast', 'html'), + LXMLTreeBuilder) + + if LXML_PRESENT: + self.assertEqual(registry.lookup('permissive', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('strict', 'html'), + HTMLParserTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib', 'html'), + HTML5TreeBuilder) + + def test_lookup_by_markup_type(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) + self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) + else: + self.assertEqual(registry.lookup('xml'), None) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) + else: + self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) + + def test_named_library(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('lxml', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('lxml', 'html'), + LXMLTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib'), + HTML5TreeBuilder) + + self.assertEqual(registry.lookup('html.parser'), + HTMLParserTreeBuilder) + + def test_beautifulsoup_constructor_does_lookup(self): + + with warnings.catch_warnings(record=True) as w: + # This will create a warning about not explicitly + # specifying a parser, but we'll ignore it. + + # You can pass in a string. + BeautifulSoup("", features="html") + # Or a list of strings. + BeautifulSoup("", features=["html", "fast"]) + + # You'll get an exception if BS can't find an appropriate + # builder. + self.assertRaises(ValueError, BeautifulSoup, + "", features="no-such-feature") + +class RegistryTest(unittest.TestCase): + """Test the TreeBuilderRegistry class in general.""" + + def setUp(self): + self.registry = TreeBuilderRegistry() + + def builder_for_features(self, *feature_list): + cls = type('Builder_' + '_'.join(feature_list), + (object,), {'features' : feature_list}) + + self.registry.register(cls) + return cls + + def test_register_with_no_features(self): + builder = self.builder_for_features() + + # Since the builder advertises no features, you can't find it + # by looking up features. + self.assertEqual(self.registry.lookup('foo'), None) + + # But you can find it by doing a lookup with no features, if + # this happens to be the only registered builder. + self.assertEqual(self.registry.lookup(), builder) + + def test_register_with_features_makes_lookup_succeed(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('foo'), builder) + self.assertEqual(self.registry.lookup('bar'), builder) + + def test_lookup_fails_when_no_builder_implements_feature(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('baz'), None) + + def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): + builder1 = self.builder_for_features('foo') + builder2 = self.builder_for_features('bar') + self.assertEqual(self.registry.lookup(), builder2) + + def test_lookup_fails_when_no_tree_builders_registered(self): + self.assertEqual(self.registry.lookup(), None) + + def test_lookup_gets_most_recent_builder_supporting_all_features(self): + has_one = self.builder_for_features('foo') + has_the_other = self.builder_for_features('bar') + has_both_early = self.builder_for_features('foo', 'bar', 'baz') + has_both_late = self.builder_for_features('foo', 'bar', 'quux') + lacks_one = self.builder_for_features('bar') + has_the_other = self.builder_for_features('foo') + + # There are two builders featuring 'foo' and 'bar', but + # the one that also features 'quux' was registered later. + self.assertEqual(self.registry.lookup('foo', 'bar'), + has_both_late) + + # There is only one builder featuring 'foo', 'bar', and 'baz'. + self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), + has_both_early) + + def test_lookup_fails_when_cannot_reconcile_requested_features(self): + builder1 = self.builder_for_features('foo', 'bar') + builder2 = self.builder_for_features('foo', 'baz') + self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/venv/lib/python3.6/site-packages/bs4/tests/test_docs.py b/venv/lib/python3.6/site-packages/bs4/tests/test_docs.py new file mode 100644 index 0000000..5b9f677 --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/tests/test_docs.py @@ -0,0 +1,36 @@ +"Test harness for doctests." + +# pylint: disable-msg=E0611,W0142 + +__metaclass__ = type +__all__ = [ + 'additional_tests', + ] + +import atexit +import doctest +import os +#from pkg_resources import ( +# resource_filename, resource_exists, resource_listdir, cleanup_resources) +import unittest + +DOCTEST_FLAGS = ( + doctest.ELLIPSIS | + doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + + +# def additional_tests(): +# "Run the doc tests (README.txt and docs/*, if any exist)" +# doctest_files = [ +# os.path.abspath(resource_filename('bs4', 'README.txt'))] +# if resource_exists('bs4', 'docs'): +# for name in resource_listdir('bs4', 'docs'): +# if name.endswith('.txt'): +# doctest_files.append( +# os.path.abspath( +# resource_filename('bs4', 'docs/%s' % name))) +# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) +# atexit.register(cleanup_resources) +# return unittest.TestSuite(( +# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/venv/lib/python3.6/site-packages/bs4/tests/test_html5lib.py b/venv/lib/python3.6/site-packages/bs4/tests/test_html5lib.py new file mode 100644 index 0000000..b77659b --- /dev/null +++ b/venv/lib/python3.6/site-packages/bs4/tests/test_html5lib.py @@ -0,0 +1,190 @@ +"""Tests to ensure that the html5lib tree builder generates good trees.""" + +import warnings + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError as e: + HTML5LIB_PRESENT = False +from bs4.element import SoupStrainer +from bs4.testing import ( + HTML5TreeBuilderSmokeTest, + SoupTest, + skipIf, +) + +@skipIf( + not HTML5LIB_PRESENT, + "html5lib seems not to be present, not testing its tree builder.") +class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): + """See ``HTML5TreeBuilderSmokeTest``.""" + + @property + def default_builder(self): + return HTML5TreeBuilder + + def test_soupstrainer(self): + # The html5lib tree builder does not support SoupStrainers. + strainer = SoupStrainer("b") + markup = "

A bold statement.

" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup, parse_only=strainer) + self.assertEqual( + soup.decode(), self.document_for(markup)) + + self.assertTrue( + "the html5lib tree builder doesn't support parse_only" in + str(w[0].message)) + + def test_correctly_nested_tables(self): + """html5lib inserts tags where other parsers don't.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_xml_declaration_followed_by_doctype(self): + markup = ''' + + + + + +

foo

+ +''' + soup = self.soup(markup) + # Verify that we can reach the

tag; this means the tree is connected. + self.assertEqual(b"

foo

", soup.p.encode()) + + def test_reparented_markup(self): + markup = '

foo

\n

bar

' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + + def test_reparented_markup_ends_with_whitespace(self): + markup = '

foo

\n

bar

\n' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent tags. + """ + markup = '
' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + + def test_reparented_markup_containing_children(self): + markup = '' + soup = self.soup(markup) + noscript = soup.noscript + self.assertEqual("target", noscript.next_element) + target = soup.find(string='target') + + # The 'aftermath' string was duplicated; we want the second one. + final_aftermath = soup.find_all(string='aftermath')[-1] + + # The