From 378aa54eed51c22182e760aaa4487798423d0513 Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:22:34 -0500 Subject: [PATCH 1/6] Update gitignore --- .gitignore | 164 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 148 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index f24cd99..7079be9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,159 @@ -*.py[co] -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ .installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec # Installer logs pip-log.txt +pip-delete-this-directory.txt # Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ .coverage -.tox +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ -#Translations +# Translations *.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ -#Mr Developer -.mr.developer.cfg +# End of https://www.toptal.com/developers/gitignore/api/python From 80accd4a05fe9ef59fa3f822c9e765e875757c28 Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:23:12 -0500 Subject: [PATCH 2/6] Fix tests for py3 and eliminate deprecations --- src/eparser/tests.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/eparser/tests.py b/src/eparser/tests.py index c84a22b..83db269 100644 --- a/src/eparser/tests.py +++ b/src/eparser/tests.py @@ -1,5 +1,5 @@ import unittest -from parsers import EmailAddressParser +from eparser.parsers import EmailAddressParser class TestEmailAddressParser(unittest.TestCase): @@ -15,44 +15,44 @@ def test_as_unicode(self): first_pair, second_pair = pairs[0], pairs[1] - self.failIf(first_pair.name != u"Foo Bar") + self.assertFalse(first_pair.name != u"Foo Bar") - self.failIf(first_pair.email != u"foo@bar.com") + self.assertFalse(first_pair.email != u"foo@bar.com") - self.failIf(second_pair.name != u"Foo Baz") + self.assertFalse(second_pair.name != u"Foo 
Baz") - self.failIf(second_pair.email != u"foo@baz.com") + self.assertFalse(second_pair.email != u"foo@baz.com") def test_common_cases(self): emails = """\"Bar, Foo\" , Foo Bar , \"Foo B.\" foo@bar.com, , \"\"foo@bar.com, \"\"""" pairs = self.parser.parse(emails) - self.failIf(len(pairs) != 7) + self.assertFalse(len(pairs) != 7) for pair in pairs: - self.failIf(pair.email != "foo@bar.com") + self.assertFalse(pair.email != "foo@bar.com") - self.failIf(pairs[0].name != "Bar, Foo") + self.assertFalse(pairs[0].name != "Bar, Foo") - self.failIf(pairs[1].name != "Foo Bar") + self.assertFalse(pairs[1].name != "Foo Bar") - self.failIf(pairs[2].name != "Foo B.") + self.assertFalse(pairs[2].name != "Foo B.") - self.failIf(pairs[3].name != "") + self.assertFalse(pairs[3].name != "") def test_edge_cases(self): emails = '"Foo Bar","Bar Baz","Bar Baz"' pairs = self.parser.parse(emails) - self.failIf(pairs[0].name != "Foo Bar") + self.assertFalse(pairs[0].name != "Foo Bar") - self.failIf(pairs[0].email != "foo@bar.com") + self.assertFalse(pairs[0].email != "foo@bar.com") - self.failIf(pairs[1].name != "Bar Baz") + self.assertFalse(pairs[1].name != "Bar Baz") - self.failIf(pairs[1].email != "bar@baz.org") + self.assertFalse(pairs[1].email != "bar@baz.org") - self.failIf(pairs[2].name != "Bar Baz") + self.assertFalse(pairs[2].name != "Bar Baz") - self.failIf(pairs[2].email != "bar@baz.org") \ No newline at end of file + self.assertFalse(pairs[2].email != "bar@baz.org") \ No newline at end of file From d35fa6b42e22e9c5b33b13681aa2b52e88bddebc Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:23:36 -0500 Subject: [PATCH 3/6] Eliminate warnings under recent pythons --- src/eparser/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eparser/parsers.py b/src/eparser/parsers.py index 828d7ee..a6431a3 100644 --- a/src/eparser/parsers.py +++ b/src/eparser/parsers.py @@ -3,7 +3,7 @@ # RFC 2822 compliant (mostly) regular expression for 
pulling email addresses. (http://snipplr.com/view/19594/) -EMAIL_RE = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +EMAIL_RE = r'''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' @python_2_unicode_compatible @@ -23,7 +23,7 @@ def __repr__(self): class EmailAddressParser: def __init__(self, bad_tokens=None): - self._splitter = re.compile("\s") + self._splitter = re.compile(r"\s") self._splitter_tokens = ["<", ">", "\"", "'", ",", " "] self._email_re = re.compile(EMAIL_RE, re.IGNORECASE) From 7eff21b6be6a4bf1e977c2a65f88799e2639b9f7 Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:23:55 -0500 Subject: [PATCH 4/6] toxify --- tox.ini | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..eda020d --- /dev/null +++ b/tox.ini @@ -0,0 +1,16 @@ +[tox] +skip_missing_interpreters = true +envlist = {py36,py37,py38,py39,py310} + +[testenv] +commands = python -m pytest src/eparser/tests.py +deps = + pytest>=6.2.2 + +[gh-actions] +python = + 3.6: py36 + 3.7: py37 + 3.8: py38 + 3.9: py39 + 3.10: py310 From df6e4f9e5633419578c9df8b48c6d76c31bdf760 Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:39:28 -0500 Subject: [PATCH 5/6] Add gh-actions workflows --- .github/workflows/release.yml | 36 +++++++++++++++++++++++++++++++++++ .github/workflows/tests.yml | 22 +++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..458c50c --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,36 @@ +name: Release + +on: + push: + tags: + - "*" + +jobs: + release: + 
runs-on: ubuntu-18.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install build packages + run: python -m pip install -U build + - name: Build packages + run: python -m build + - name: Create Release + uses: softprops/action-gh-release@v1 + with: + files: | + dist/*.tar.gz + dist/*.whl + - name: Install system pkgs + run: sudo apt-get update && sudo apt-get install awscli + - name: Upload to S3 + env: + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + AWS_PACKAGE_BUCKET: ${{ secrets.AWS_PACKAGE_BUCKET }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: for F in dist/*; do /usr/bin/aws s3 cp ${F} s3://${AWS_PACKAGE_BUCKET}/python/ ; done diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..6205939 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,22 @@ +name: Run Tests + +on: [push, pull_request] + +jobs: + test: + strategy: + matrix: + python: ['3.6', '3.8'] + platform: [ubuntu-18.04] + runs-on: ${{ matrix.platform }} + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install test dependencies + run: python -m pip install -U tox + - name: Test + run: python -m tox -e py From 939466232e9ecd33c8260814be3b2d9656a9ca6c Mon Sep 17 00:00:00 2001 From: Vince Veselosky Date: Fri, 11 Feb 2022 13:43:53 -0500 Subject: [PATCH 6/6] Bump patch version --- README.md | 19 +++++++++++-------- setup.py | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 32e430e..b76e598 100644 --- a/README.md +++ b/README.md @@ -8,25 +8,28 @@ Parse email addresses from an outlook-like "To:" field. 
The nice thing about this parser is you could give it a ridiculously jacked up list of emails: no commas, no names, names and emails stuck together (no spaces), and it'll still parse that string and give you a pretty list back. -To use: +To use: from eparser.parsers import email_address_parser - + ... - + emails = email_address_parser.parse(some_email_list) Alternatively, you can instantiate your own with a list of additional bad tokens you want stripped from names: - from eparser.parsers import EmailAddressParser - + from eparser.parsers import EmailAddressParser + parser = EmailAddressParser(bad_tokens=[";"]) - + emails = parser.parse(some_email_list) -String and Unicode representations are in the format: `"Foo Bar" <foo@bar.com>` +String and Unicode representations are in the format: `"Foo Bar" <foo@bar.com>` If there is no name, it's just the email. +Change Log +==================== - +* 1.0.1: 2022-02 Fix warnings under Python 3.6-3.10. Add test automation. +* 1.0.0: Add six for compatibility across Python 2-3. diff --git a/setup.py b/setup.py index 474a8c4..9fbc205 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="eparser", - version="1.0.0", + version="1.0.1", description="Generic Email-Address List Parser", url="https://github.com/HireIQ/email-address-parser", author="Alex Milstead",