Skip to content
This repository is currently being migrated. It's locked while the migration is in progress.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Release workflow: on every pushed tag, build the sdist and wheel, attach
# them to a GitHub release, and copy them to the package bucket on S3.
name: Release

on:
  push:
    tags:
      - "*"

jobs:
  release:
    # The ubuntu-18.04 hosted runner label has been retired, so jobs targeting
    # it never start. 22.04 still offers Python 3.8 through setup-python and
    # still ships the `awscli` apt package used below.
    runs-on: ubuntu-22.04
    # The release step creates a release and uploads assets, which needs
    # write access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always a string, never a YAML float.
          python-version: "3.8"
      - name: Install build packages
        run: python -m pip install -U build
      - name: Build packages
        run: python -m build
      - name: Create Release
        uses: softprops/action-gh-release@v1
        with:
          files: |
            dist/*.tar.gz
            dist/*.whl
      - name: Install system pkgs
        # -y keeps the install non-interactive regardless of runner apt config.
        run: sudo apt-get update && sudo apt-get install -y awscli
      - name: Upload to S3
        env:
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          AWS_PACKAGE_BUCKET: ${{ secrets.AWS_PACKAGE_BUCKET }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        # Expansions are quoted so file names or bucket names containing
        # whitespace or glob characters are passed through unchanged.
        run: |
          for F in dist/*; do
            /usr/bin/aws s3 cp "${F}" "s3://${AWS_PACKAGE_BUCKET}/python/"
          done
22 changes: 22 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CI workflow: runs the tox-driven test suite on every push and pull request,
# once for each Python version / platform combination in the matrix.
name: Run Tests

on:
  - push
  - pull_request

jobs:
  test:
    strategy:
      matrix:
        # Versions are quoted so they stay strings (e.g. "3.10" must not
        # collapse to the float 3.1 if it is ever added).
        python:
          - "3.6"
          - "3.8"
        # NOTE(review): confirm this runner label is still offered by GitHub
        # and that every Python version above can be installed on it.
        platform:
          - ubuntu-18.04
    runs-on: ${{ matrix.platform }}
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
      - name: Install test dependencies
        run: python -m pip install -U tox
      - name: Test
        run: python -m tox -e py
164 changes: 148 additions & 16 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,27 +1,159 @@
*.py[co]

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.tox
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

#Translations
# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

#Mr Developer
.mr.developer.cfg
# End of https://www.toptal.com/developers/gitignore/api/python
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,28 @@ Parse email addresses from an outlook-like "To:" field.
The nice thing about this parser is you could give it a ridiculously jacked up list of emails: no commas, no names, names and emails stuck together (no spaces), and it'll still parse that string and give you a pretty list back.


To use:
To use:

from eparser.parsers import email_address_parser

...

emails = email_address_parser.parse(some_email_list)


Alternatively, you can instantiate your own with a list of additional bad tokens you want stripped from names:

from eparser.parsers import EmailAddressParser
from eparser.parsers import EmailAddressParser

parser = EmailAddressParser(bad_tokens=[";"])

emails = parser.parse(some_email_list)

String and Unicode representations are in the format: `"Foo Bar" <foo@bar.com>`
String and Unicode representations are in the format: `"Foo Bar" <foo@bar.com>`
If there is no name, it's just the email.

Change Log
====================


* 1.0.1: 2022-02 Fix warnings under Python 3.6-3.10. Add test automation.
* 1.0.0: Add six for compatibility across Python 2-3.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="eparser",
version="1.0.0",
version="1.0.1",
description="Generic Email-Address List Parser",
url="https://github.com/HireIQ/email-address-parser",
author="Alex Milstead",
Expand Down
4 changes: 2 additions & 2 deletions src/eparser/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


# RFC 2822 compliant (mostly) regular expression for pulling email addresses. (http://snipplr.com/view/19594/)
EMAIL_RE = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
EMAIL_RE = r'''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''


@python_2_unicode_compatible
Expand All @@ -23,7 +23,7 @@ def __repr__(self):

class EmailAddressParser:
def __init__(self, bad_tokens=None):
self._splitter = re.compile("\s")
self._splitter = re.compile(r"\s")
self._splitter_tokens = ["<", ">", "\"", "'", ",", " "]
self._email_re = re.compile(EMAIL_RE, re.IGNORECASE)

Expand Down
34 changes: 17 additions & 17 deletions src/eparser/tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import unittest
from parsers import EmailAddressParser
from eparser.parsers import EmailAddressParser


class TestEmailAddressParser(unittest.TestCase):
Expand All @@ -15,44 +15,44 @@ def test_as_unicode(self):

first_pair, second_pair = pairs[0], pairs[1]

self.failIf(first_pair.name != u"Foo Bar")
self.assertFalse(first_pair.name != u"Foo Bar")

self.failIf(first_pair.email != u"foo@bar.com")
self.assertFalse(first_pair.email != u"foo@bar.com")

self.failIf(second_pair.name != u"Foo Baz")
self.assertFalse(second_pair.name != u"Foo Baz")

self.failIf(second_pair.email != u"foo@baz.com")
self.assertFalse(second_pair.email != u"foo@baz.com")

def test_common_cases(self):
    # Seven entries mixing quoted names, bare names, missing separators and
    # empty quoted names; every entry carries the same address.
    emails = """\"Bar, Foo\" <foo@bar.com>, Foo Bar <foo@bar.com>, \"Foo B.\" <foo@bar.com> foo@bar.com, <foo@bar.com>, \"\"foo@bar.com, \"\"<foo@bar.com>"""

    pairs = self.parser.parse(emails)

    # assertEqual replaces the deprecated failIf alias (removed in Python
    # 3.12) and reports both values when a check fails.
    self.assertEqual(len(pairs), 7)

    for pair in pairs:
        self.assertEqual(pair.email, "foo@bar.com")

    self.assertEqual(pairs[0].name, "Bar, Foo")

    self.assertEqual(pairs[1].name, "Foo Bar")

    self.assertEqual(pairs[2].name, "Foo B.")

    # The fourth entry is a bare address, so its name is empty.
    self.assertEqual(pairs[3].name, "")

def test_edge_cases(self):
    # Names glued directly to the angle-bracketed address, with no spaces
    # after the separating commas.
    emails = '"Foo Bar"<foo@bar.com>,"Bar Baz"<bar@baz.org>,"Bar Baz"<bar@baz.org>'
    pairs = self.parser.parse(emails)

    # assertEqual replaces the deprecated failIf alias (removed in Python
    # 3.12) and reports both values when a check fails.
    self.assertEqual(pairs[0].name, "Foo Bar")

    self.assertEqual(pairs[0].email, "foo@bar.com")

    self.assertEqual(pairs[1].name, "Bar Baz")

    self.assertEqual(pairs[1].email, "bar@baz.org")

    self.assertEqual(pairs[2].name, "Bar Baz")

    self.assertEqual(pairs[2].email, "bar@baz.org")
16 changes: 16 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# tox configuration: runs the eparser test module under pytest for each
# listed interpreter; interpreters that are not installed are skipped.
[tox]
skip_missing_interpreters = true
envlist = py36, py37, py38, py39, py310

[testenv]
deps =
    pytest>=6.2.2
commands = python -m pytest src/eparser/tests.py

# Maps a Python version to the tox environment that should run for it.
# NOTE(review): presumably consumed by the tox-gh-actions plugin; confirm it
# is installed wherever this mapping is expected to take effect.
[gh-actions]
python =
    3.6: py36
    3.7: py37
    3.8: py38
    3.9: py39
    3.10: py310