15 changes: 15 additions & 0 deletions .idea/deployment.xml

Some generated files are not rendered by default.

518 changes: 518 additions & 0 deletions .idea/workspace.xml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1,2 +1,2 @@
include g2p_en/homographs.en
include g2p_en/logdir/*
include g2p_en/checkpoint20.npz
64 changes: 40 additions & 24 deletions README.md
@@ -1,4 +1,9 @@
[![image](https://img.shields.io/pypi/v/g2p-en.svg)](https://pypi.org/project/g2p-en/)
[![image](https://img.shields.io/pypi/l/g2p-en.svg)](https://pypi.org/project/g2p-en/)

# g2pE: A Simple Python Module for English Grapheme To Phoneme Conversion

* [v.2.0] We removed TensorFlow from the dependencies. After all, it changes its APIs quite often, and we don't expect you to have a GPU. Instead, NumPy is used for inference.

This module is designed to convert English graphemes (spelling) to phonemes (pronunciation).
It is considered essential in several tasks such as speech synthesis.
@@ -20,23 +25,21 @@ In this project, we employ a deep learning seq2seq framework based on TensorFlow
## Algorithm

1. Spells out arabic numbers and some currency symbols. (e.g. $200 -> two hundred dollars) (This is borrowed from [Keith Ito's code](https://github.com/keithito/tacotron/blob/master/text/numbers.py))
2. Attempts to retrieve the correct pronunciation for heteronyms based on their POS.
3. Looks up [The CMU Pronouncing Dictionary](http://www.speech.cs.cmu.edu/cgi-bin/cmudict) for non-homographs.
4. For OOVs, we predict their pronunciations using our neural net model.
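The lookup order above can be sketched as follows. This is an illustrative toy, not the package's implementation: `TOY_CMUDICT` and `to_phonemes` are hypothetical names, the tiny dictionary stands in for CMUdict, and the neural fallback is left as a pluggable callable.

```python
# Toy sketch of steps 3-4: dictionary lookup first, neural fallback for OOVs.
TOY_CMUDICT = {
    "cat": ["K", "AE1", "T"],
    "dogs": ["D", "AA1", "G", "Z"],
}

def to_phonemes(word, oov_model=None):
    prons = TOY_CMUDICT.get(word.lower())
    if prons is not None:        # step 3: dictionary hit
        return prons
    if oov_model is not None:    # step 4: predict with a neural model
        return oov_model(word)
    return []

print(to_phonemes("Cat"))   # -> ['K', 'AE1', 'T']
```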

## Environment

* python 3.x

## Dependencies

* numpy >= 1.13.1
* nltk >= 3.2.4
* python -m nltk.downloader "averaged_perceptron_tagger" "cmudict"
* inflect >= 0.3.1
* Distance >= 0.1.3

## Installation

@@ -47,28 +50,41 @@ OR

The necessary nltk data packages will be downloaded automatically on your first run.

## Training (Note that pretrained model is already included)

    python train.py

## Usage

    from g2p_en import G2p

    texts = ["I have $250 in my pocket.", # number -> spell-out
             "popular pets, e.g. cats and dogs", # e.g. -> for example
             "I refuse to collect the refuse around here.", # homograph
             "I'm an activationist."] # newly coined word
    g2p = G2p()
    for text in texts:
        out = g2p(text)
        print(out)
    >>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.']
    >>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z']
    >>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.']
    >>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.']
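The flat output above keeps word boundaries as `' '` tokens. A small post-processing helper (hypothetical, not part of the package) can regroup the phonemes per word:

```python
def group_by_word(phonemes):
    """Split a flat phoneme list on ' ' word-boundary tokens."""
    words, current = [], []
    for p in phonemes:
        if p == " ":
            if current:          # close the word collected so far
                words.append(current)
            current = []
        else:
            current.append(p)
    if current:                  # flush the trailing word
        words.append(current)
    return words

print(group_by_word(['AY1', ' ', 'HH', 'AE1', 'V']))  # -> [['AY1'], ['HH', 'AE1', 'V']]
```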

## References

If you use this code for research, please cite:

```
@misc{g2pE2019,
  author = {Park, Kyubyong and Kim, Jongseok},
  title = {g2pE},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/Kyubyong/g2p}}
}
```

## Cited in
* [Learning pronunciation from a foreign language in speech synthesis networks](https://arxiv.org/abs/1811.09364)

May, 2018.

43 changes: 17 additions & 26 deletions README.rst
@@ -1,6 +1,8 @@
g2p\_en: A Simple Python Module for English Grapheme To Phoneme Conversion
==========================================================================

[Update] We removed TensorFlow from the dependencies. After all, it changes its APIs quite often, and we don't expect you to have a GPU. Instead, NumPy is used for inference.

This module is designed to convert English graphemes (spelling) to
phonemes (pronunciation). It is considered essential in several tasks
such as speech synthesis. Unlike many languages like Spanish or German
@@ -42,18 +44,16 @@ Algorithm
Environment
-----------

- python 3.x

Dependencies
------------

- numpy >= 1.13.1
- nltk >= 3.2.4
- python -m nltk.downloader "averaged\_perceptron\_tagger" "cmudict"
- inflect >= 0.3.1
- Distance >= 0.1.3

Installation
------------
@@ -70,36 +70,27 @@ OR

The necessary nltk data packages will be downloaded automatically on your first run.

Training (Note that pretrained model is already included)
---------------------------------------------------------

::

    python train.py

Usage
-----

::

    from g2p_en import G2p

    texts = ["I have $250 in my pocket.", # number -> spell-out
             "popular pets, e.g. cats and dogs", # e.g. -> for example
             "I refuse to collect the refuse around here.", # homograph
             "I'm an activationist."] # newly coined word
    g2p = G2p()
    for text in texts:
        out = g2p(text)
        print(out)
    >>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.']
    >>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z']
    >>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.']
    >>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.']


May, 2018.

Binary file added g2p.pdf
Binary file not shown.
119 changes: 0 additions & 119 deletions g2p_en.egg-info/PKG-INFO

This file was deleted.

19 changes: 0 additions & 19 deletions g2p_en.egg-info/SOURCES.txt

This file was deleted.

1 change: 0 additions & 1 deletion g2p_en.egg-info/dependency_links.txt

This file was deleted.

6 changes: 0 additions & 6 deletions g2p_en.egg-info/requires.txt

This file was deleted.

1 change: 0 additions & 1 deletion g2p_en.egg-info/top_level.txt

This file was deleted.

2 changes: 1 addition & 1 deletion g2p_en/__init__.py
@@ -1 +1 @@
from g2p import g2p, Session
from .g2p import G2p
Binary file added g2p_en/checkpoint20.npz
Binary file not shown.
2 changes: 0 additions & 2 deletions g2p_en/expand.py
@@ -12,8 +12,6 @@





_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
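These regexes feed the number spell-out pass (step 1 of the algorithm, borrowed from Keith Ito's code). A minimal toy sketch of the idea, with a stand-in spell-out table instead of the inflect engine the module actually uses (`_WORDS` and `expand_numbers` are hypothetical names):

```python
import re

# Toy stand-in for the inflect engine that expand.py actually uses.
_WORDS = {"250": "two hundred fifty", "1000": "one thousand"}

_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_dollars_re = re.compile(r'\$([0-9]+)')

def expand_numbers(text):
    # "1,000" -> "1000" so the digit run can be spelled out in one piece
    text = _comma_number_re.sub(lambda m: m.group(1).replace(',', ''), text)
    # "$250" -> "two hundred fifty dollars"
    return _dollars_re.sub(
        lambda m: _WORDS.get(m.group(1), m.group(1)) + " dollars", text)

print(expand_numbers("I have $250 in my pocket."))
# -> I have two hundred fifty dollars in my pocket.
```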