diff --git a/ch3/3_1_wordsteam.py b/ch3/3_1_wordsteam.py index 9b10cbb..ef87f6e 100644 --- a/ch3/3_1_wordsteam.py +++ b/ch3/3_1_wordsteam.py @@ -1,8 +1,7 @@ # This script give you idea how stemming has been placed by using NLTK and Polyglot libraries. # It is part of morphological analysis -from nltk.stem import PorterStemmer -from polyglot.text import Text, Word +from nltk.stem import PorterStemmer, SnowballStemmer word = "unexpected" text = "disagreement" @@ -21,35 +20,36 @@ def stemmer_porter(): port = PorterStemmer() - print "\nDerivational Morphemes" - print " ".join([port.stem(i) for i in text6.split()]) - print " ".join([port.stem(i) for i in text7.split()]) - print "\nInflectional Morphemes" - print " ".join([port.stem(i) for i in text8.split()]) - print " ".join([port.stem(i) for i in text9.split()]) - print "\nSome examples" - print " ".join([port.stem(i) for i in word.split()]) - print " ".join([port.stem(i) for i in text.split()]) - print " ".join([port.stem(i) for i in text1.split()]) - print " ".join([port.stem(i) for i in text2.split()]) - print " ".join([port.stem(i) for i in text3.split()]) - print " ".join([port.stem(i) for i in text4.split()]) - print " ".join([port.stem(i) for i in text5.split()]) + print("\nDerivational Morphemes") + print(" ".join([port.stem(i) for i in text6.split()])) + print(" ".join([port.stem(i) for i in text7.split()])) + print("\nInflectional Morphemes") + print(" ".join([port.stem(i) for i in text8.split()])) + print(" ".join([port.stem(i) for i in text9.split()])) + print("\nSome examples") + print(" ".join([port.stem(i) for i in word.split()])) + print(" ".join([port.stem(i) for i in text.split()])) + print(" ".join([port.stem(i) for i in text1.split()])) + print(" ".join([port.stem(i) for i in text2.split()])) + print(" ".join([port.stem(i) for i in text3.split()])) + print(" ".join([port.stem(i) for i in text4.split()])) + print(" ".join([port.stem(i) for i in text5.split()])) def polyglot_stem(): - print 
"\nDerivational Morphemes using polyglot library" + snowball = SnowballStemmer("english") + print("\nDerivational Morphemes using NLTK SnowballStemmer (replaces polyglot)") for w in words_derv: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) - print "\nInflectional Morphemes using polyglot library" + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) + print("\nInflectional Morphemes using NLTK SnowballStemmer (replaces polyglot)") for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) - print "\nSome Morphemes examples using polyglot library" + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) + print("\nSome Morphemes examples using NLTK SnowballStemmer (replaces polyglot)") for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) if __name__ == "__main__": diff --git a/ch3/Chapter_3_Installation_Commands.txt b/ch3/Chapter_3_Installation_Commands.txt index d3110f0..bbbbc50 100644 --- a/ch3/Chapter_3_Installation_Commands.txt +++ b/ch3/Chapter_3_Installation_Commands.txt @@ -2,7 +2,12 @@ NLTK we have already installed We need to install followiing libraries -Polyglot - installation steps +Polyglot - installation steps (DEPRECATED - polyglot data server is no longer available) + NOTE: The polyglot library has been replaced with NLTK's SnowballStemmer in 3_1_wordsteam.py + due to the polyglot data server (http://polyglot.cs.stonybrook.edu/~polyglot) being unavailable. + The NLTK SnowballStemmer requires no external downloads, but it returns a single stem per word rather than polyglot's list of morphemes, so the script's output differs from the original polyglot example. + + Original installation (no longer needed): $ sudo apt-get install python-numpy libicu-dev $ sudo pip install polyglot==16.7.4