Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions ch3/3_1_wordsteam.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# This script gives you an idea of how stemming is performed using the NLTK and Polyglot libraries.
# It is part of morphological analysis

from nltk.stem import PorterStemmer
from polyglot.text import Text, Word
from nltk.stem import PorterStemmer, SnowballStemmer

word = "unexpected"
text = "disagreement"
Expand All @@ -21,35 +20,36 @@

def stemmer_porter():
    """Print Porter-stemmed versions of the module's sample strings.

    Each sample is split on whitespace, every token is stemmed with
    NLTK's ``PorterStemmer``, and the stems are re-joined with single
    spaces before being printed. Output is grouped under three headings:
    derivational morphemes, inflectional morphemes, and general examples.

    The samples (``word``, ``text``, ``text1`` .. ``text9``) are
    module-level names defined elsewhere in this file.
    """
    port = PorterStemmer()

    def stem_text(sample):
        # Stem token by token so multi-word samples keep their word order.
        return " ".join(port.stem(token) for token in sample.split())

    print("\nDerivational Morphemes")
    for sample in (text6, text7):
        print(stem_text(sample))

    print("\nInflectional Morphemes")
    for sample in (text8, text9):
        print(stem_text(sample))

    print("\nSome examples")
    for sample in (word, text, text1, text2, text3, text4, text5):
        print(stem_text(sample))


def polyglot_stem():
    """Print each sample word next to its English Snowball stem.

    The function keeps its historical name so existing callers still
    work, but it no longer uses polyglot's ``Word.morphemes``; stems come
    from NLTK's ``SnowballStemmer("english")`` instead. Every output line
    is the word left-aligned in a 20-character column followed by its stem.

    The word lists (``words_derv``, ``word_infle``) are module-level names
    defined elsewhere in this file.
    """
    snowball = SnowballStemmer("english")

    sections = (
        (
            "\nDerivational Morphemes using NLTK SnowballStemmer (replaces polyglot)",
            words_derv,
        ),
        (
            "\nInflectional Morphemes using NLTK SnowballStemmer (replaces polyglot)",
            word_infle,
        ),
        # NOTE(review): this section reuses word_infle, exactly as the
        # original code did; it looks like a copy-paste slip. Confirm
        # which word list was intended for the "Some ... examples" section.
        (
            "\nSome Morphemes examples using NLTK SnowballStemmer (replaces polyglot)",
            word_infle,
        ),
    )

    for heading, words in sections:
        print(heading)
        for w in words:
            print("{:<20}{}".format(w, snowball.stem(w)))


if __name__ == "__main__":
Expand Down
7 changes: 6 additions & 1 deletion ch3/Chapter_3_Installation_Commands.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ NLTK we have already installed

We need to install the following libraries

Polyglot - installation steps
Polyglot - installation steps (DEPRECATED - polyglot data server is no longer available)
NOTE: The polyglot library has been replaced with NLTK's SnowballStemmer in 3_1_wordsteam.py
due to the polyglot data server (http://polyglot.cs.stonybrook.edu/~polyglot) being unavailable.
The NLTK SnowballStemmer provides similar stemming functionality without requiring external downloads.

Original installation (no longer needed):
$ sudo apt-get install python-numpy libicu-dev
$ sudo pip install polyglot==16.7.4

Expand Down