Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 43 additions & 29 deletions ch3/3_1_wordsteam.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# This script gives you an idea of how stemming is performed using the NLTK library.
# It is part of morphological analysis

from nltk.stem import PorterStemmer
from polyglot.text import Text, Word
from nltk.stem import WordNetLemmatizer
import nltk

word = "unexpected"
text = "disagreement"
Expand All @@ -17,41 +18,54 @@
text9 = "expected"
# Example word lists for the morphological-analysis demos below.
words_derv = ["happiness", "unkind"]  # derivational morphology examples
word_infle = ["dogs", "expected"]     # inflectional morphology examples
# NOTE: the comma between "canonical" and "historical" is required — without it
# Python implicitly concatenates the two string literals into "canonicalhistorical".
words = ["unexpected", "disagreement", "disagree", "agreement", "quirkiness", "canonical", "historical"]

def stemmer_porter():
    """Demonstrate Porter stemming on the module-level example strings.

    Stems every whitespace-separated token of each example with NLTK's
    ``PorterStemmer`` and prints the results, grouped into derivational
    morphemes, inflectional morphemes, and miscellaneous examples.

    Returns:
        None. Output is written to stdout only.
    """
    port = PorterStemmer()

    def stem_line(sample):
        # Stem each token of one example string and join back into a line.
        return " ".join(port.stem(token) for token in sample.split())

    print("\nDerivational Morphemes")
    for sample in (text6, text7):
        print(stem_line(sample))

    print("\nInflectional Morphemes")
    for sample in (text8, text9):
        print(stem_line(sample))

    print("\nSome examples")
    for sample in (word, text, text1, text2, text3, text4, text5):
        print(stem_line(sample))


def nltk_lemmatizer():
    """Demonstrate morphological analysis via NLTK's WordNetLemmatizer.

    Replacement for the earlier polyglot-based morpheme demo: for each
    example word it prints the lemma obtained under different
    part-of-speech hypotheses (noun / adjective / verb).

    Requires the WordNet corpus (``nltk.download('wordnet')``) to be
    available — TODO confirm it is installed in the target environment.

    Returns:
        None. Output is written to stdout only.
    """
    lemmatizer = WordNetLemmatizer()

    def lemma_report(token, pos_tags):
        # Build one aligned output line showing the lemma per POS tag.
        labels = {'n': 'Noun', 'a': 'Adj', 'v': 'Verb'}
        parts = ["{}: {:<15}".format(labels[p], lemmatizer.lemmatize(token, pos=p))
                 for p in pos_tags]
        return "{:<20}{}".format(token, " ".join(parts).rstrip())

    print("\nDerivational Morphemes using NLTK lemmatizer")
    for w in words_derv:
        print(lemma_report(w, ('n', 'a', 'v')))

    print("\nInflectional Morphemes using NLTK lemmatizer")
    for w in word_infle:
        print(lemma_report(w, ('n', 'v')))

    print("\nSome Morphemes examples using NLTK lemmatizer")
    for w in words:
        print(lemma_report(w, ('n', 'a', 'v')))


if __name__ == "__main__":
    # Run both demos: Porter stemming, then WordNet lemmatization
    # (the old polyglot_stem() entry point was removed in this revision).
    stemmer_porter()
    nltk_lemmatizer()