diff --git a/ch3/3_1_wordsteam.py b/ch3/3_1_wordsteam.py
index 9b10cbb..7f6e1fd 100644
--- a/ch3/3_1_wordsteam.py
+++ b/ch3/3_1_wordsteam.py
@@ -1,8 +1,9 @@
-# This script give you idea how stemming has been placed by using NLTK and Polyglot libraries.
+# This script gives you an idea of how stemming is performed using the NLTK library.
 # It is part of morphological analysis
 from nltk.stem import PorterStemmer
-from polyglot.text import Text, Word
+from nltk.stem import WordNetLemmatizer
+import nltk
 
 word = "unexpected"
 text = "disagreement"
@@ -17,41 +18,54 @@ text9 = "expected"
 words_derv = ["happiness", "unkind"]
 word_infle = ["dogs", "expected"]
-words = ["unexpected", "disagreement", "disagree", "agreement", "quirkiness", "canonical" "historical"]
+words = ["unexpected", "disagreement", "disagree", "agreement", "quirkiness", "canonical", "historical"]
 
 
 def stemmer_porter():
     port = PorterStemmer()
-    print "\nDerivational Morphemes"
-    print " ".join([port.stem(i) for i in text6.split()])
-    print " ".join([port.stem(i) for i in text7.split()])
-    print "\nInflectional Morphemes"
-    print " ".join([port.stem(i) for i in text8.split()])
-    print " ".join([port.stem(i) for i in text9.split()])
-    print "\nSome examples"
-    print " ".join([port.stem(i) for i in word.split()])
-    print " ".join([port.stem(i) for i in text.split()])
-    print " ".join([port.stem(i) for i in text1.split()])
-    print " ".join([port.stem(i) for i in text2.split()])
-    print " ".join([port.stem(i) for i in text3.split()])
-    print " ".join([port.stem(i) for i in text4.split()])
-    print " ".join([port.stem(i) for i in text5.split()])
+    print("\nDerivational Morphemes")
+    print(" ".join([port.stem(i) for i in text6.split()]))
+    print(" ".join([port.stem(i) for i in text7.split()]))
+    print("\nInflectional Morphemes")
+    print(" ".join([port.stem(i) for i in text8.split()]))
+    print(" ".join([port.stem(i) for i in text9.split()]))
+    print("\nSome examples")
+    print(" ".join([port.stem(i) for i in word.split()]))
+    print(" ".join([port.stem(i) for i in text.split()]))
+    print(" ".join([port.stem(i) for i in text1.split()]))
+    print(" ".join([port.stem(i) for i in text2.split()]))
+    print(" ".join([port.stem(i) for i in text3.split()]))
+    print(" ".join([port.stem(i) for i in text4.split()]))
+    print(" ".join([port.stem(i) for i in text5.split()]))
 
 
-def polyglot_stem():
-    print "\nDerivational Morphemes using polyglot library"
+def nltk_lemmatizer():
+    """
+    Alternative to polyglot using NLTK's WordNetLemmatizer.
+    This demonstrates morphological analysis using lemmatization.
+    """
+    lemmatizer = WordNetLemmatizer()
+
+    print("\nDerivational Morphemes using NLTK lemmatizer")
     for w in words_derv:
-        w = Word(w, language="en")
-        print("{:<20}{}".format(w, w.morphemes))
-    print "\nInflectional Morphemes using polyglot library"
+        noun_form = lemmatizer.lemmatize(w, pos='n')
+        adj_form = lemmatizer.lemmatize(w, pos='a')
+        verb_form = lemmatizer.lemmatize(w, pos='v')
+        print("{:<20}Noun: {:<15} Adj: {:<15} Verb: {}".format(w, noun_form, adj_form, verb_form))
+
+    print("\nInflectional Morphemes using NLTK lemmatizer")
     for w in word_infle:
-        w = Word(w, language="en")
-        print("{:<20}{}".format(w, w.morphemes))
-    print "\nSome Morphemes examples using polyglot library"
-    for w in word_infle:
-        w = Word(w, language="en")
-        print("{:<20}{}".format(w, w.morphemes))
+        noun_form = lemmatizer.lemmatize(w, pos='n')
+        verb_form = lemmatizer.lemmatize(w, pos='v')
+        print("{:<20}Noun: {:<15} Verb: {}".format(w, noun_form, verb_form))
+
+    print("\nSome Morphemes examples using NLTK lemmatizer")
+    for w in words:
+        noun_form = lemmatizer.lemmatize(w, pos='n')
+        adj_form = lemmatizer.lemmatize(w, pos='a')
+        verb_form = lemmatizer.lemmatize(w, pos='v')
+        print("{:<20}Noun: {:<15} Adj: {:<15} Verb: {}".format(w, noun_form, adj_form, verb_form))
 
 
 if __name__ == "__main__":
     stemmer_porter()
-    polyglot_stem()
+    nltk_lemmatizer()