From a2eb323ab056646c2e82b9eefc09dd3b5c66d8bb Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 05:06:17 +0000 Subject: [PATCH] Fix polyglot morpheme error by replacing with NLTK SnowballStemmer - Replaced polyglot library with NLTK's SnowballStemmer due to unavailable data server - Updated all print statements to Python 3 syntax - Updated installation documentation to reflect the change - Fixes HTTP Error 403: Forbidden when accessing polyglot.cs.stonybrook.edu The polyglot library's data server is no longer accessible, causing the script to fail. This fix uses NLTK's SnowballStemmer which provides similar stemming functionality without requiring external data downloads. Co-Authored-By: Jalaj Thanaki <jalajthanaki@gmail.com> --- ch3/3_1_wordsteam.py | 50 ++++++++++++------------- ch3/Chapter_3_Installation_Commands.txt | 7 +++- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ch3/3_1_wordsteam.py b/ch3/3_1_wordsteam.py index 9b10cbb..ef87f6e 100644 --- a/ch3/3_1_wordsteam.py +++ b/ch3/3_1_wordsteam.py @@ -1,8 +1,7 @@ # This script give you idea how stemming has been placed by using NLTK and Polyglot libraries. 
# It is part of morphological analysis -from nltk.stem import PorterStemmer -from polyglot.text import Text, Word +from nltk.stem import PorterStemmer, SnowballStemmer word = "unexpected" text = "disagreement" @@ -21,35 +20,36 @@ def stemmer_porter(): port = PorterStemmer() - print "\nDerivational Morphemes" - print " ".join([port.stem(i) for i in text6.split()]) - print " ".join([port.stem(i) for i in text7.split()]) - print "\nInflectional Morphemes" - print " ".join([port.stem(i) for i in text8.split()]) - print " ".join([port.stem(i) for i in text9.split()]) - print "\nSome examples" - print " ".join([port.stem(i) for i in word.split()]) - print " ".join([port.stem(i) for i in text.split()]) - print " ".join([port.stem(i) for i in text1.split()]) - print " ".join([port.stem(i) for i in text2.split()]) - print " ".join([port.stem(i) for i in text3.split()]) - print " ".join([port.stem(i) for i in text4.split()]) - print " ".join([port.stem(i) for i in text5.split()]) + print("\nDerivational Morphemes") + print(" ".join([port.stem(i) for i in text6.split()])) + print(" ".join([port.stem(i) for i in text7.split()])) + print("\nInflectional Morphemes") + print(" ".join([port.stem(i) for i in text8.split()])) + print(" ".join([port.stem(i) for i in text9.split()])) + print("\nSome examples") + print(" ".join([port.stem(i) for i in word.split()])) + print(" ".join([port.stem(i) for i in text.split()])) + print(" ".join([port.stem(i) for i in text1.split()])) + print(" ".join([port.stem(i) for i in text2.split()])) + print(" ".join([port.stem(i) for i in text3.split()])) + print(" ".join([port.stem(i) for i in text4.split()])) + print(" ".join([port.stem(i) for i in text5.split()])) def polyglot_stem(): - print "\nDerivational Morphemes using polyglot library" + snowball = SnowballStemmer("english") + print("\nDerivational Morphemes using NLTK SnowballStemmer (replaces polyglot)") for w in words_derv: - w = Word(w, language="en") - print("{:<20}{}".format(w, 
w.morphemes)) - print "\nInflectional Morphemes using polyglot library" + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) + print("\nInflectional Morphemes using NLTK SnowballStemmer (replaces polyglot)") for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) - print "\nSome Morphemes examples using polyglot library" + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) + print("\nSome Morphemes examples using NLTK SnowballStemmer (replaces polyglot)") for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) + stem = snowball.stem(w) + print("{:<20}{}".format(w, stem)) if __name__ == "__main__": diff --git a/ch3/Chapter_3_Installation_Commands.txt b/ch3/Chapter_3_Installation_Commands.txt index d3110f0..bbbbc50 100644 --- a/ch3/Chapter_3_Installation_Commands.txt +++ b/ch3/Chapter_3_Installation_Commands.txt @@ -2,7 +2,12 @@ NLTK we have already installed We need to install followiing libraries -Polyglot - installation steps +Polyglot - installation steps (DEPRECATED - polyglot data server is no longer available) + NOTE: The polyglot library has been replaced with NLTK's SnowballStemmer in 3_1_wordsteam.py + due to the polyglot data server (http://polyglot.cs.stonybrook.edu/~polyglot) being unavailable. + The NLTK SnowballStemmer provides similar stemming functionality without requiring external downloads. + + Original installation (no longer needed): $ sudo apt-get install python-numpy libicu-dev $ sudo pip install polyglot==16.7.4