Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions ch3/3_1_wordsteam.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,32 @@

# NOTE(review): the span below is a GitHub diff rendering, not runnable
# source — the Python 2 `print` statements are the REMOVED lines and the
# `print(...)` calls that follow are their ADDED Python 3 replacements
# of the very same statements (the page header reports 17 additions &
# 17 deletions). Do not paste this span into a .py file as-is.
def stemmer_porter():
port = PorterStemmer()
# --- removed lines (Python 2 print-statement syntax) ---
print "\nDerivational Morphemes"
print " ".join([port.stem(i) for i in text6.split()])
print " ".join([port.stem(i) for i in text7.split()])
print "\nInflectional Morphemes"
print " ".join([port.stem(i) for i in text8.split()])
print " ".join([port.stem(i) for i in text9.split()])
print "\nSome examples"
print " ".join([port.stem(i) for i in word.split()])
print " ".join([port.stem(i) for i in text.split()])
print " ".join([port.stem(i) for i in text1.split()])
print " ".join([port.stem(i) for i in text2.split()])
print " ".join([port.stem(i) for i in text3.split()])
print " ".join([port.stem(i) for i in text4.split()])
print " ".join([port.stem(i) for i in text5.split()])
# --- added lines (Python 3 print-function replacements) ---
print("\nDerivational Morphemes")
print(" ".join([port.stem(i) for i in text6.split()]))
print(" ".join([port.stem(i) for i in text7.split()]))
print("\nInflectional Morphemes")
print(" ".join([port.stem(i) for i in text8.split()]))
print(" ".join([port.stem(i) for i in text9.split()]))
print("\nSome examples")
print(" ".join([port.stem(i) for i in word.split()]))
print(" ".join([port.stem(i) for i in text.split()]))
print(" ".join([port.stem(i) for i in text1.split()]))
print(" ".join([port.stem(i) for i in text2.split()]))
print(" ".join([port.stem(i) for i in text3.split()]))
print(" ".join([port.stem(i) for i in text4.split()]))
print(" ".join([port.stem(i) for i in text5.split()]))


# NOTE(review): GitHub diff rendering — each Python 2 `print` statement
# below is a removed line immediately followed by its Python 3
# replacement; the `for` loops and `format` calls were unchanged by the
# diff. Not runnable as-is.
def polyglot_stem():
# removed (py2) / added (py3) heading pair:
print "\nDerivational Morphemes using polyglot library"
print("\nDerivational Morphemes using polyglot library")
for w in words_derv:
w = Word(w, language="en")
print("{:<20}{}".format(w, w.morphemes))
# removed (py2) / added (py3) heading pair:
print "\nInflectional Morphemes using polyglot library"
print("\nInflectional Morphemes using polyglot library")
for w in word_infle:
w = Word(w, language="en")
print("{:<20}{}".format(w, w.morphemes))
# removed (py2) / added (py3) heading pair:
print "\nSome Morphemes examples using polyglot library"
print("\nSome Morphemes examples using polyglot library")
# NOTE(review): this third section iterates word_infle again — possibly
# intended to be a different word list; confirm against the book text.
for w in word_infle:
w = Word(w, language="en")
print("{:<20}{}".format(w, w.morphemes))
Expand Down
119 changes: 119 additions & 0 deletions ch3/3_1_wordsteam_nltk_only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Alternative version of wordsteam.py using only NLTK library.
This version avoids polyglot dependency issues and works reliably with Python 3.

This script demonstrates stemming using NLTK's Porter, Lancaster, and Snowball stemmers.
It is part of morphological analysis from Chapter 3.
"""

from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer

# Sample vocabulary shared by every demo function below.
# Single-word samples, fed through the stemmers one at a time.
word = "unexpected"
text = "disagreement"
text1 = "disagree"
text2 = "agreement"
text3 = "quirkiness"
text4 = "historical"
text5 = "canonical"
text6 = "happiness"   # derivational examples (text6, text7)
text7 = "unkind"
text8 = "dogs"        # inflectional examples (text8, text9)
text9 = "expected"
# Grouped lists used by the Lancaster/Snowball demos and the comparison table.
words_derv = ["happiness", "unkind"]
word_infle = ["dogs", "expected"]
words = ["unexpected", "disagreement", "disagree", "agreement", "quirkiness", "canonical", "historical"]

def stemmer_porter():
    """Demonstrate Porter Stemmer for derivational and inflectional morphemes."""
    stem = PorterStemmer().stem
    # Each section: a heading followed by the sample strings to stem.
    sections = (
        ("\nDerivational Morphemes (Porter Stemmer)", (text6, text7)),
        ("\nInflectional Morphemes (Porter Stemmer)", (text8, text9)),
        ("\nSome examples (Porter Stemmer)",
         (word, text, text1, text2, text3, text4, text5)),
    )
    for heading, samples in sections:
        print(heading)
        for sample in samples:
            # Stem token-by-token, then rejoin (samples here are single words).
            print(" ".join(stem(token) for token in sample.split()))


def stemmer_lancaster():
    """Demonstrate Lancaster Stemmer (more aggressive than Porter)."""
    stem = LancasterStemmer().stem
    banner = "=" * 60
    print("\n" + banner)
    print("Lancaster Stemmer (More Aggressive)")
    print(banner)
    # Walk the three sample groups with a shared printing loop.
    for title, group in (("\nDerivational Morphemes", words_derv),
                         ("\nInflectional Morphemes", word_infle),
                         ("\nSome examples", words)):
        print(title)
        for term in group:
            print("{:<20}{}".format(term, stem(term)))


def stemmer_snowball():
    """Demonstrate Snowball Stemmer (improved Porter algorithm)."""
    stem = SnowballStemmer('english').stem
    banner = "=" * 60
    print("\n" + banner)
    print("Snowball Stemmer (Improved Porter)")
    print(banner)
    # Same three-section layout as the other stemmer demos.
    for title, group in (("\nDerivational Morphemes", words_derv),
                         ("\nInflectional Morphemes", word_infle),
                         ("\nSome examples", words)):
        print(title)
        for term in group:
            print("{:<20}{}".format(term, stem(term)))


def compare_stemmers():
    """Compare all three stemmers side by side."""
    # Column order matches the header row below: Porter, Lancaster, Snowball.
    stemmers = (PorterStemmer(), LancasterStemmer(), SnowballStemmer('english'))
    rule = "=" * 60
    print("\n" + rule)
    print("Stemmer Comparison")
    print(rule)
    print("\n{:<20}{:<15}{:<15}{:<15}".format("Word", "Porter", "Lancaster", "Snowball"))
    print("-" * 60)
    for term in words_derv + word_infle + words:
        row = [term] + [s.stem(term) for s in stemmers]
        print("{:<20}{:<15}{:<15}{:<15}".format(*row))


if __name__ == "__main__":
    divider = "=" * 60
    print(divider)
    print("Word Stemming Examples using NLTK")
    print(divider)

    # Run each demo in the same order as before.
    for demo in (stemmer_porter, stemmer_lancaster,
                 stemmer_snowball, compare_stemmers):
        demo()

    print("\n" + divider)
    print("Note: This version uses only NLTK library.")
    print("For morpheme analysis similar to polyglot, consider using:")
    print(" - spaCy (modern, well-maintained)")
    print(" - stanza (Stanford NLP)")
    print(divider)
83 changes: 83 additions & 0 deletions ch3/Chapter_3_Installation_Commands_UPDATED.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
NLTK has already been installed in an earlier chapter.

===========================================
IMPORTANT: Polyglot Library Issues
===========================================

The polyglot library (version 16.7.4) has known issues:
1. Deprecated since 2016, not actively maintained
2. Server access issues causing HTTP 403 errors
3. Complex dependency requirements

RECOMMENDED SOLUTION: Use the NLTK-only alternative script
- File: 3_1_wordsteam_nltk_only.py
- Only requires: pip install nltk
- More reliable and demonstrates multiple stemming algorithms

===========================================
Option 1: NLTK-Only Solution (RECOMMENDED)
===========================================

Installation:
$ pip install nltk

Usage:
$ python3 3_1_wordsteam_nltk_only.py

This provides the same educational value without polyglot issues.

===========================================
Option 2: Full Polyglot Installation (Advanced)
===========================================

Note: May still encounter HTTP 403 errors even after installation

System Dependencies:
$ sudo apt-get update
$ sudo apt-get install -y libicu-dev pkg-config

Python Dependencies:
$ pip install polyglot==16.7.4
$ pip install PyICU pycld2 morfessor numpy six

Apply Patch (to fix HTTP 403 errors):
$ python3 polyglot_downloader_patch.py

Run Script:
$ python3 3_1_wordsteam.py

Warning: Polyglot's morpheme download may still fail due to server issues.

===========================================
Option 3: Modern Alternatives (Production)
===========================================

For production code, consider these well-maintained alternatives:

spaCy (Recommended):
$ pip install spacy
$ python -m spacy download en_core_web_sm

Stanza (Stanford NLP):
$ pip install stanza

===========================================
Stanford CoreNLP (Section 3.3)
===========================================

Section A: Install Stanford CoreNLP
3.1. Download CoreNLP: https://stanfordnlp.github.io/CoreNLP/
3.2. Extract the zip anywhere
3.3. $ cd stanford-corenlp-full-2016-10-31/
3.4. $ java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer

Section B: Install pycorenlp
3.5. Open new terminal
3.6. $ pip install pycorenlp

===========================================
For More Information
===========================================

See FIXES_README.md for detailed explanations, troubleshooting,
and comparison of different solutions.
Loading