-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
158 lines (126 loc) · 4.68 KB
/
app.py
File metadata and controls
158 lines (126 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#importing libraries
import streamlit as st
import matplotlib.pyplot as plt
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
print(nltk.__version__)
import re
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
# Download the NLTK resources the app depends on: the Punkt sentence/word
# tokenizer data ('punkt' and its newer 'punkt_tab' layout), the English
# stop-word list, and the VADER sentiment lexicon. The original code
# downloaded 'stopwords' twice; the loop deduplicates the calls, and
# quiet=True suppresses the per-resource console chatter (downloads are
# cached by NLTK, so repeated runs are cheap no-ops).
for _resource in ("punkt", "punkt_tab", "stopwords", "vader_lexicon"):
    nltk.download(_resource, quiet=True)
# Function to summarize text
def summarize_text(text, num_sentences):
    """Produce an extractive summary of *text* using LSA.

    Parameters
    ----------
    text : str
        The raw document to summarize.
    num_sentences : int
        How many sentences the summary should contain.

    Returns
    -------
    str
        The selected sentences joined into one space-separated string.
    """
    # Configure the LSA summarizer with an English stemmer and stop words.
    summarizer = LsaSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")
    # Parse the raw string into sumy's document model.
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    # sumy yields Sentence objects; stringify each and join them.
    return " ".join(str(sentence) for sentence in summarizer(document, num_sentences))
# Function to generate word cloud
def generate_wordcloud(text):
    """Build and return a WordCloud for *text* with English stop words removed."""
    english_stopwords = set(stopwords.words('english'))
    cloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        stopwords=english_stopwords,
        max_words=200,
        colormap='viridis',
    )
    return cloud.generate(text)
# Function to plot word frequency
def plot_word_frequency(text):
    """Draw a bar chart of the 10 most frequent meaningful words in *text*.

    Lower-cases and tokenizes the text, drops non-alphabetic tokens and
    English stop words, counts the remainder, and renders the top-10 bar
    chart into the Streamlit app via st.pyplot.
    """
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    # Fix: the original called zip(*most_common_words) unconditionally,
    # which raises ValueError ("not enough values to unpack") when every
    # token was filtered out (empty input, or text made only of stop
    # words / punctuation). Bail out with a message instead of crashing.
    if not tokens:
        st.info("No words left to plot after removing stop words.")
        return
    words, counts = zip(*Counter(tokens).most_common(10))
    plt.figure(figsize=(10, 5))
    plt.bar(words, counts, color='skyblue')
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.title('Top 10 Most Common Words')
    plt.xticks(rotation=45)
    st.pyplot(plt)
# Function for sentiment analysis
# def SentimentAnalysis(text):
# sentiment_analyzer = SentimentIntensityAnalyzer()
# sentiment_score = sentiment_analyzer.polarity_scores(text)
# return sentiment_score
#testing for sentiment analysis
def preprocess_text(text):
    """Normalize *text* for analysis.

    Lower-cases the input, strips every character that is not a lowercase
    letter or whitespace (punctuation, digits), and removes English stop
    words. Returns the surviving words joined by single spaces.
    """
    lowered = text.lower()
    # After lower-casing, anything outside [a-z] and whitespace is noise.
    letters_only = re.sub(r'[^a-z\s]', '', lowered)
    stop_words = set(stopwords.words('english'))
    kept_words = [word for word in letters_only.split() if word not in stop_words]
    return ' '.join(kept_words)
def SentimentAnalysis(text):
    """Return VADER polarity scores for *text*.

    Returns the usual VADER dict with 'neg', 'neu', 'pos' and 'compound'
    keys (same shape as before, so callers are unaffected).

    Fix: the previous version ran preprocess_text() first, which removes
    stop words and punctuation. VADER is designed for raw text — it relies
    on negation words ("not", "never": stop words!), punctuation emphasis
    ("great!!!") and capitalization ("GREAT") as sentiment cues — so that
    preprocessing systematically distorted the scores (e.g. "not good"
    became "good"). The raw text is now scored directly.
    """
    sentiment_analyzer = SentimentIntensityAnalyzer()
    return sentiment_analyzer.polarity_scores(text)
# ---------------------------------------------------------------------------
# Streamlit UI — top-level script body. Streamlit re-runs this whole script
# on every widget interaction, so each st.sidebar.button() branch below
# executes only on the rerun triggered by that button's click.
# (Indentation reconstructed; the scraped source had it stripped.)
# ---------------------------------------------------------------------------
st.title("Text Summarization and Analysis ")
# File upload or text input
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
text_input = st.text_area("Or enter text directly:")
# Resolve the working text: an uploaded file takes precedence over the
# text box; with neither present, warn and halt this script run.
if uploaded_file is not None:
    # NOTE(review): assumes the uploaded file is UTF-8 — a different
    # encoding would raise UnicodeDecodeError here; confirm/handle.
    text = uploaded_file.read().decode("utf-8")
elif text_input:
    text = text_input
else:
    st.warning("Please upload a file or enter text.")
    st.stop()
# Sidebar for controls
st.sidebar.header("Options")
# Number of lines for summary (1..20, default 3); passed to summarize_text.
num_lines = st.sidebar.number_input("Number of lines for summary:", min_value=1, max_value=20, value=3)
# Buttons in the sidebar — each action renders its result in the main pane.
if st.sidebar.button("Summarize"):
    summary = summarize_text(text, num_lines)
    st.subheader("Summary:")
    st.write(summary)
if st.sidebar.button("Generate Word Cloud"):
    wordcloud = generate_wordcloud(text)
    st.subheader("Word Cloud:")
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    st.pyplot(plt)
if st.sidebar.button("Show Word Frequency Graph"):
    st.subheader("Word Frequency Graph:")
    plot_word_frequency(text)
if st.sidebar.button("Analyze Sentiment"):
    sentiment_score = SentimentAnalysis(text)
    st.subheader("Sentiment Analysis:")
    st.write("Sentiment Score:", sentiment_score)
    # Classify from the pos/neg/neu components: a label is only assigned
    # when the text is not overwhelmingly neutral (neu < 0.75).
    a=sentiment_score['pos']
    b=sentiment_score['neg']
    c=sentiment_score['neu']
    if(a>b and c<0.75):
        st.write("The text is Positive")
    elif(b>a and c<0.75):
        st.write("The text is Negative")
    else:
        st.write("The text is Neutral")
    st.write("Compound Score:", sentiment_score['compound'])
    #st.write("Positive:", sentiment_score['pos'])
    #st.write("Negative:", sentiment_score['neg'])
    #st.write("Neutral:", sentiment_score['neu'])