-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
158 lines (126 loc) · 4.68 KB
/
app.py
File metadata and controls
158 lines (126 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#importing libraries
import streamlit as st
import matplotlib.pyplot as plt
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
print(nltk.__version__)
import re
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
# Download the NLTK resources the app depends on: the Punkt sentence/word
# tokenizer data ('punkt' and its newer 'punkt_tab' layout), the English
# stop-word list, and the VADER sentiment lexicon. The original code
# downloaded 'stopwords' twice; the loop deduplicates the calls, and
# quiet=True suppresses the per-resource console chatter (downloads are
# cached by NLTK, so repeated runs are cheap no-ops).
for _resource in ("punkt", "punkt_tab", "stopwords", "vader_lexicon"):
    nltk.download(_resource, quiet=True)
# Function to summarize text
def summarize_text(text, num_sentences):
    """Produce an extractive summary of *text* using LSA.

    Parameters
    ----------
    text : str
        The raw document to summarize.
    num_sentences : int
        How many sentences the summary should contain.

    Returns
    -------
    str
        The selected sentences joined into one space-separated string.
    """
    # Configure the LSA summarizer with an English stemmer and stop words.
    summarizer = LsaSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")
    # Parse the raw string into sumy's document model.
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    # sumy yields Sentence objects; stringify each and join them.
    return " ".join(str(sentence) for sentence in summarizer(document, num_sentences))
# Function to generate word cloud
def generate_wordcloud(text):
    """Build and return a WordCloud for *text* with English stop words removed."""
    english_stopwords = set(stopwords.words('english'))
    cloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        stopwords=english_stopwords,
        max_words=200,
        colormap='viridis',
    )
    return cloud.generate(text)
# Function to plot word frequency
def plot_word_frequency(text):
    """Draw a bar chart of the 10 most frequent meaningful words in *text*.

    Lower-cases and tokenizes the text, drops non-alphabetic tokens and
    English stop words, counts the remainder, and renders the top-10 bar
    chart into the Streamlit app via st.pyplot.
    """
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    # Fix: the original called zip(*most_common_words) unconditionally,
    # which raises ValueError ("not enough values to unpack") when every
    # token was filtered out (empty input, or text made only of stop
    # words / punctuation). Bail out with a message instead of crashing.
    if not tokens:
        st.info("No words left to plot after removing stop words.")
        return
    words, counts = zip(*Counter(tokens).most_common(10))
    plt.figure(figsize=(10, 5))
    plt.bar(words, counts, color='skyblue')
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.title('Top 10 Most Common Words')
    plt.xticks(rotation=45)
    st.pyplot(plt)
# Function for sentiment analysis
# def SentimentAnalysis(text):
# sentiment_analyzer = SentimentIntensityAnalyzer()
# sentiment_score = sentiment_analyzer.polarity_scores(text)
# return sentiment_score
#testing for sentiment analysis
def preprocess_text(text):
    """Normalize *text* for analysis.

    Lower-cases the input, strips every character that is not a lowercase
    letter or whitespace (punctuation, digits), and removes English stop
    words. Returns the surviving words joined by single spaces.
    """
    lowered = text.lower()
    # After lower-casing, anything outside [a-z] and whitespace is noise.
    letters_only = re.sub(r'[^a-z\s]', '', lowered)
    stop_words = set(stopwords.words('english'))
    kept_words = [word for word in letters_only.split() if word not in stop_words]
    return ' '.join(kept_words)
def SentimentAnalysis(text):
    """Return VADER polarity scores for *text*.

    Returns the usual VADER dict with 'neg', 'neu', 'pos' and 'compound'
    keys (same shape as before, so callers are unaffected).

    Fix: the previous version ran preprocess_text() first, which removes
    stop words and punctuation. VADER is designed for raw text — it relies
    on negation words ("not", "never": stop words!), punctuation emphasis
    ("great!!!") and capitalization ("GREAT") as sentiment cues — so that
    preprocessing systematically distorted the scores (e.g. "not good"
    became "good"). The raw text is now scored directly.
    """
    sentiment_analyzer = SentimentIntensityAnalyzer()
    return sentiment_analyzer.polarity_scores(text)
# ---------------------------------------------------------------------------
# Streamlit UI — top-level script body. Streamlit re-runs this whole script
# on every widget interaction, so each st.sidebar.button() branch below
# executes only on the rerun triggered by that button's click.
# (Indentation reconstructed; the scraped source had it stripped.)
# ---------------------------------------------------------------------------
st.title("Text Summarization and Analysis ")
# File upload or text input
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
text_input = st.text_area("Or enter text directly:")
# Resolve the working text: an uploaded file takes precedence over the
# text box; with neither present, warn and halt this script run.
if uploaded_file is not None:
    # NOTE(review): assumes the uploaded file is UTF-8 — a different
    # encoding would raise UnicodeDecodeError here; confirm/handle.
    text = uploaded_file.read().decode("utf-8")
elif text_input:
    text = text_input
else:
    st.warning("Please upload a file or enter text.")
    st.stop()
# Sidebar for controls
st.sidebar.header("Options")
# Number of lines for summary (1..20, default 3); passed to summarize_text.
num_lines = st.sidebar.number_input("Number of lines for summary:", min_value=1, max_value=20, value=3)
# Buttons in the sidebar — each action renders its result in the main pane.
if st.sidebar.button("Summarize"):
    summary = summarize_text(text, num_lines)
    st.subheader("Summary:")
    st.write(summary)
if st.sidebar.button("Generate Word Cloud"):
    wordcloud = generate_wordcloud(text)
    st.subheader("Word Cloud:")
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    st.pyplot(plt)
if st.sidebar.button("Show Word Frequency Graph"):
    st.subheader("Word Frequency Graph:")
    plot_word_frequency(text)
if st.sidebar.button("Analyze Sentiment"):
    sentiment_score = SentimentAnalysis(text)
    st.subheader("Sentiment Analysis:")
    st.write("Sentiment Score:", sentiment_score)
    # Classify from the pos/neg/neu components: a label is only assigned
    # when the text is not overwhelmingly neutral (neu < 0.75).
    a=sentiment_score['pos']
    b=sentiment_score['neg']
    c=sentiment_score['neu']
    if(a>b and c<0.75):
        st.write("The text is Positive")
    elif(b>a and c<0.75):
        st.write("The text is Negative")
    else:
        st.write("The text is Neutral")
    st.write("Compound Score:", sentiment_score['compound'])
    #st.write("Positive:", sentiment_score['pos'])
    #st.write("Negative:", sentiment_score['neg'])
    #st.write("Neutral:", sentiment_score['neu'])