-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
executable file
·134 lines (87 loc) · 4.24 KB
/
main.py
File metadata and controls
executable file
·134 lines (87 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from components.preppers.PdfExtractor import PdfExtractor
from components.preppers.Formatter import Formatter
from components.analyzers.SentimentAnalyzer import SentimentAnalyzer
from components.plotters.SentimentPlotter import SentimentPlotter
from components.plotters.SentimentScatterPlot import SentimentScatterPlot
from components.analyzers.RelationshipFinder import RelationshipFinder
from components.analyzers.CategoryBigrams import CategoryBigrams
from components.plotters.OverlayPlotter import OverlayPlotter
def overlay_plot(percents, info):
    """Draw a single overlay chart.

    Args:
        percents: Forwarded unchanged as the first ``OverlayPlotter``
            argument.
        info: Indexable pair; ``info[0]`` and ``info[1]`` are forwarded as
            the second and third ``OverlayPlotter`` arguments.
    """
    topic_info = info[0]
    topic_label = info[1]
    OverlayPlotter(percents, topic_info, topic_label).plot()
def plot_base(categories):
    """Plot the sentiment categories with the fixed chart strings.

    Args:
        categories: Forwarded unchanged as the first ``SentimentPlotter``
            argument.
    """
    title_text = "Sentiment Categorization by Comment"
    label_text = "Sentiment Category"
    SentimentPlotter(categories, title_text, label_text).plot()
def find_relationships(comments_and_ratings, base_dist):
    """Gather the base info for every topic pattern of interest.

    Args:
        comments_and_ratings: Forwarded unchanged to ``RelationshipFinder``.
        base_dist: Forwarded unchanged to ``RelationshipFinder``.

    Returns:
        A list of ``(base_info, label)`` tuples, one per topic and in table
        order, where ``base_info`` is whatever
        ``RelationshipFinder.get_base_info`` returns for the topic's pattern.
    """
    finder = RelationshipFinder(comments_and_ratings, base_dist)

    # (pattern, display label) pairs; the result follows this order.
    topic_patterns = [
        (r"^lab", "Labs"),
        ("language", "Foreign Language"),
        ("zoom", "Zoom"),
        ("canvas", "Canvas"),
        ("discus", "Discussion"),
        ("lectur", "Lecture"),
        # "connect" with no later "internet" and with a later people-word.
        # NOTE(review): the `+` on the negative lookahead looks redundant;
        # confirm against get_base_info before simplifying.
        (
            r"connect(?!.*internet)+(?=.*\b(?:students|peers|professor|everyone|classmates|people)\b)",
            "Interpersonal Connections",
        ),
    ]

    return [
        (finder.get_base_info(pattern), label)
        for pattern, label in topic_patterns
    ]
def main():
    """Run the evaluation pipeline: extract, format, score, and plot.

    Reads ``input/evals.pdf``, writes the formatted comments to
    ``comments.txt``, runs the sentiment analyzer on them, plots the overall
    category distribution, and then draws one overlay plot per topic returned
    by ``find_relationships``.

    Raises:
        ZeroDivisionError: If the analyzer reports categories whose counts
            sum to zero.
    """
    # extract text from pdf and separate each question
    extractor = PdfExtractor("input/evals.pdf")
    extractor.extract_text_to_file()
    extractor.write_individual_question_files()

    # format text so that comments are the units
    remote_instruction_formatter = Formatter(
        "output/How_has_remote_instruction_affected_your_experience.txt"
    )
    data = remote_instruction_formatter.comments_by_student

    # Explicit UTF-8 so the dump does not depend on the platform's locale
    # encoding (PDF-extracted text may contain non-ASCII characters).
    with open("comments.txt", "w", encoding="utf-8") as comments_file:
        for line in data:
            # writelines adds no separators; each item is written as-is.
            comments_file.writelines(line)

    # extract sentiment
    analyzer = SentimentAnalyzer(
        data, "components/analyzers/custom_model/NBC-0.9.pickle"
    )
    # NOTE(review): `average` and `individual_scores` are not used below.
    # The reads are kept in case these attributes are computed properties;
    # confirm and remove if they are plain attributes.
    average = analyzer.average_sentiment
    categories = analyzer.sentiment_buckets
    individual_scores = analyzer.individual_scores
    comments_and_ratings = analyzer.comments_and_ratings

    # get percent by category baseline
    counts = [categories[category] for category in categories]
    total = sum(counts)

    # The first three categories, in iteration order, are taken to be
    # positive / neutral / negative.
    # NOTE(review): this relies on the ordering of `sentiment_buckets`;
    # verify against SentimentAnalyzer.
    base_dist = dict(zip(("positive", "neutral", "negative"), counts))
    percents = [round(count / total, 2) for count in counts]

    plot_base(categories)

    infos = find_relationships(comments_and_ratings, base_dist)
    # Plain loop: the plots are side effects, so no list needs to be built.
    for info in infos:
        overlay_plot(percents, info)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
# Plot base
# plotter = SentimentPlotter(categories, "Sentiment Categorization by Comment", "Sentiment Category")
# plotter.plot()
# Plot overlay relationships
# overlay = OverlayPlotter(percents, lab_info, "Labs")
# overlay.plot()
# overlay = OverlayPlotter(percents, lang_info, "Language")
# overlay.plot()
# overlay = OverlayPlotter(percents, discuss_info, "Discussion")
# overlay.plot()
# overlay = OverlayPlotter(percents, zoom_info, "Zoom")
# overlay.plot()
# overlay = OverlayPlotter(percents, connection_info, "Personal Connections")
# overlay.plot()
# overlay = OverlayPlotter(percents, lecture_info, "Lecture")
# overlay.plot()
# overlay = OverlayPlotter(percents, canvas_info, "Canvas")
# overlay.plot()
# # Bigrams
# bigram = CategoryBigrams(comments_and_ratings)
# print("POSITIVE: ", bigram.sorted_positive_bigrams[:20])
# print("NEUTRAL: ", bigram.sorted_neutral_bigrams[:20])
# print("NEGATIVE: ", bigram.sorted_negative_bigrams[:20])
# plotter = SentimentPlotter(categories, "Three buckets", "Category", False)
# plotter.plot()
# scatter = SentimentScatterPlot(individual_scores, "Sentiment Based on Free Response Text", "Sentiment Score", False)
# scatter.plot()