# Coding-Challenge-F21/main.py at main · kushb007/Coding-Challenge-F21 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Dependencies: Stanza and NLTK for the NLP work, re for sentence splitting.
import stanza
import re
import nltk
# Fetch the named NLTK resources at start-up. Presumably these back the
# word_tokenize / pos_tag / wordnet / sentiwordnet calls made further down
# (verify against the NLTK version in use).
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')
nltk.download('wordnet')
from senticnet.senticnet import SenticNet
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.tag import pos_tag
# Fetch the English Stanza models, then build a pipeline restricted to the
# tokenize and sentiment processors.
stanza.download('en')
nlp = stanza.Pipeline(lang='en', processors='tokenize,sentiment')
# NOTE(review): `sn` is not referenced anywhere else in the visible script.
sn = SenticNet()

# Split input.txt into sentences with a regex.
# A sentence starts at a capital letter and runs (non-greedily) to the first
# '.', '!' or '?' that is not preceded by "Dr" or "Mr" and is followed by a
# quote, comma or space. Each match keeps that trailing delimiter character.
with open('input.txt', 'r') as f:
    # Newlines become spaces so a sentence may wrap across lines; the appended
    # space lets a sentence at the very end of the file satisfy the trailing
    # delimiter requirement.
    text = f.read().replace('\n', ' ') + ' '
# Raw string so the regex syntax is not interpreted as string escapes.
regex = r"[A-Z].*?(?<!Dr)(?<!Mr)[.!?][',\", ]"
sentences = re.compile(regex).findall(text)

def _wordnet_pos(tag):
    """Return the WordNet part of speech for a POS *tag*, or None.

    Tags are matched by prefix so inflected variants (e.g. 'NNS', 'VBD',
    'JJR', 'RBS') map to the same WordNet category as their base tag.
    """
    if tag.startswith('NN'):
        return wn.NOUN
    if tag.startswith('VB'):
        return wn.VERB
    if tag.startswith('JJ'):
        return wn.ADJ
    if tag.startswith('RB'):
        return wn.ADV
    return None


def _sentiwordnet_scores(sentence):
    """Sum SentiWordNet scores over the words of *sentence*.

    Each token is tagged, mapped to a WordNet part of speech and looked up;
    only the first synset returned for the word contributes. Tokens with no
    usable tag or no synset are skipped.

    Returns a (positive, objective, negative) tuple of summed scores.
    """
    pos_total = 0
    obj_total = 0
    neg_total = 0
    for word, tag in pos_tag(nltk.word_tokenize(sentence)):
        wordnet_pos = _wordnet_pos(tag)
        if wordnet_pos is None:
            continue
        # Looked up fresh for every token so a skipped token can never reuse
        # the previous token's synsets.
        synsets = wn.synsets(word, wordnet_pos)
        if not synsets:
            continue
        senti = swn.senti_synset(synsets[0].name())
        pos_total += senti.pos_score()
        obj_total += senti.obj_score()
        neg_total += senti.neg_score()
    return pos_total, obj_total, neg_total


total = 0
for s in sentences:
    #Finding sentiment via sentiwordnet
    # s[:-1] drops the trailing delimiter character kept by the sentence regex.
    pscore, oscore, nscore = _sentiwordnet_scores(s[:-1])
    #Finding sentiment via the Stanza pipeline
    doc = nlp(s)
    #Combining results
    # SentiWordNet vote: 2 if positive outweighs objective, 0 if negative
    # outweighs objective, otherwise 1.
    if pscore > oscore:
        sscore = 2
    elif nscore > oscore:
        sscore = 0
    else:
        sscore = 1
    # Only the first sentence Stanza finds in s contributes its sentiment.
    sscore += doc.sentences[0].sentiment
    print(s+': ', sscore)
    total += sscore
#Output the mean combined score per sentence
if sentences:
    print(total/len(sentences))
else:
    # Nothing matched the sentence regex; avoid dividing by zero.
    print('No sentences found')