-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtweet_sentiment.py
More file actions
104 lines (84 loc) · 2.48 KB
/
tweet_sentiment.py
File metadata and controls
104 lines (84 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import sys
import json
import re
def hw():
    """Print the fixed greeting ``Hello, world!`` to standard output."""
    # A single parenthesised argument parses the same way as a Python 2
    # print statement and as a Python 3 print() call.
    print('Hello, world!')
def lines(fp):
    """Print how many lines remain in the open file object *fp*.

    Args:
        fp: an object with a ``readlines()`` method. It is read through to
            the end, so nothing is left to read afterwards.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(str(len(fp.readlines())))
def json_parse():
    """Load the tweet file named by ``sys.argv[2]`` and keep English tweets.

    The file is read as one JSON document per line. Blank lines are skipped.
    A record is kept only when its ``lang`` entry equals ``'en'``; records
    without a ``lang`` entry are dropped.

    Returns:
        list: the decoded English tweets, in file order.

    Raises:
        IndexError: if ``sys.argv`` has no element at index 2.
        ValueError: if a non-blank line is not valid JSON.
    """
    tweets = []
    # The context manager closes the handle even when a line fails to parse.
    with open(sys.argv[2]) as tfile:
        for line in tfile:
            # json.loads rejects empty input, so whitespace-only lines are
            # skipped instead of aborting the whole run.
            if not line.strip():
                continue
            tweet = json.loads(line)
            # .get() covers records that carry no 'lang' key at all.
            if tweet.get('lang') == 'en':
                tweets.append(tweet)
    return tweets
def tweet_txt(tweets):
    """Return the UTF-8 encoded ``text`` entry of every tweet, in order.

    Args:
        tweets: iterable of mappings, each holding a ``'text'`` entry whose
            value supports ``.encode('utf-8')``.

    Returns:
        list: one encoded status per input tweet.

    Raises:
        KeyError: if a tweet has no ``'text'`` entry.
    """
    return [t['text'].encode('utf-8') for t in tweets]
def sentiment(afindict, status):
    """Print one sentiment score per status, in order.

    A status's score is the sum of ``afindict[term]`` over every term that
    occurs in it between word boundaries. Matching is case-sensitive, and a
    term contributes at most once per status however often it appears.

    Args:
        afindict: mapping of term -> numeric score.
        status: iterable of status texts to score.

    Returns:
        None. Each score is written to standard output on its own line.
    """
    # Compile every pattern once, before the status loop, instead of
    # rebuilding it for each (status, term) pair. re.escape makes terms
    # containing regex metacharacters match literally.
    patterns = [
        (re.compile(r'\b' + re.escape(term) + r'\b'), value)
        for term, value in afindict.items()
    ]
    for s in status:
        score = 0
        for pattern, value in patterns:
            if pattern.search(s):
                score = score + value
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(score)
def afin():
    """Load the sentiment lexicon named by ``sys.argv[1]``.

    Each non-blank line is expected to hold a term and an integer score
    separated by a single tab character.

    Returns:
        dict: term -> int score.

    Raises:
        IndexError: if ``sys.argv`` has no element at index 1.
        ValueError: if a non-blank line does not split into exactly two
            tab-separated fields, or its score is not an integer.
    """
    scores = {}
    # The context manager closes the handle even when a line is malformed.
    with open(sys.argv[1]) as afinn_file:
        for line in afinn_file:
            # A blank line cannot be unpacked into (term, score); skip it
            # instead of aborting the load.
            if not line.strip():
                continue
            term, score = line.split("\t")
            # int() tolerates the trailing newline left on the score field.
            scores[term] = int(score)
    return scores
def main():
    """Score the English tweets in the tweet file and print each score.

    Expects two command-line arguments: the sentiment lexicon path
    (``sys.argv[1]``) and the tweet file path (``sys.argv[2]``). The files
    themselves are opened by ``afin()`` and ``json_parse()``, so no handles
    are opened here.

    Raises:
        SystemExit: with a usage message when fewer than two arguments are
            supplied.
    """
    # Fail with a readable message rather than an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <sentiment_file> <tweet_file>' % sys.argv[0])
    afindict = afin()
    tweets = json_parse()
    status = tweet_txt(tweets)
    sentiment(afindict, status)
# Script entry point: run the pipeline on direct execution only, not on import.
if __name__ == '__main__':
    main()