-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbitcointalk_batch_classifier.py
More file actions
89 lines (73 loc) · 2.9 KB
/
bitcointalk_batch_classifier.py
File metadata and controls
89 lines (73 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import getopt
import sys
import bitcointalk_sentiment_classifier
import json
import datetime
import os
def main(argv):
    """Parse command-line options and start the batch classification.

    Options (all but ``-h`` take a value; omitted ones default to ``''``):
        -h  print the usage line and exit
        -i  input folder
        -m  model file
        -f  output folder
        -a  announce JSON file
        -s  sentiment JSON file
        -n  number of output posts (number, fraction or ``all``)

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        SystemExit: With status 2 when an option is not recognized, and with
            a success status after ``-h`` has printed the usage line.
    """
    usage = ('bitcointalk_batch_classifier.py -i <input folder> -m <model> '
             '-f <output folder> -a <announce JSON> -s <sentiment JSON> '
             '-n <number of output posts [number|fraction|all]>')

    try:
        opts, _ = getopt.getopt(argv, "hi:m:f:a:s:n:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    # One slot per value-taking option; getopt only ever yields declared
    # options, so every non-help ``opt`` below is already a key here.
    values = {'-i': '', '-m': '', '-f': '', '-a': '', '-s': '', '-n': ''}
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            # Stop here: asking for help must not start a classification run
            # with empty paths.
            sys.exit()
        values[opt] = arg

    batch_classify(values['-i'], values['-m'], values['-f'],
                   values['-a'], values['-s'], values['-n'])
def _dump_json(path, data):
    """Overwrite the file at *path* with *data* serialized as JSON."""
    with open(path, 'w') as handle:
        json.dump(data, handle)


def batch_classify(input_folder, model_file, output_folder, announce_json, sentiment_json, output_posts):
    """Run the sentiment classifier on every topic that needs (re)processing.

    A topic listed in *announce_json* is processed when it has no entry in
    *sentiment_json*, or when its ``dateTimeParsing`` value is not earlier
    than the recorded ``dateTimeSentiment``. After each processed topic its
    ``dateTimeSentiment`` is set to the time this run started; the sentiment
    JSON is rewritten every 10 topics and once more at the end.

    Args:
        input_folder: Folder holding one ``<topicId>.json`` file per topic.
        model_file: Passed through to the classifier.
        output_folder: Passed through to the classifier.
        announce_json: Path to a JSON object keyed by topic id; entries are
            read for ``dateTimeParsing`` (``%Y-%m-%d %H:%M``).
        sentiment_json: Path to a JSON object keyed by topic id with
            ``dateTimeSentiment`` (``%Y.%m.%d %H:%M``); read, then updated.
        output_posts: Passed through to the classifier.

    Raises:
        SystemExit: With status 1 when ``lockSentiment.txt`` cannot be opened.
    """
    parsed_format = '%Y-%m-%d %H:%M'
    sentiment_format = '%Y.%m.%d %H:%M'

    # NOTE(review): opening a file for writing does not by itself exclude
    # other processes on many platforms - confirm how this lock is meant to
    # be enforced.
    try:
        lock_file = open('lockSentiment.txt', 'w')
    except OSError:
        print('Another process is working. Exiting.')
        sys.exit(1)

    try:
        with open(announce_json, 'r') as handle:
            parsed_topics = json.load(handle)
        with open(sentiment_json, 'r') as handle:
            sentiments = json.load(handle)

        def needs_processing(topic_id):
            # New topics are always processed; known ones only when they were
            # parsed at or after their last classification.
            if topic_id not in sentiments:
                return True
            parsed_at = datetime.datetime.strptime(
                parsed_topics[topic_id]['dateTimeParsing'], parsed_format)
            classified_at = datetime.datetime.strptime(
                sentiments[topic_id]['dateTimeSentiment'], sentiment_format)
            return parsed_at >= classified_at

        pending = [topic_id for topic_id in parsed_topics if needs_processing(topic_id)]

        # Every topic handled in this run is stamped with the run's start time.
        run_stamp = datetime.datetime.now().strftime(sentiment_format)
        total = len(pending)
        print("Topics to (re)process:{}".format(total))

        for done, topic_id in enumerate(pending, start=1):
            filename = os.path.join(input_folder, '{}.json'.format(topic_id))
            bitcointalk_sentiment_classifier.classify(filename, model_file, output_folder, output_posts)
            sentiments[topic_id] = {'dateTimeSentiment': run_stamp}
            if done % 10 == 0:
                # Periodic checkpoint so an interrupted run keeps its progress.
                _dump_json(sentiment_json, sentiments)
                print("Processed {} topics of {}".format(done, total))

        _dump_json(sentiment_json, sentiments)
    finally:
        # Release the lock-file handle even when loading or classifying fails.
        lock_file.close()
if __name__ == '__main__':
    # Script entry point: hand everything after the program name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)