-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnlp_predict.py
More file actions
75 lines (59 loc) · 2.4 KB
/
nlp_predict.py
File metadata and controls
75 lines (59 loc) · 2.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Load the saved NLP model and use it to predict text labels.
import pandas as pd
import string
import pickle
import sys
from nltk.corpus import stopwords
from sklearn.metrics import classification_report
# Module-level holder for the prediction model.
# It starts as an empty dict, which predict_label_iterable() treats as
# "no model loaded yet"; load_model() replaces it with the unpickled object.
# The model must be loaded before doing any prediction with it.
loaded_model = {}
# The same preprocess_text() function used during model training is provided here.
# NOTE(review): presumably the pickled pipeline looks this function up by name
# when it predicts -- confirm before renaming or moving it.
def preprocess_text(message):
    """Strip punctuation and English stopwords from a message.

    Args:
        message: Text to clean; it is iterated character by character.

    Returns:
        A list of the whitespace-separated words of the punctuation-free
        text, in their original casing, excluding every word whose
        lowercase form is in NLTK's English stopword list.
    """
    # Fetch the stopword list once per call and keep it in a set: this avoids
    # calling stopwords.words() again for every word and makes each
    # membership test O(1).
    english_stopwords = set(stopwords.words('english'))

    # Drop punctuation characters and glue the rest back into one string.
    nopunc = ''.join(char for char in message if char not in string.punctuation)

    # Keep only the words that are not stopwords.
    return [word for word in nopunc.split() if word.lower() not in english_stopwords]
def get_data(csvfile):
    """Read a comma-separated file into a DataFrame.

    Args:
        csvfile: Path or file-like object handed to pandas.read_csv.

    Returns:
        The parsed pandas.DataFrame; the first row of the file supplies
        the column names.
    """
    read_options = {'header': 0, 'sep': ','}
    frame = pd.read_csv(csvfile, **read_options)
    return frame
def load_model(filename):
    """Load a pickled model from disk into the module-level ``loaded_model``.

    Args:
        filename: Path of the pickle file to read.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever pickle.load raises for a corrupt or
            incompatible file.
    """
    global loaded_model
    # SECURITY: unpickling can execute arbitrary code, so only load model
    # files that come from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
def test_model(X, y):
    """Predict labels for X and print a classification report against y.

    Args:
        X: Iterable of inputs passed to predict_label_iterable().
        y: Expected (ground-truth) labels, one per element of X.

    Returns:
        None. The report is printed to standard output.
    """
    predictions = predict_label_iterable(X)
    # classification_report takes (y_true, y_pred): the expected labels go
    # first. With the arguments reversed, precision and recall are swapped
    # in the printed report.
    print(classification_report(y, predictions))
def predict_label_iterable(X):
    """Predict label(s) for X with the currently loaded model.

    Args:
        X: Iterable of inputs handed to the model's predict() method.

    Returns:
        Whatever the loaded model's predict(X) returns.

    Raises:
        SystemExit: If no model has been loaded yet, i.e. ``loaded_model``
            is still the empty dict.
    """
    model_missing = loaded_model == {}
    if model_missing:
        # Guard clause: sys.exit raises, so nothing below runs without a model.
        sys.exit('error: load the model first')
    return loaded_model.predict(X)
def predict_label(textString):
    """Predict the label for a single text string.

    Args:
        textString: The text to classify.

    Returns:
        The first predicted label, converted to a string.
    """
    # The model predicts over an iterable, so wrap the single string in a
    # one-element list and read back the only prediction.
    predicted = predict_label_iterable([textString])
    return str(predicted[0])
def main():
    """Run a quick manual check of the saved model.

    Loads the model from 'nlp_model.sav', prints a classification report
    over the whole CSV dataset, then prints the predicted label for two
    individual rows of it.
    """
    load_model('nlp_model.sav')

    # Dataset used for the check; 'data/training data.csv' is an alternative.
    frame = get_data('data/a-s-data.csv')
    texts = frame['data']

    # Evaluate the loaded model against every row.
    test_model(texts, frame['labels'])

    # Spot-check single-string prediction on two rows.
    for row in (0, 40):
        print('predicted = ', predict_label(texts.values[row]))
# Script entry point: run the manual check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()