Commen-sense/Model.py at main · Payal508/Commen-sense · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from ytAPI import youtubeAPI
import re
import string
import pandas as pd

class model:
    def __init__(self):
        self.training_df = pd.read_csv("/Users/payal/Downloads/YT setiment analysis/YT Project FINAL/trainingTweets.csv", encoding='latin')
        # self.training_df = pd.read_csv("/Users/anorangefalcon/Downloads/YT Project/trainingTweets.csv", encoding='latin')

        self.training_df.drop(columns = ['1467810369','Mon Apr 06 22:19:45 PDT 2009','NO_QUERY','_TheSpecialOne_'], inplace = True)
        print(type(self.training_df.Remark))
        # self.training_df.Remark = self.training_df.Remark.str.replace(0,'negative')
        # self.training_df.Remark = self.training_df.Remark.str.replace(4,'positive')
        self.training_df["Remark"] = self.training_df["Remark"].replace({0: "negative", 4: "positive"})
        self.training_df['comment'] = self.training_df['comment'].str.replace(r"@[A-Za-z0-9_]+", '')

        cleantext = lambda x: self.clean_text(x)
        self.training_df['comment'] = pd.DataFrame(self.training_df['comment'].apply(cleantext))
        print("hello I'm model's innit")

    def clean_text(self, text):

        text = text.lower()

        text = re.sub('\[.*?\]','', text)
        text = re.sub('[%s]'%re.escape(string.punctuation), '', text)

        text = re.sub('\w*\d\w*','', text)
        text = re.sub('\n','', text)

        return text

    def get_clean_training_data(self):
        return self.training_df

    # def clean_data(self):
    #     # if self.isAPI == False:
    #     #     test_df = pd.read_csv("/Users/rishabmehndiratta/Documents/Spic Project/Data/trainingTweets.csv", encoding='latin')
    #     #     # df.head()

    #     #     test_df.drop(columns = ['1467810369','Mon Apr 06 22:19:45 PDT 2009','NO_QUERY','_TheSpecialOne_'], inplace = True)
    #     #     test_df.Remark = test_df.Remark.str.replace(0,'negative')
    #     #     test_df.Remark = test_df.Remark.str.replace(4,'positive')
    #     #     test_df['comment'] = test_df['comment'].str.replace(r'@[A-Za-z0-9_]+', '')
    #     #     cleantext = lambda x: self.clean_text(x)

    #     #     test_df['Cleaned_Description'] = pd.DataFrame(test_df['comment'].apply(cleantext))
    #     else:
    #         cleantext = lambda x: self.clean_text(x)
    #         self.df['Cleaned_Description'] = pd.DataFrame(self.df['comment'].apply(cleantext))
    #         print(self.df['Cleaned_Description'].head())


# obj = model()
# obj.clean_data()