-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess_data.py
More file actions
31 lines (22 loc) · 1.06 KB
/
preprocess_data.py
File metadata and controls
31 lines (22 loc) · 1.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import argparse
import pandas as pd
import numpy as np
# Labels whose rows are discarded entirely (exclude unnecessary labels).
DROPPED_LABELS = ('neutral', 'empty')

# Merge table: fine-grained label -> coarse label it is folded into.
# Labels that are not listed here are written out unchanged.
LABEL_MERGES = {
    'enthusiasm': 'joy',
    'love': 'joy',
    'fun': 'joy',
    'relief': 'joy',
    'happiness': 'joy',
    'hate': 'disgust',
    'worry': 'disgust',
    'boredom': 'sadness',
    'fear': 'surprise',
}


def preprocess(data: pd.DataFrame) -> pd.DataFrame:
    """Drop unwanted labels and merge the remaining ones into coarse classes.

    Args:
        data: Frame with at least a ``label`` column. It is not modified.

    Returns:
        A new frame without the rows whose label is in ``DROPPED_LABELS`` and
        with every label found in ``LABEL_MERGES`` replaced by its target.
        Row order, the index and all other columns are kept as they were.
    """
    kept = data[~data['label'].isin(DROPPED_LABELS)]
    # ``assign`` builds a new frame instead of writing into the filtered
    # selection, so no SettingWithCopyWarning is raised on pandas versions
    # that still track chained assignment.
    return kept.assign(label=kept['label'].replace(LABEL_MERGES))


def main(argv=None):
    """Read a headerless ``text<TAB>label`` file, preprocess it, write it back.

    Args:
        argv: Command-line arguments (``input_path output_path``). ``None``
            makes argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('input_path')
    parser.add_argument('output_path')
    args = parser.parse_args(argv)

    data = pd.read_csv(args.input_path, sep='\t', encoding='utf-8', names=['text', 'label'])
    preprocess(data).to_csv(args.output_path, sep='\t', header=False, index=False)


# Only run the conversion when executed as a script, so that importing this
# module (e.g. from a test) neither parses arguments nor touches any file.
if __name__ == '__main__':
    main()