forked from agwaBom/PEMA
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpostprocess_generated_opt.py
More file actions
33 lines (27 loc) · 1.17 KB
/
postprocess_generated_opt.py
File metadata and controls
33 lines (27 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import re
import argparse
# Patterns deleted from every line, applied in this order. Most of them end in
# `.*`, so they remove the matched marker together with the rest of the line
# (`.` does not match the newline, so the line terminator is kept).
_ARTIFACT_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r' I\'m not sure.*',
        r' I\'m" .*',
        r' I\'m.*',
        r' I 50%.*',
        r'Convert the following informal sentence .*Formal:',
        r'Translate this from English to German:.*',
        r' I\..*',
        r' \.\.\.\.\.\..*',
    )
)

# A word immediately followed by a single space and the same word again.
_REPEATED_WORD = re.compile(r'\b(\w+)[ ]\1\b')

# Upper bound on collapse passes per line (the original script always ran
# exactly this many passes).
_MAX_COLLAPSE_PASSES = 1000


def _strip_artifacts(line):
    """Return *line* with every pattern in ``_ARTIFACT_PATTERNS`` removed."""
    for pattern in _ARTIFACT_PATTERNS:
        line = pattern.sub("", line)
    return line


def _collapse_repeated_words(line):
    """Collapse runs of an identical word ("the the the" -> "the").

    One substitution pass only merges non-overlapping pairs, so the pass is
    repeated. Once a pass leaves the line unchanged every later pass would
    too, so stopping early yields the same result as always running
    ``_MAX_COLLAPSE_PASSES`` passes.
    """
    for _ in range(_MAX_COLLAPSE_PASSES):
        collapsed = _REPEATED_WORD.sub(r'\1', line)
        if collapsed == line:
            break
        line = collapsed
    return line


def postprocess_lines(lines):
    """Clean a list of text lines and return the cleaned list.

    Each line has the artifact patterns removed and repeated words collapsed.
    Afterwards, a last line that became empty is dropped, and the trailing
    newline of the (new) last line is removed so the written output does not
    end with a newline.

    Args:
        lines: Lines as returned by ``file.readlines()`` (newline-terminated,
            except possibly the last one). The input list is not modified.

    Returns:
        A new list of cleaned lines. Empty input yields an empty list.
    """
    cleaned = [_collapse_repeated_words(_strip_artifacts(line)) for line in lines]

    # Guard both checks: the list may be empty from the start, or become empty
    # once a fully-stripped last line has been dropped.
    if cleaned and cleaned[-1] == '':
        cleaned.pop()
    if cleaned and cleaned[-1].endswith('\n'):
        cleaned[-1] = cleaned[-1][:-1]
    return cleaned


def main(argv=None):
    """Read ``--input``, post-process its lines, and write them to ``--output``.

    Args:
        argv: Optional argument list; ``None`` makes argparse use ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, default="./pema_90_0/lm_pred.txt")
    parser.add_argument("--output", type=str, default="./pema_90_0/lm_pred_post.txt")
    args = parser.parse_args(argv)

    # Context manager so the input handle is closed deterministically.
    with open(args.input, "r") as f:
        data = f.readlines()

    with open(args.output, "w") as f:
        f.writelines(postprocess_lines(data))


if __name__ == "__main__":
    main()