-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcalc_meteor.py
More file actions
109 lines (90 loc) · 2.54 KB
/
calc_meteor.py
File metadata and controls
109 lines (90 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import nltk
import numpy as np
import pandas as pd
import torch
from utils.text import CaptionProcessor
from utils.datacreator import CaptionGenderDataset
from nltk.translate.meteor_score import meteor_score
# METEOR scoring relies on NLTK's WordNet corpus; if it is not installed yet,
# run this once: nltk.download("wordnet")
# Embedding file handed to CaptionProcessor as ``glove_path``.
GLOVE_PATH = "./glove.6B.50d.w2vformat.txt"

# Handed to CaptionProcessor as ``model_type``.
# Original author's note: use glove for fasttext as well; it is not known why
# that combination does not work as expected.
SUB_MODEL = "bert"

# Force CPU to avoid CUDA-related segfaults.
DEVICE = torch.device("cpu")
# Masculine terms: nouns first, pronouns last. The entries are listed in the
# same order as their counterparts in FEMININE.
MASCULINE = [
    "man", "men", "male", "father", "gentleman", "boy", "uncle",
    "husband", "actor", "prince", "waiter",
    "he", "his", "him",
]

# Feminine terms, in the same order as MASCULINE.
FEMININE = [
    "woman", "women", "female", "mother", "lady", "girl", "aunt",
    "wife", "actress", "princess", "waitress",
    "she", "her", "hers",
]

# Full gendered vocabulary: all masculine terms followed by all feminine ones.
GENDER_WORDS = [*MASCULINE, *FEMININE]

# Token handed to CaptionProcessor as ``gender_token``.
GENDER_TOKEN = "<unk>"
# Annotation files (.pkl) handed to CaptionGenderDataset: the human annotation
# path first, the model caption path second.
HUMAN_ANN_PATH = "./data/gender_obj_cap_mw_entries.pkl"
MODEL_ANN_PATH = "./data/new_models/no_masking/bakllava.pkl"
# NOTE: the dataset object is constructed at import time, so merely importing
# this module requires both files above to be available.
data_obj = CaptionGenderDataset(HUMAN_ANN_PATH, MODEL_ANN_PATH)
# Combined annotations; the script below reads its "caption_human" and
# "caption_model" entries.
ann_data = data_obj.getDataCombined()
# The column labels of this frame serve as the object vocabulary (OBJ_WORDS).
object_presence_df = data_obj.get_object_presence_df()
OBJ_WORDS = object_presence_df.columns.tolist()
# Token handed to CaptionProcessor as ``obj_token``.
OBJ_TOKEN = "<obj>"
# Size of the object vocabulary (not referenced in the visible part of the file).
NUM_OBJS = len(OBJ_WORDS)
# Module-level caption processor used by captionPreprocess (maskWords and
# equalize_vocab). It receives the gender and object word lists positionally,
# plus their tokens, the embedding path, the model type and the torch device.
# NOTE: constructed at import time.
capProcessor = CaptionProcessor(
GENDER_WORDS,
OBJ_WORDS,
gender_token=GENDER_TOKEN,
obj_token=OBJ_TOKEN,
glove_path=GLOVE_PATH,
model_type=SUB_MODEL,
device=DEVICE,
)
def captionPreprocess(
    human_captions: pd.Series,
    model_captions: pd.Series,
    mode="gender",
    similarity_threshold=0.5,
    maskType="contextual",
):  # type: ignore
    """Mask both caption series and equalize their vocabulary.

    Each series is passed through ``capProcessor.maskWords`` with the given
    ``mode`` (model captions first, then human captions). The two masked
    series are then handed to ``capProcessor.equalize_vocab`` with
    ``bidirectional=False``.

    Args:
        human_captions: Human-written captions.
        model_captions: Model-generated captions.
        mode: Forwarded to ``maskWords``.
        similarity_threshold: Forwarded to ``equalize_vocab``.
        maskType: Forwarded to ``equalize_vocab``.

    Returns:
        A ``(human_captions, model_captions)`` tuple holding the two values
        produced by ``equalize_vocab``.
    """
    # Keep the original call order: the model captions are masked first.
    masked_model = capProcessor.maskWords(model_captions, mode=mode)
    masked_human = capProcessor.maskWords(human_captions, mode=mode)

    equalize_options = {
        "similarity_threshold": similarity_threshold,
        "maskType": maskType,
        "bidirectional": False,
    }
    equalized_human, equalized_model = capProcessor.equalize_vocab(
        masked_human,
        masked_model,
        **equalize_options,
    )
    return equalized_human, equalized_model
def calculate_meteor(refs, candidate):
    """Return the METEOR score of ``candidate`` against ``refs``.

    Args:
        refs: Iterable of reference sentences (strings).
        candidate: Hypothesis sentence (string).

    Returns:
        Whatever ``meteor_score`` returns for the whitespace-tokenized
        references and hypothesis.
    """
    # meteor_score is given token lists here, so split on whitespace first.
    reference_tokens = [reference.split() for reference in refs]
    hypothesis_tokens = candidate.split()
    return meteor_score(reference_tokens, hypothesis_tokens)
# Script body: score captions with METEOR and print the mean score.
# All human captions, but only every fifth model caption.
human_ann = ann_data["caption_human"]
model_ann = ann_data["caption_model"].iloc[::5]
# NOTE(review): human_ann and model_ann have different lengths here (the
# latter is strided by 5) -- confirm captionPreprocess/equalize_vocab expects
# that.
human_cap, model_cap = captionPreprocess(human_ann, model_ann)
# One score slot per entry of model_ann.
vals = np.zeros(len(model_ann))
for i in range(len(model_ann)):
    # NOTE(review): the reference is the raw human caption and the candidate
    # is the *processed human* caption; model_cap is never used in the visible
    # code. Confirm whether model_cap[i] was intended as the candidate.
    # NOTE(review): `[i]` with an integer is presumably a label lookup on a
    # pandas Series, and i only covers 0..len(model_ann)-1, i.e. the first
    # fifth of human_ann -- verify against the index of ann_data.
    vals[i] = calculate_meteor([human_ann[i]], human_cap[i])
# Indentation was lost in this copy of the file; the print is assumed to run
# once, after the loop.
print(f"{vals.mean()=}")