-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsummarize_score.py
More file actions
47 lines (39 loc) · 1.88 KB
/
summarize_score.py
File metadata and controls
47 lines (39 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
import argparse
from datasets import load_dataset
def parse_args(argv=None):
    """Parse the command-line options of the score summary script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the original behaviour; passing a list allows programmatic use.

    Returns:
        argparse.Namespace with two string attributes:
        ``result_dir`` (default ``./examples/result.json``) and
        ``data_path`` (default ``./data/test-00000-of-00001.parquet``).
    """
    parser = argparse.ArgumentParser()
    # NOTE: despite its name, --result_dir is the path of a JSON *file*;
    # the script body opens it for reading.
    parser.add_argument(
        "--result_dir",
        type=str,
        default="./examples/result.json",
        help="The path to save the evaluation report json file.",
    )
    parser.add_argument(
        "--data_path",
        type=str,
        default="./data/test-00000-of-00001.parquet",
        help="The path to the benchmark dataset.",
    )
    return parser.parse_args(argv)
def summerize(result_dict, dataset, *, wrong_discount=0.7):
    """Aggregate per-problem results into accuracy and process scores.

    Args:
        result_dict: Mapping from a key convertible with ``int()`` (used as
            an index into ``dataset``) to an entry providing
            ``'is_fully_correct'`` and ``'final_score'``.
        dataset: Indexable collection whose rows provide ``'total_score'``.
        wrong_discount: Factor applied to the normalised score
            (``final_score / total_score``) of entries that are not fully
            correct. Defaults to 0.7, the previously hard-coded value.

    Returns:
        Tuple ``(accuracy, process_score)``, both scaled by 100 and averaged
        over every key in ``result_dict``. ``(0.0, 0.0)`` when
        ``result_dict`` is empty.
    """
    total_ac = 0
    total_ps = 0
    total_num = 0
    for key, entry in result_dict.items():
        # Counted up front: an entry that fails below still contributes to
        # the denominator, i.e. it is effectively scored as zero.
        total_num += 1
        try:
            max_score = dataset[int(key)]['total_score']
            if entry['is_fully_correct']:
                score = 1.0
            else:
                score = entry['final_score'] / max_score * wrong_discount
            total_ps += score
            # Accuracy only counts a literal ``True``; the score above uses
            # plain truthiness. Kept as in the original implementation.
            total_ac += (entry['is_fully_correct'] is True)
        except Exception as e:
            # Best effort: report the malformed entry and keep going.
            print(f"Error processing key {key}: {e}")
    if total_num == 0:
        return 0.0, 0.0
    return total_ac / total_num * 100, total_ps / total_num * 100
# Script body: load the benchmark and the evaluation results, then print the
# accuracy / process scores per category and overall.
args = parse_args()
dataset = load_dataset("parquet", data_files={"test": args.data_path})["test"]

# Read the results with an explicit encoding so the outcome does not depend
# on the platform's default locale encoding.
with open(args.result_dir, 'r', encoding="utf-8") as f:
    result_dict = json.load(f)

# Look up each entry's category once instead of once per category filter.
_category_by_key = {k: dataset[int(k)]['category'] for k in result_dict}
result_dict_text = {
    k: v for k, v in result_dict.items() if _category_by_key[k] == 'text'
}
result_dict_mm = {
    k: v for k, v in result_dict.items() if _category_by_key[k] == 'multimodal'
}

ac_text, ps_text = summerize(result_dict_text, dataset)
ac_mm, ps_mm = summerize(result_dict_mm, dataset)
ac_overall, ps_overall = summerize(result_dict, dataset)

print(f"Text: Accuracy Score = {ac_text:.2f}, Process Score = {ps_text:.2f}")
print(f"Multimodal: Accuracy Score = {ac_mm:.2f}, Process Score = {ps_mm:.2f}")
print(f"Overall: Accuracy Score = {ac_overall:.2f}, Process Score = {ps_overall:.2f}")