-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_all_evals.py
More file actions
78 lines (63 loc) · 1.82 KB
/
run_all_evals.py
File metadata and controls
78 lines (63 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
"""
批量评估 - 仅运行 evaluate,不重新导入
适用于已经通过 batch_import.py 导入数据的情况
"""
import subprocess
import sys
from pathlib import Path
# Identifiers handed one at a time to `evaluate.py gen -c <id>`.
# Batch 1 (already completed, kept disabled):
#   "conv-26", "conv-30", "conv-41", "conv-42", "conv-43"
SPLITS = [
    # Batch 2
    "conv-44",
    "conv-47",
    "conv-48",
    "conv-49",
    "conv-50",
]
def run_command(cmd: list) -> bool:
    """Run *cmd* as a child process and report whether it succeeded.

    The child's output is not captured: it inherits this process's
    stdout/stderr, so it is displayed live.

    Args:
        cmd: Argument vector (program followed by its arguments), passed to
            ``subprocess.run`` without a shell.

    Returns:
        True if the child exited with status 0; False if it exited with a
        non-zero status or could not be run at all.
    """
    # Anything we have printed so far may still sit in Python's buffer. Push
    # it out before the child writes to the same stream, otherwise our
    # banners show up after the child's output when stdout is a pipe or file.
    sys.stdout.flush()
    try:
        result = subprocess.run(cmd, check=False)
    except Exception as e:
        # Deliberately broad: a command that cannot be launched counts as a
        # failed run for the caller instead of aborting the whole batch.
        print(f"[ERROR] Command failed: {e}")
        return False
    return result.returncode == 0
def _print_banner(title: str) -> None:
    """Print *title* framed above and below by a 60-character rule."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def main() -> int:
    """Evaluate every entry of ``SPLITS`` and print a summary.

    Each entry is evaluated by running ``evaluate.py gen -c <id>`` with the
    current interpreter via ``run_command``. A failed evaluation is recorded
    and the loop carries on with the next entry.

    Returns:
        0 if every evaluation succeeded, 1 if at least one failed, so the
        value can be used directly as the process exit status.
    """
    _print_banner("Running evaluations (without import)")
    print()

    total = len(SPLITS)
    failed_splits = []

    for i, conv_id in enumerate(SPLITS, 1):
        print()
        _print_banner(f"[{i}/{total}] Evaluating {conv_id}")
        print()

        if not run_command([sys.executable, "evaluate.py", "gen", "-c", conv_id]):
            print(f"[ERROR] Evaluation failed for {conv_id}")
            failed_splits.append(conv_id)
            # Do not announce a failed split as completed.
            continue

        print()
        print(f"[Done] {conv_id} completed ✓")
        print()

    # Summary
    print()
    _print_banner("Summary")
    print(f"Successful: {total - len(failed_splits)}/{total}")

    if failed_splits:
        print("\nFailed splits:")
        for split in failed_splits:
            print(f" - {split}")
        return 1

    print("\nAll evaluations completed successfully! ✓")
    return 0


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect failed evaluations.
    sys.exit(main())