-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark_batch.py
More file actions
55 lines (46 loc) · 1.5 KB
/
benchmark_batch.py
File metadata and controls
55 lines (46 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import json
import re
import time
from collections import defaultdict
import pandas as pd
from datasets import load_dataset
from more_itertools import chunked
from tqdm import tqdm
import pyle
def parse_header(theorem: str):
    """Split a Lean source string into its import header and remaining body.

    Returns a tuple ``(header, rest)``: ``header`` is the list of stripped
    ``import ...`` lines, and ``rest`` is every other non-blank line joined
    back together and stripped. Blank lines are discarded entirely.
    """
    import_line = re.compile(r"^import .*$")
    header: list[str] = []
    body_lines: list[str] = []
    for line in theorem.splitlines():
        # Skip blank lines so they appear in neither the header nor the body.
        if not line.strip():
            continue
        if import_line.match(line):
            header.append(line.strip())
        else:
            body_lines.append(line)
    return header, "\n".join(body_lines).strip()
if __name__ == "__main__":
    # Benchmark pyle's Lean evaluation: first time a single import-only
    # snippet to warm the state cache, then batch-evaluate 25 proofs from
    # the Lean-workbook dataset and dump per-problem results to JSON.
    dataset = load_dataset(
        "Goedel-LM/Lean-workbook-proofs", split="train[:25]", num_proc=1
    )
    dataset = dataset["full_proof"]

    start = time.time()
    # Warm-up call: populates state_cache with the common Mathlib/Aesop imports
    # so the batch below does not repay the import cost per problem.
    # NOTE(review): timeout=1 here vs timeout=20_000 below — confirm whether
    # pyle.evaluate expects seconds or milliseconds; the units look inconsistent.
    response, duration, state_cache = pyle.evaluate(
        "import Mathlib\nimport Aesop\n--asdf", state_cache=None, timeout=1
    )
    import_end = time.time()
    # `duration` is reported by lean in milliseconds, hence the / 1000.
    print(
        f"Import took: {import_end - start:0.3f}s, or {duration / 1000:0.3f}s measured by lean"
    )

    problems = list(dataset)
    responses, durations, state_cache = pyle.evaluate_many(
        problems,
        state_cache=state_cache,
        timeout=20_000,
        n_workers=8,
    )
    # One record per problem: the parsed JSON response plus its duration.
    # (Removed a dead `results = defaultdict(list)` that was immediately
    # overwritten by this comprehension.)
    results = [
        {**json.loads(resp), "duration": duration}
        for resp, duration in zip(responses, durations)
    ]
    end = time.time()
    print(f"Total time: {end - import_end:0.3f}s")

    df = pd.DataFrame(results)
    print(df)
    df.to_json("benchmark.json")