-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: EffiLearner Script
More file actions
71 lines (48 loc) · 1.77 KB
/
Effilearner Script
File metadata and controls
71 lines (48 loc) · 1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# --- Environment setup (Jupyter/Colab cell: `!` runs a shell command, `%` is a line magic) ---
# Clone the repo (once)
!git clone https://github.com/huangd1999/EffiLearner.git
# Move into the repo
%cd EffiLearner
# Install the pinned OpenAI SDK and Hugging Face tools
# NOTE(review): openai==0.28.0 is the legacy pre-1.0 SDK interface — presumably
# required by the repo's gpt_* scripts; confirm before upgrading.
!pip install --upgrade openai==0.28.0
!pip install --upgrade datasets transformers fsspec
from getpass import getpass
import os

# Prompt for the OpenAI key only when the environment does not already
# carry one; the SDK picks up OPENAI_API_KEY from the environment.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
from datasets import load_dataset
import json, os

# Sampling parameters — previously hard-coded inline; named here so the
# sample size and shuffle seed can be changed in one place.
N_SAMPLES = 10
SEED = 42

# 1) Load the full BigOBench train split.
bb = load_dataset("facebook/BigOBench", split="train")

# 2) Shuffle deterministically and take N_SAMPLES examples.
sampled = bb.shuffle(seed=SEED).select(range(N_SAMPLES))

# 3) Transform into the simple {prompt, reference} format.
#    `solution_code` may be missing on some rows, so default to "".
effibench_like = [
    {
        "prompt": ex["description"],
        "reference": ex.get("solution_code", ""),
    }
    for ex in sampled
]

# 4) Write into the repository's datasets/ folder.
os.makedirs("datasets", exist_ok=True)
with open("datasets/dataset.json", "w") as f:
    json.dump(effibench_like, f, indent=2)

# 5) Confirm.
print("Wrote", len(effibench_like), "examples to datasets/dataset.json")
import json

path = "datasets/dataset.json"

# Read the sampled dataset back in. BUG FIX: the original used
# `json.load(open(path))`, leaving the file handle unclosed; a context
# manager guarantees it is released.
with open(path) as f:
    data = json.load(f)

# Mirror the prompt into `markdown_description` and `small_test_cases`
# — presumably the field names EffiLearner's scripts read; confirm
# against the code under src/.
for ex in data:
    ex["markdown_description"] = ex["prompt"]
    ex["small_test_cases"] = [{"input": ex["prompt"]}]

with open(path, "w") as f:
    json.dump(data, f, indent=2)

print("Injected markdown_description + small_test_cases into dataset.json")
# Go into the src folder so the relative paths line up
%cd src
# Run the code‐generation step on your 10 examples
!python gpt_generation.py \
--checkpoint gpt-4 \
--dataset EffiBench
python gpt_EffiLearner.py --checkpoint gpt-4 --dataset ../datasets/dataset_gpt-4.json