Skip to content

Commit 3c07793

Browse files
authored
Merge pull request #38 from AgentOptimizer/paretocurve
Paretocurve
2 parents a357229 + aad735c commit 3c07793

11 files changed

Lines changed: 319 additions & 56 deletions

examples/advanced_selection_example.py

Lines changed: 60 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -24,28 +24,43 @@
2424
# Agent, dataset, and eval_fn (same as custom_agent_example.py)
2525
# ---------------------------------------------------------------------------
2626

27+
2728
class MyAgent:
2829
def __init__(self, models):
2930
self.client = OpenAI()
3031
self.planner_model = models["planner"]
3132
self.solver_model = models["solver"]
3233

3334
def run(self, input_data):
34-
plan = self.client.chat.completions.create(
35-
model=self.planner_model,
36-
messages=[
37-
{"role": "system", "content": "Create a brief plan to answer the question."},
38-
{"role": "user", "content": input_data},
39-
],
40-
).choices[0].message.content
41-
42-
answer = self.client.chat.completions.create(
43-
model=self.solver_model,
44-
messages=[
45-
{"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
46-
{"role": "user", "content": input_data},
47-
],
48-
).choices[0].message.content
35+
plan = (
36+
self.client.chat.completions.create(
37+
model=self.planner_model,
38+
messages=[
39+
{
40+
"role": "system",
41+
"content": "Create a brief plan to answer the question.",
42+
},
43+
{"role": "user", "content": input_data},
44+
],
45+
)
46+
.choices[0]
47+
.message.content
48+
)
49+
50+
answer = (
51+
self.client.chat.completions.create(
52+
model=self.solver_model,
53+
messages=[
54+
{
55+
"role": "system",
56+
"content": f"Follow this plan and answer concisely:\n{plan}",
57+
},
58+
{"role": "user", "content": input_data},
59+
],
60+
)
61+
.choices[0]
62+
.message.content
63+
)
4964
return answer
5065

5166

@@ -71,19 +86,22 @@ def eval_fn(expected, actual):
7186
# Selection algorithms
7287
# ---------------------------------------------------------------------------
7388

89+
7490
def run_auto():
7591
"""method="auto" — automatically picks the best algorithm (default)."""
7692
selector = ModelSelector(
77-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
78-
method="auto",
93+
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset, method="auto",
7994
)
8095
return selector.select_best(parallel=True)
8196

8297

8398
def run_random():
8499
"""method="random" — evaluate a random subset of combinations."""
85100
selector = ModelSelector(
86-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
101+
agent=MyAgent,
102+
models=models,
103+
eval_fn=eval_fn,
104+
dataset=dataset,
87105
method="random",
88106
sample_fraction=0.5, # evaluate 50% of all combinations
89107
)
@@ -93,7 +111,10 @@ def run_random():
93111
def run_hill_climbing():
94112
"""method="hill_climbing" — greedy search using model quality/speed rankings."""
95113
selector = ModelSelector(
96-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
114+
agent=MyAgent,
115+
models=models,
116+
eval_fn=eval_fn,
117+
dataset=dataset,
97118
method="hill_climbing",
98119
batch_size=4, # number of neighbors to evaluate per step
99120
)
@@ -103,7 +124,10 @@ def run_hill_climbing():
103124
def run_arm_elimination():
104125
"""method="arm_elimination" — eliminates statistically dominated combinations early."""
105126
selector = ModelSelector(
106-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
127+
agent=MyAgent,
128+
models=models,
129+
eval_fn=eval_fn,
130+
dataset=dataset,
107131
method="arm_elimination",
108132
)
109133
return selector.select_best(parallel=True)
@@ -112,7 +136,10 @@ def run_arm_elimination():
112136
def run_epsilon_lucb():
113137
"""method="epsilon_lucb" — stops when the best arm is identified within epsilon."""
114138
selector = ModelSelector(
115-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
139+
agent=MyAgent,
140+
models=models,
141+
eval_fn=eval_fn,
142+
dataset=dataset,
116143
method="epsilon_lucb",
117144
epsilon=0.05, # acceptable gap from the true best
118145
)
@@ -122,7 +149,10 @@ def run_epsilon_lucb():
122149
def run_threshold():
123150
"""method="threshold" — classify combinations as above/below a quality threshold."""
124151
selector = ModelSelector(
125-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
152+
agent=MyAgent,
153+
models=models,
154+
eval_fn=eval_fn,
155+
dataset=dataset,
126156
method="threshold",
127157
threshold=0.8, # minimum acceptable accuracy
128158
)
@@ -132,7 +162,10 @@ def run_threshold():
132162
def run_lm_proposal():
133163
"""method="lm_proposal" — use a proposer LLM to shortlist promising combinations."""
134164
selector = ModelSelector(
135-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
165+
agent=MyAgent,
166+
models=models,
167+
eval_fn=eval_fn,
168+
dataset=dataset,
136169
method="lm_proposal",
137170
)
138171
return selector.select_best(parallel=True)
@@ -141,7 +174,10 @@ def run_lm_proposal():
141174
def run_bayesian():
142175
"""method="bayesian" — GP-based Bayesian optimization (requires agentopt[bayesian])."""
143176
selector = ModelSelector(
144-
agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
177+
agent=MyAgent,
178+
models=models,
179+
eval_fn=eval_fn,
180+
dataset=dataset,
145181
method="bayesian",
146182
batch_size=4,
147183
)

examples/ag2_example.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
# run(input_data) runs the agent on a single datapoint and returns the output.
2626
# ---------------------------------------------------------------------------
2727

28+
2829
class MyAgent:
2930
"""AG2 planner+solver agent pair."""
3031

@@ -76,6 +77,7 @@ def run(self, input_data):
7677
# Step 3: Evaluation function — score agent output against expected answer.
7778
# ---------------------------------------------------------------------------
7879

80+
7981
def eval_fn(expected, actual):
8082
return 1.0 if expected.lower() in str(actual).lower() else 0.0
8183

@@ -99,6 +101,7 @@ def eval_fn(expected, actual):
99101

100102
results = selector.select_best(parallel=True)
101103
results.print_summary()
104+
results.plot_pareto()
102105

103106
best = results.get_best_combo()
104107
if best:

examples/crewai_example.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
# run(input_data) runs the agent on a single datapoint and returns the output.
2222
# ---------------------------------------------------------------------------
2323

24+
2425
class MyAgent:
2526
"""CrewAI crew with researcher + writer agents."""
2627

@@ -90,6 +91,7 @@ def run(self, input_data):
9091
# Step 3: Evaluation function — score agent output against expected answer.
9192
# ---------------------------------------------------------------------------
9293

94+
9395
def eval_fn(expected, actual):
9496
return 1.0 if expected.lower() in str(actual).lower() else 0.0
9597

@@ -113,6 +115,7 @@ def eval_fn(expected, actual):
113115

114116
results = selector.select_best(parallel=True)
115117
results.print_summary()
118+
results.plot_pareto()
116119

117120
best = results.get_best_combo()
118121
if best:

examples/custom_agent_example.py

Lines changed: 31 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
# run() takes a single datapoint and returns the agent's output.
2727
# ---------------------------------------------------------------------------
2828

29+
2930
class MyAgent:
3031
"""A simple planner+solver agent using the OpenAI SDK."""
3132

@@ -36,22 +37,36 @@ def __init__(self, models):
3637

3738
def run(self, input_data):
3839
# Step 1: Planner generates a plan
39-
plan = self.client.chat.completions.create(
40-
model=self.planner_model,
41-
messages=[
42-
{"role": "system", "content": "You are a planning assistant. Create a brief plan to answer the question."},
43-
{"role": "user", "content": input_data},
44-
],
45-
).choices[0].message.content
40+
plan = (
41+
self.client.chat.completions.create(
42+
model=self.planner_model,
43+
messages=[
44+
{
45+
"role": "system",
46+
"content": "You are a planning assistant. Create a brief plan to answer the question.",
47+
},
48+
{"role": "user", "content": input_data},
49+
],
50+
)
51+
.choices[0]
52+
.message.content
53+
)
4654

4755
# Step 2: Solver executes the plan
48-
answer = self.client.chat.completions.create(
49-
model=self.solver_model,
50-
messages=[
51-
{"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
52-
{"role": "user", "content": input_data},
53-
],
54-
).choices[0].message.content
56+
answer = (
57+
self.client.chat.completions.create(
58+
model=self.solver_model,
59+
messages=[
60+
{
61+
"role": "system",
62+
"content": f"Follow this plan and answer concisely:\n{plan}",
63+
},
64+
{"role": "user", "content": input_data},
65+
],
66+
)
67+
.choices[0]
68+
.message.content
69+
)
5570
return answer
5671

5772

@@ -75,6 +90,7 @@ def run(self, input_data):
7590
# It compares agent output against expected output and returns a score.
7691
# ---------------------------------------------------------------------------
7792

93+
7894
def eval_fn(expected, actual):
7995
return 1.0 if expected.lower() in str(actual).lower() else 0.0
8096

@@ -99,6 +115,7 @@ def eval_fn(expected, actual):
99115

100116
results = selector.select_best(parallel=True)
101117
results.print_summary()
118+
results.plot_pareto()
102119

103120
best = results.get_best_combo()
104121
if best:

examples/langchain_example.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,10 @@ def search(query: str) -> str:
2828

2929
PROMPT = ChatPromptTemplate.from_messages(
3030
[
31-
("system", "You are a helpful assistant. Use tools when needed to answer questions concisely."),
31+
(
32+
"system",
33+
"You are a helpful assistant. Use tools when needed to answer questions concisely.",
34+
),
3235
("human", "{input}"),
3336
("placeholder", "{agent_scratchpad}"),
3437
]
@@ -41,6 +44,7 @@ def search(query: str) -> str:
4144
# run(input_data) runs the agent on a single datapoint and returns the output.
4245
# ---------------------------------------------------------------------------
4346

47+
4448
class MyAgent:
4549
"""LangChain tool-calling agent."""
4650

@@ -71,6 +75,7 @@ def run(self, input_data):
7175
# Step 3: Evaluation function — score agent output against expected answer.
7276
# ---------------------------------------------------------------------------
7377

78+
7479
def eval_fn(expected, actual):
7580
return 1.0 if expected.lower() in str(actual).lower() else 0.0
7681

@@ -83,16 +88,15 @@ def eval_fn(expected, actual):
8388
if __name__ == "__main__":
8489
selector = ModelSelector(
8590
agent=MyAgent,
86-
models={
87-
"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],
88-
},
91+
models={"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],},
8992
eval_fn=eval_fn,
9093
dataset=dataset,
9194
method="brute_force", # or "auto" for smarter selection algorithms
9295
)
9396

9497
results = selector.select_best(parallel=True)
9598
results.print_summary()
99+
results.plot_pareto()
96100

97101
best = results.get_best_combo()
98102
if best:

examples/langgraph_example.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ class AgentState(TypedDict):
3131
# run(input_data) runs the agent on a single datapoint and returns the output.
3232
# ---------------------------------------------------------------------------
3333

34+
3435
class MyAgent:
3536
"""LangGraph planner+solver agent."""
3637

@@ -40,15 +41,23 @@ def __init__(self, models):
4041

4142
def planner_node(state: AgentState) -> dict:
4243
response = planner_llm.invoke(
43-
[{"role": "system", "content": "Create a brief plan to answer the question."}]
44+
[
45+
{
46+
"role": "system",
47+
"content": "Create a brief plan to answer the question.",
48+
}
49+
]
4450
+ state["messages"]
4551
)
4652
return {"plan": response.content}
4753

4854
def solver_node(state: AgentState) -> dict:
4955
response = solver_llm.invoke(
5056
[
51-
{"role": "system", "content": f"Follow this plan and answer concisely:\n{state['plan']}"},
57+
{
58+
"role": "system",
59+
"content": f"Follow this plan and answer concisely:\n{state['plan']}",
60+
},
5261
state["messages"][-1],
5362
]
5463
)
@@ -63,7 +72,9 @@ def solver_node(state: AgentState) -> dict:
6372
self._app = graph.compile()
6473

6574
def run(self, input_data):
66-
result = self._app.invoke({"messages": [{"role": "user", "content": input_data}]})
75+
result = self._app.invoke(
76+
{"messages": [{"role": "user", "content": input_data}]}
77+
)
6778
return result["answer"]
6879

6980

@@ -82,6 +93,7 @@ def run(self, input_data):
8293
# Step 3: Evaluation function — score agent output against expected answer.
8394
# ---------------------------------------------------------------------------
8495

96+
8597
def eval_fn(expected, actual):
8698
return 1.0 if expected.lower() in str(actual).lower() else 0.0
8799

@@ -105,6 +117,7 @@ def eval_fn(expected, actual):
105117

106118
results = selector.select_best(parallel=True)
107119
results.print_summary()
120+
results.plot_pareto()
108121

109122
best = results.get_best_combo()
110123
if best:

0 commit comments

Comments
 (0)