AgentOptimizer
diff --git a/examples/advanced_selection_example.py b/examples/advanced_selection_example.py
Lines changed: 60 additions & 24 deletions
diff --git a/examples/ag2_example.py b/examples/ag2_example.py
Lines changed: 3 additions & 0 deletions
diff --git a/examples/crewai_example.py b/examples/crewai_example.py
Lines changed: 3 additions & 0 deletions
diff --git a/examples/custom_agent_example.py b/examples/custom_agent_example.py
Lines changed: 31 additions & 14 deletions
diff --git a/examples/langchain_example.py b/examples/langchain_example.py
Lines changed: 8 additions & 4 deletions
diff --git a/examples/langgraph_example.py b/examples/langgraph_example.py
Lines changed: 16 additions & 3 deletions
@@ -24,28 +24,43 @@
 # Agent, dataset, and eval_fn (same as custom_agent_example.py)
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     def __init__(self, models):
         self.client = OpenAI()
         self.planner_model = models["planner"]
         self.solver_model = models["solver"]
 
     def run(self, input_data):
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
-
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
+
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
         return answer
 
 
@@ -71,19 +86,22 @@ def eval_fn(expected, actual):
 # Selection algorithms
 # ---------------------------------------------------------------------------
 
+
 def run_auto():
     """method="auto" — automatically picks the best algorithm (default)."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
-        method="auto",
+        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset, method="auto",
     )
     return selector.select_best(parallel=True)
 
 
 def run_random():
     """method="random" — evaluate a random subset of combinations."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="random",
         sample_fraction=0.5,  # evaluate 50% of all combinations
     )
@@ -93,7 +111,10 @@ def run_random():
 def run_hill_climbing():
     """method="hill_climbing" — greedy search using model quality/speed rankings."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="hill_climbing",
         batch_size=4,  # number of neighbors to evaluate per step
     )
@@ -103,7 +124,10 @@ def run_hill_climbing():
 def run_arm_elimination():
     """method="arm_elimination" — eliminates statistically dominated combinations early."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="arm_elimination",
     )
     return selector.select_best(parallel=True)
@@ -112,7 +136,10 @@ def run_arm_elimination():
 def run_epsilon_lucb():
     """method="epsilon_lucb" — stops when the best arm is identified within epsilon."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="epsilon_lucb",
         epsilon=0.05,  # acceptable gap from the true best
     )
@@ -122,7 +149,10 @@ def run_epsilon_lucb():
 def run_threshold():
     """method="threshold" — classify combinations as above/below a quality threshold."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="threshold",
         threshold=0.8,  # minimum acceptable accuracy
     )
@@ -132,7 +162,10 @@ def run_threshold():
 def run_lm_proposal():
     """method="lm_proposal" — use a proposer LLM to shortlist promising combinations."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="lm_proposal",
     )
     return selector.select_best(parallel=True)
@@ -141,7 +174,10 @@ def run_lm_proposal():
 def run_bayesian():
     """method="bayesian" — GP-based Bayesian optimization (requires agentopt[bayesian])."""
     selector = ModelSelector(
-        agent=MyAgent, models=models, eval_fn=eval_fn, dataset=dataset,
+        agent=MyAgent,
+        models=models,
+        eval_fn=eval_fn,
+        dataset=dataset,
         method="bayesian",
         batch_size=4,
     )
 
@@ -25,6 +25,7 @@
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """AG2 planner+solver agent pair."""
 
@@ -76,6 +77,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -99,6 +101,7 @@ def eval_fn(expected, actual):
 
     results = selector.select_best(parallel=True)
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
 
@@ -21,6 +21,7 @@
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """CrewAI crew with researcher + writer agents."""
 
@@ -90,6 +91,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -113,6 +115,7 @@ def eval_fn(expected, actual):
 
     results = selector.select_best(parallel=True)
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
 
@@ -26,6 +26,7 @@
 # run() takes a single datapoint and returns the agent's output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """A simple planner+solver agent using the OpenAI SDK."""
 
@@ -36,22 +37,36 @@ def __init__(self, models):
 
     def run(self, input_data):
         # Step 1: Planner generates a plan
-        plan = self.client.chat.completions.create(
-            model=self.planner_model,
-            messages=[
-                {"role": "system", "content": "You are a planning assistant. Create a brief plan to answer the question."},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        plan = (
+            self.client.chat.completions.create(
+                model=self.planner_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a planning assistant. Create a brief plan to answer the question.",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
 
         # Step 2: Solver executes the plan
-        answer = self.client.chat.completions.create(
-            model=self.solver_model,
-            messages=[
-                {"role": "system", "content": f"Follow this plan and answer concisely:\n{plan}"},
-                {"role": "user", "content": input_data},
-            ],
-        ).choices[0].message.content
+        answer = (
+            self.client.chat.completions.create(
+                model=self.solver_model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{plan}",
+                    },
+                    {"role": "user", "content": input_data},
+                ],
+            )
+            .choices[0]
+            .message.content
+        )
         return answer
 
 
@@ -75,6 +90,7 @@ def run(self, input_data):
 # It compares agent output against expected output and returns a score.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -99,6 +115,7 @@ def eval_fn(expected, actual):
 
     results = selector.select_best(parallel=True)
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
 
@@ -28,7 +28,10 @@ def search(query: str) -> str:
 
 PROMPT = ChatPromptTemplate.from_messages(
     [
-        ("system", "You are a helpful assistant. Use tools when needed to answer questions concisely."),
+        (
+            "system",
+            "You are a helpful assistant. Use tools when needed to answer questions concisely.",
+        ),
         ("human", "{input}"),
         ("placeholder", "{agent_scratchpad}"),
     ]
@@ -41,6 +44,7 @@ def search(query: str) -> str:
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """LangChain tool-calling agent."""
 
@@ -71,6 +75,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -83,16 +88,15 @@ def eval_fn(expected, actual):
 if __name__ == "__main__":
     selector = ModelSelector(
         agent=MyAgent,
-        models={
-            "agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],
-        },
+        models={"agent": ["gpt-4o", "gpt-4o-mini", "gpt-4.1-nano"],},
         eval_fn=eval_fn,
         dataset=dataset,
         method="brute_force",  # or "auto" for smarter selection algorithms
     )
 
     results = selector.select_best(parallel=True)
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best:
 
@@ -31,6 +31,7 @@ class AgentState(TypedDict):
 # run(input_data) runs the agent on a single datapoint and returns the output.
 # ---------------------------------------------------------------------------
 
+
 class MyAgent:
     """LangGraph planner+solver agent."""
 
@@ -40,15 +41,23 @@ def __init__(self, models):
 
         def planner_node(state: AgentState) -> dict:
             response = planner_llm.invoke(
-                [{"role": "system", "content": "Create a brief plan to answer the question."}]
+                [
+                    {
+                        "role": "system",
+                        "content": "Create a brief plan to answer the question.",
+                    }
+                ]
                 + state["messages"]
             )
             return {"plan": response.content}
 
         def solver_node(state: AgentState) -> dict:
             response = solver_llm.invoke(
                 [
-                    {"role": "system", "content": f"Follow this plan and answer concisely:\n{state['plan']}"},
+                    {
+                        "role": "system",
+                        "content": f"Follow this plan and answer concisely:\n{state['plan']}",
+                    },
                     state["messages"][-1],
                 ]
             )
@@ -63,7 +72,9 @@ def solver_node(state: AgentState) -> dict:
         self._app = graph.compile()
 
     def run(self, input_data):
-        result = self._app.invoke({"messages": [{"role": "user", "content": input_data}]})
+        result = self._app.invoke(
+            {"messages": [{"role": "user", "content": input_data}]}
+        )
         return result["answer"]
 
 
@@ -82,6 +93,7 @@ def run(self, input_data):
 # Step 3: Evaluation function — score agent output against expected answer.
 # ---------------------------------------------------------------------------
 
+
 def eval_fn(expected, actual):
     return 1.0 if expected.lower() in str(actual).lower() else 0.0
 
@@ -105,6 +117,7 @@ def eval_fn(expected, actual):
 
     results = selector.select_best(parallel=True)
     results.print_summary()
+    results.plot_pareto()
 
     best = results.get_best_combo()
     if best: