From c8c5134ff66baee76fd104c39ab47fadfb83937a Mon Sep 17 00:00:00 2001
From: Ananta Shahane <ananta.shahane@aol.com>
Date: Fri, 30 Jan 2026 13:47:01 +0100
Subject: [PATCH 1/4] Auto-correlation fixed.

---
 .../analysis/auto_correlation_base_spec.py    | 18 +++++------
 .../analysis/auto_correlation_ineq1.py        | 13 ++++----
 .../analysis/auto_correlation_ineq2.py        | 11 ++++---
 .../analysis/auto_correlation_ineq3.py        | 12 ++++----
 .../analysis/get_analysis_problems.py         |  6 ++--
 iohblade/solution.py                          | 30 ++++++++++++++++++-
 run_benchmarks/auto-correlation.py            | 14 ++++-----
 7 files changed, 63 insertions(+), 41 deletions(-)

diff --git a/iohblade/benchmarks/analysis/auto_correlation_base_spec.py b/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
index 02b480b..8b15a10 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
@@ -2,7 +2,7 @@
 from numpy.typing import NDArray
 from typing import Optional
 
-from iohblade.misc.prepare_namespace import prepare_namespace, clean_local_namespace
+from iohblade.misc.prepare_namespace import prepare_namespace
 
 """
     Autocorrelation measures how similar a signal is to a shifted version of itself.
@@ -138,30 +138,26 @@ def make_format_prompt(self):
 
 """
 
-    def _get_time_series(self, code) -> tuple[NDArray[np.float64], Optional[Exception]]:
+    def _get_time_series(self, code, name) -> tuple[NDArray[np.float64], Optional[Exception]]:
         local_parameters = {}
 
         allowed = ["numpy", "scipy"]
 
         try:
             global_parameters = prepare_namespace(code, allowed)
-            exec(code, global_parameters, local_parameters)
-            local_parameters = clean_local_namespace(
-                local_parameters, global_parameters
-            )
-            cls = next(v for v in local_parameters.values() if isinstance(v, type))
-            try:
+            compiled_code = compile(code, name, 'exec')
+            exec(compiled_code, global_parameters, local_parameters)
+            cls = local_parameters[name]
+            if self.best_solution:
                 f = np.asarray(
                     cls(best_known_configuration=self.best_known)(), dtype=np.float64
                 )  # Runs if class has __init__(self, best_known_solution)
-            except:
+            else:
                 f = np.asarray(
                     cls()(), dtype=np.float64
                 )  # Rollback to empty initantiation.
-
             return f, None
         except Exception as e:
-            print("\t Exception in `auto_correlation_ineq1.py`, " + e.__repr__())
             return (
                 np.ndarray(
                     [
diff --git a/iohblade/benchmarks/analysis/auto_correlation_ineq1.py b/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
index e92eeae..200ecc4 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
@@ -35,7 +35,7 @@ def __init__(
         Problem.__init__(self, name=self.task_name)
 
         self.task_prompt = self.make_task_prompt("minimize  max_t (f*f)(t) / (∫ f)^2")
-        self.example_prompt = self.make_example_prompt("AutoCorrCandidate")
+        self.example_prompt = self.make_example_prompt("AutoCorrCandidate_1")
         self.format_prompt = self.make_format_prompt()
         self.dependencies += [
             "scipy"
@@ -49,12 +49,11 @@ def evaluate(self, solution: Solution) -> Solution:
         code = solution.code
 
         try:
-            f, err = self._get_time_series(code)
+            f, err = self._get_time_series(code, solution.name)
             if err is not None:
                 raise err
         except Exception as e:
-            print("\t Exception in `auto_correlation_ineq1.py`, " + e.__repr__())
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution = solution.set_scores(float("inf"), e)
             return solution
 
         try:
@@ -70,11 +69,11 @@ def evaluate(self, solution: Solution) -> Solution:
                 raise ValueError("Integral ∫f must be > 0 for C1")
 
             score = float(np.max(g) / (I * I))  # minimize
-            solution.set_scores(
-                score, f"C1 ratio = {score:.6g}, best known = {self.best_known:.6g}"
+            solution = solution.set_scores(
+                score, f"C1 ratio = {score:.6g}, best known = {self.best_known:.6g}; soln={f}"
             )
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
+            solution = solution.set_scores(float("inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution) -> Solution:
diff --git a/iohblade/benchmarks/analysis/auto_correlation_ineq2.py b/iohblade/benchmarks/analysis/auto_correlation_ineq2.py
index 020aaa8..53d38e3 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_ineq2.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_ineq2.py
@@ -13,7 +13,7 @@ class AutoCorrIneq2(AutoCorrBaseSpec, Problem):
         functionality.
         Optimisation:
             \[\min -(||f*f||_2^2 / (||f*f||_1 • ||f*f||_\infty))\]
-        Best known auto-correlation 1 score by alpha evolve: is C_2 >= 0.8962 (prev 0.8892).
+        Best known auto-correlation 2 score by alpha evolve: is C_2 >= 0.8962 (prev 0.8892).
     """
 
     def __init__(
@@ -42,12 +42,11 @@ def evaluate(self, solution: Solution) -> Solution:
         code = solution.code
 
         try:
-            f, err = self._get_time_series(code)
+            f, err = self._get_time_series(code, name=solution.name)
             if err is not None:
                 raise err
         except Exception as e:
-            print("\t Exception in `auto_correlation_ineq2.py`, " + e.__repr__())
-            solution.set_scores(float("-inf"), f"exec-error {e}", "exec-failed")
+            solution = solution.set_scores(float("-inf"), f"exec-error {e}", e)
             return solution
 
         try:
@@ -67,11 +66,11 @@ def evaluate(self, solution: Solution) -> Solution:
                 raise ValueError("Denominator zero in C2 ratio")
 
             score = L2sq / den  # maximize in paper
-            solution.set_scores(
+            solution = solution.set_scores(
                 score, f"C2 ratio = {score:.6g}, best known = {self.best_known:.6g}"
             )
         except Exception as e:
-            solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
+            solution = solution.set_scores(float("-inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution) -> Solution:
diff --git a/iohblade/benchmarks/analysis/auto_correlation_ineq3.py b/iohblade/benchmarks/analysis/auto_correlation_ineq3.py
index 1eae3bc..82d63f6 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_ineq3.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_ineq3.py
@@ -13,7 +13,7 @@ class AutoCorrIneq3(AutoCorrBaseSpec, Problem):
         functionality.
         Optimisation:
             \[\max_t |||f*f||(t)| / (∫f)^2 \]
-        Best known auto-correlation 1 score by alpha evolve: is C_3 <= 1.4557 (prev 1.4581).
+        Best known auto-correlation 3 score by alpha evolve: is C_3 <= 1.4557 (prev 1.4581).
     """
 
     def __init__(
@@ -29,7 +29,7 @@ def __init__(
         Problem.__init__(self, name=self.task_name)
 
         self.task_prompt = self.make_task_prompt("minimize  max_t |(f*f)(t)| / (∫ f)^2")
-        self.example_prompt = self.make_example_prompt("AutoCorreCandidate_2")
+        self.example_prompt = self.make_example_prompt("AutoCorreCandidate_3")
         self.format_prompt = self.make_format_prompt()
 
         self.dependencies += ["scipy"]
@@ -40,12 +40,12 @@ def evaluate(self, solution: Solution) -> Solution:
         code = solution.code
 
         try:
-            f, err = self._get_time_series(code)
+            f, err = self._get_time_series(code, name=solution.name)
             if err is not None:
                 raise err
         except Exception as e:
             print("\t Exception in `auto_correlation_ineq3.py`, " + e.__repr__())
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution = solution.set_scores(float("inf"), f"exec-error {e}", e)
             return solution
 
         try:
@@ -59,11 +59,11 @@ def evaluate(self, solution: Solution) -> Solution:
                 raise ValueError("Integral ∫f must be nonzero for C3")
 
             score = float(np.max(np.abs(g)) / (I * I))  # minimize
-            solution.set_scores(
+            solution = solution.set_scores(
                 score, f"C3 ratio = {score:.6g}, best known = {self.best_known:.6g}"
             )
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
+            solution = solution.set_scores(float("inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution) -> Solution:
diff --git a/iohblade/benchmarks/analysis/get_analysis_problems.py b/iohblade/benchmarks/analysis/get_analysis_problems.py
index 96dd55d..5c38df0 100644
--- a/iohblade/benchmarks/analysis/get_analysis_problems.py
+++ b/iohblade/benchmarks/analysis/get_analysis_problems.py
@@ -1068,9 +1068,9 @@ def get_analysis_problems(use_best: bool) -> list[AutoCorrBaseSpec]:
 
     Returns:
         An array of benchmark objects as follows:
-            array[0] = Auto Correlation Inrquality 1
-            array[1] = Auto Correlation Inrquality 2
-            array[2] = Auto Correlation Inrquality 3
+            array[0] = Auto Correlation Inequality 1
+            array[1] = Auto Correlation Inequality 2
+            array[2] = Auto Correlation Inequality 3
 
     """
     if use_best:
diff --git a/iohblade/solution.py b/iohblade/solution.py
index 872c549..c838ff1 100644
--- a/iohblade/solution.py
+++ b/iohblade/solution.py
@@ -1,7 +1,8 @@
 import json
 import uuid
-
+import traceback
 import numpy as np
+from typing import Optional
 
 
 class Solution:
@@ -89,6 +90,33 @@ def set_scores(self, fitness, feedback="", error=""):
         self.feedback = feedback
         self.error = error
         return self
+    
+    def set_scores(
+        self, fitness: float, feedback="", error: Optional[Exception] = None
+    ):
+        """
+            Set the score of current instance of individual.
+        Args:
+            `fitness: float | Fitness`: Fitness/Score of the individual. It is of type `float` when single objective, or `Fitness` when multi-objective.
+            `Feedback: str` feedback for the LLM, suggest improvements or target score.
+            `error: Exception`: Exception object encountered during `exec` of the code block.
+        """
+        self.fitness = fitness
+        self.feedback = feedback
+
+        if error:
+            tb = traceback.extract_tb(error.__traceback__)[-1]
+            line_no = tb.lineno
+            code_line = ""
+            code_lines = self.code.split("\n")
+            if line_no and len(code_lines) >= line_no:
+                code_line = code_lines[line_no - 1]
+            error_type = type(error).__name__
+            error_msg = str(error)
+            self.error = f"{error_type}: {error_msg}.\n"
+            if tb.filename != "<string>" or tb.filename != self.name:
+                self.error += f"On line {line_no}: {code_line}.\n"
+        return self
 
     def get_summary(self):
         """
diff --git a/run_benchmarks/auto-correlation.py b/run_benchmarks/auto-correlation.py
index 2c4d6d3..68f4c1e 100644
--- a/run_benchmarks/auto-correlation.py
+++ b/run_benchmarks/auto-correlation.py
@@ -1,5 +1,5 @@
 from iohblade.experiment import Experiment
-from iohblade.llm import Gemini_LLM, Ollama_LLM
+from iohblade.llm import Ollama_LLM
 from iohblade.methods import LLaMEA
 from iohblade.loggers import ExperimentLogger
 from os import environ
@@ -15,26 +15,26 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM("gemma3:12b")
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     # Select the instances of Auto-Correlation 1-3.
     # ===============================================
-    autocorrineq = get_analysis_problems(use_best=True)[2]
+    autocorrineq = get_analysis_problems(use_best=False)[2]
     # ================================================
 
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
             n_offspring=1,
             budget=budget,
-            minimization=autocorrineq.minimisation,         
+            minimization=autocorrineq.minimisation,  
         )
         methods.append(method)
-    logger = ExperimentLogger(f"results/{autocorrineq.task_name}")
+    logger = ExperimentLogger(f"results/Autocorrelation_Inequality")
     experiment = Experiment(
         methods,
         [autocorrineq],

From 8d9bd6e255b4eafddc40030cf1f566674835806e Mon Sep 17 00:00:00 2001
From: Ananta Shahane <ananta.shahane@aol.com>
Date: Mon, 2 Feb 2026 11:40:06 +0100
Subject: [PATCH 2/4] Resolved erdos-min-overlap

---
 .../combinatorics/erdos_min_overlap.py        | 24 +++++++++----------
 run_benchmarks/auto-correlation.py            |  2 +-
 run_benchmarks/erdos-min-overlap.py           |  8 +++----
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/iohblade/benchmarks/combinatorics/erdos_min_overlap.py b/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
index 6b5f1c0..aaa6146 100644
--- a/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
+++ b/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
@@ -54,7 +54,7 @@ def __init__(
     * Overlap functional uses zero-extension of g outside [-1,1].
     * Optimize the objective:
         * minimize  sup_{x ∈ [-2,2]} ∫_{-1}^{1} f(t) · g(x+t) dt,  with g = 1 - f
-    * Do not use scipy's interp1d, it is no longer supported.
+    * Do not use scipy's interp1d, it is now deprecated.
 """
 
         self.task_prompt += f"""
@@ -121,27 +121,27 @@ def _sup_overlap(self, f: np.ndarray, g: np.ndarray) -> float:
     def evaluate(self, solution: Solution, explogger=None):
         local_ns = {}
         code = solution.code
-
+        name = solution.name if solution.name else "ErdosCandidate"
         try:
             safe_globals = prepare_namespace(code, self.dependencies)
+            compiled = compile(code, filename=name, mode="exec")
+            exec(compiled, safe_globals, local_ns)
+            
+            cls = local_ns[name]
 
-            exec(code, safe_globals, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe_globals)
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-
-            try:
+            if self.best_solution is not None:
                 f = np.asarray(
                     cls(best_known_configuration=self.best_solution)(), dtype=np.float64
                 )
-            except:
+            else:
                 f = np.asarray(cls()(), dtype=np.float64)
         except Exception as e:
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution = solution.set_scores(float("inf"), f"exec-error {e}", e)
             return solution
 
         try:
             if f.ndim != 1 or f.size != self.n_bins:
-                raise ValueError(f"f must be length {self.n_bins}")
+                raise ValueError(f"f must be length {self.n_bins} got {f.shape}.")
             # bounds f ∈ [0,1] within tolerance
             if np.any(f < -self.tolerance) or np.any(f > 1.0 + self.tolerance):
                 raise ValueError("entries of f must lie in [0,1]")
@@ -155,9 +155,9 @@ def evaluate(self, solution: Solution, explogger=None):
 
             score = self._sup_overlap(f, g)  # minimize
             msg = f"Score = {score:.6g}; with configuration: N={self.n_bins}, dx={dx:.6g}, If={I_f:.6g}.\n\t Best known score = {self.best_known}"
-            solution.set_scores(score, msg)
+            solution = solution.set_scores(score, msg)
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
+            solution = solution.set_scores(float("inf"), f"calc-error {e}")
 
         return solution
 
diff --git a/run_benchmarks/auto-correlation.py b/run_benchmarks/auto-correlation.py
index 68f4c1e..e5932af 100644
--- a/run_benchmarks/auto-correlation.py
+++ b/run_benchmarks/auto-correlation.py
@@ -31,7 +31,7 @@
             n_parents=1,
             n_offspring=1,
             budget=budget,
-            minimization=autocorrineq.minimisation,  
+            minimization=autocorrineq.minimisation
         )
         methods.append(method)
     logger = ExperimentLogger(f"results/Autocorrelation_Inequality")
diff --git a/run_benchmarks/erdos-min-overlap.py b/run_benchmarks/erdos-min-overlap.py
index cd9429f..876327b 100644
--- a/run_benchmarks/erdos-min-overlap.py
+++ b/run_benchmarks/erdos-min-overlap.py
@@ -13,13 +13,13 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     erdos_min_overlap = get_combinatorics_problems(True)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
@@ -28,7 +28,7 @@
             minimization=erdos_min_overlap.minimisation,
         )
         methods.append(method)
-    logger = ExperimentLogger(f"results/{erdos_min_overlap.name}")
+    logger = ExperimentLogger(f"results/Erdös_Min_Overlap")
     experiment = Experiment(
         methods,
         [erdos_min_overlap],

From ff97b5f6982b594c763ac1e3dddb7020f5fe8397 Mon Sep 17 00:00:00 2001
From: "A.A. Shahane" <shahanea@neocortex.liacs.nl>
Date: Mon, 2 Feb 2026 15:09:35 +0000
Subject: [PATCH 3/4] Geometry Problems fixed.

---
 iohblade/benchmarks/fourier/fourier_base.py   | 15 ++++++----
 .../benchmarks/fourier/get_fourier_problem.py |  5 +++-
 .../fourier/uncertainty_inequality.py         | 25 ++++++++--------
 .../geometry/get_geometry_problems.py         |  1 -
 .../geometry/heilbronn_convex_region.py       | 16 +++++-----
 .../benchmarks/geometry/heilbronn_triangle.py | 14 +++++----
 .../benchmarks/geometry/kissing_number_11d.py | 23 ++++++++------
 .../geometry/min_max_distance_ratio.py        | 16 +++++-----
 iohblade/solution.py                          | 30 ++++++++++++-------
 run_benchmarks/erdos-min-overlap.py           |  7 +++--
 run_benchmarks/heilbronn_convex_region.py     |  8 ++---
 run_benchmarks/heilbronn_triangle.py          |  9 +++---
 run_benchmarks/kissing-11D.py                 |  8 ++---
 run_benchmarks/min_max_min_distance_ratio.py  |  8 ++---
 run_benchmarks/uncertainty_inequality.py      | 13 ++++----
 15 files changed, 116 insertions(+), 82 deletions(-)

diff --git a/iohblade/benchmarks/fourier/fourier_base.py b/iohblade/benchmarks/fourier/fourier_base.py
index 7487329..57de395 100644
--- a/iohblade/benchmarks/fourier/fourier_base.py
+++ b/iohblade/benchmarks/fourier/fourier_base.py
@@ -54,18 +54,23 @@ def make_task_prompt(self, formula: str) -> str:
 - Objective (minimize):
     - """
             + formula
-            + """
+            + f"""
 - Tip: enforce structure (e.g., small |c|, P(0)≈0) to aid root placement.
 K = {self.n_terms}."
 """
         )
 
     def make_example_prompt(self, class_name: str) -> str:
-        accept_best_configuration = ""
+        accept_best_configuration = """
+        def __init__(self, n_terms: int):
+            # Accepts number of terms K for the problem.
+
+
+        """
         if self.best_known_configuration is not None:
             accept_best_configuration = """
-    def __init__(self, best_known_configuration: list[float] | None):
-        # Accepts a best known configuration (if available) for the problem, as a initial configuration, which is then 
+    def __init__(self, n_terms: int, best_known_configuration: list[float] | None):
+        # Accepts a number of terms K and best known configuration (if available) for the problem, as an initial configuration, which is then 
         optimised for better results.
 """
         return f"""
@@ -75,7 +80,7 @@ class {class_name}:
     {accept_best_configuration}
     def __call__(self):
         # Return K={self.n_terms} coefficients for H_0, H_4, H_8, ...
-        return [0.33, -0.01, -9e-05][: {self.n_terms}]
+        return [...., 0.33, -0.01, -9e-05][: {self.n_terms}]
 ```
 """
 
diff --git a/iohblade/benchmarks/fourier/get_fourier_problem.py b/iohblade/benchmarks/fourier/get_fourier_problem.py
index dd126c6..455a491 100644
--- a/iohblade/benchmarks/fourier/get_fourier_problem.py
+++ b/iohblade/benchmarks/fourier/get_fourier_problem.py
@@ -19,6 +19,9 @@ def get_fourier_problems(use_best: bool) -> list[UncertaintyInequality]:
             array[0] = Fourier Uncertainty Inequality benchmark object.
 
     """
-    ue1 = UncertaintyInequality(best_solution=best_known_configuration)
+    if use_best:
+        ue1 = UncertaintyInequality(best_solution=best_known_configuration)
+    else:
+        ue1 = UncertaintyInequality()
 
     return [ue1]
diff --git a/iohblade/benchmarks/fourier/uncertainty_inequality.py b/iohblade/benchmarks/fourier/uncertainty_inequality.py
index 940b404..3e6b261 100644
--- a/iohblade/benchmarks/fourier/uncertainty_inequality.py
+++ b/iohblade/benchmarks/fourier/uncertainty_inequality.py
@@ -5,7 +5,7 @@
 
 from iohblade.problem import Problem
 from iohblade.solution import Solution
-from iohblade.misc.prepare_namespace import prepare_namespace, clean_local_namespace
+from iohblade.misc.prepare_namespace import prepare_namespace
 
 from iohblade.benchmarks.fourier.fourier_base import FourierBase
 
@@ -114,23 +114,25 @@ def _check_tail_nonnegative(self, hcoef: np.ndarray, r: float) -> None:
 
     def evaluate(self, solution: Solution, explogger=None):
         code = solution.code
-
+        name = solution.name
         # 1) execute candidate
         try:
-            safe_globals = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe_globals, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe_globals)
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
+            safe_globals = prepare_namespace(code, self.dependencies)
+
+            compiled_code = compile(code, filename=name, mode="exec")
+            exec(compiled_code, safe_globals, local_ns)
+            cls = local_ns[name]
+
+            if self.best_known_configuration is not None:
                 c = np.asanyarray(
-                    cls(self.best_known_configuration)(), dtype=np.float64
+                    cls(self.n_terms, self.best_known_configuration)(), dtype=np.float64
                 )
-            except:
+            else:
                 c = np.asarray(cls(self.n_terms)(), dtype=np.float64)
 
         except Exception as e:
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(float("inf"), f"exec-error {e}", e)
             return solution
 
         # 2) validate and score
@@ -167,8 +169,7 @@ def evaluate(self, solution: Solution, explogger=None):
                 f"Score = {score:.9g}; r_max={r:.6g}; best known score = {self.best_known}",
             )
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
-
+            solution.set_scores(float("inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution) -> Solution:
diff --git a/iohblade/benchmarks/geometry/get_geometry_problems.py b/iohblade/benchmarks/geometry/get_geometry_problems.py
index b06b6ff..c45ba43 100644
--- a/iohblade/benchmarks/geometry/get_geometry_problems.py
+++ b/iohblade/benchmarks/geometry/get_geometry_problems.py
@@ -7874,7 +7874,6 @@ def get_kissing_number_11D_problems(use_best: bool) -> list[KissingNumber11D]:
     if use_best:
         kn = KissingNumber11D(best_solution=best_kissing_number_11D)
         return [kn]
-
     kn = KissingNumber11D()
     return [kn]
 
diff --git a/iohblade/benchmarks/geometry/heilbronn_convex_region.py b/iohblade/benchmarks/geometry/heilbronn_convex_region.py
index b01b952..750ea25 100644
--- a/iohblade/benchmarks/geometry/heilbronn_convex_region.py
+++ b/iohblade/benchmarks/geometry/heilbronn_convex_region.py
@@ -112,17 +112,19 @@ def evaluate(self, solution: Solution, explogger=None):
         try:
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe)
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
+            
+            compiled_code = compile(code, solution.name, "exec")
+            exec(compiled_code, safe, local_ns)
+            cls = local_ns[solution.name]
+
+            if self.best_solution is not None:
                 result = cls(self.n_points, self.best_solution)()
-            except:
+            else:
                 result = cls(self.n_points)()
             P = self.to_np_points(result)
         except Exception as e:
             # tb = e.__traceback__
-            solution.set_scores(float("-inf"), f"exec-error \n{e}", "exec-failed")
+            solution.set_scores(float("-inf"), f"exec-error \n{e}", e)
             return solution
 
         try:
@@ -144,7 +146,7 @@ def evaluate(self, solution: Solution, explogger=None):
                 f"min_triangle_area={min_area:.6g}, {'best known = ' + str(self.best_known) if self.best_known is not None else ''}.",
             )
         except Exception as e:
-            solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("-inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution):
diff --git a/iohblade/benchmarks/geometry/heilbronn_triangle.py b/iohblade/benchmarks/geometry/heilbronn_triangle.py
index 45f2bca..320937d 100644
--- a/iohblade/benchmarks/geometry/heilbronn_triangle.py
+++ b/iohblade/benchmarks/geometry/heilbronn_triangle.py
@@ -114,10 +114,13 @@ class HeilbronnTriangle-n{self.n_points}:
 
     def evaluate(self, solution, explogger=None):
         code = solution.code
+        name = solution.name
         try:
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe, local_ns)
+            compiled_code = compile(code, filename=name, mode="exec")
+            
+            exec(compiled_code, safe, local_ns)
             cls = local_ns[solution.name]
             if self.best_solution is None:
                 triangle, points = cls(self.n_points)()
@@ -132,12 +135,12 @@ def evaluate(self, solution, explogger=None):
             solution.set_scores(
                 float("-inf"),
                 f"exec-error {e}",
-                "exec-failed",
+                e,
             )
             return solution
 
         try:
-            if triangle:
+            if triangle is not None:
                 T, P = self._parse_candidate((triangle, points))
             else:
                 T, P = self._parse_candidate(points)
@@ -158,8 +161,8 @@ def evaluate(self, solution, explogger=None):
         except Exception as e:
             solution.set_scores(
                 float("-inf"),
-                f"calc-error {e}.",
-                f"Values Returned: Triangle {triangle}, points: {points}",
+                f"calc-error, for values returned by candidate: Triangle {triangle}, points: {points}",
+                e,
             )
         return solution
 
@@ -173,3 +176,4 @@ def to_dict(self):
 if __name__ == "__main__":
     hbt = HeilbronnTriangle(n_points=10, best_known=1.11)
     print(hbt.get_prompt())
+    print('------------------------------------------------------------------------------------------------')
diff --git a/iohblade/benchmarks/geometry/kissing_number_11d.py b/iohblade/benchmarks/geometry/kissing_number_11d.py
index 6a01b3c..fe1eef0 100644
--- a/iohblade/benchmarks/geometry/kissing_number_11d.py
+++ b/iohblade/benchmarks/geometry/kissing_number_11d.py
@@ -109,24 +109,29 @@ def _pairwise_d2(P: np.ndarray) -> np.ndarray:
 
     def evaluate(self, solution: Solution, explogger=None) -> Solution:
         code = solution.code
-        safe_globals = prepare_namespace(code, allowed=self.dependencies)
+        name = solution.name if solution.name else "KissingNumber11D"
+
         try:
             local_ns = {}
-            exec(code, safe_globals, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe_globals)
+            safe_globals = prepare_namespace(code, allowed=self.dependencies)
+            compiled_code = compile(code, name, "exec")
 
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
+            exec(compiled_code, safe_globals, local_ns)
+
+            cls = local_ns[name]
+            if self.best_solution is not None:
                 C = np.array(cls(self.best_solution)(), dtype=float)
-            except:
+            else:
                 C = np.array(cls()(), dtype=float)
 
             if C.ndim != 2 or C.shape[1] != self.dim:
                 raise ValueError(f"expected shape (m, {self.dim})")
             if not np.isfinite(C).all():
                 raise ValueError("non-finite coordinates")
-
-            norms2 = np.sum(C * C, axis=1)
+            try:
+                norms2 = np.sum(C * C, axis=1)
+            except Exception as e:
+                raise ValueError(f"Possibly jagged series: {c}. Got error {e}")
             if np.any(norms2 <= self.tolerance):
                 raise ValueError("zero vector present")
 
@@ -139,7 +144,7 @@ def evaluate(self, solution: Solution, explogger=None) -> Solution:
             m = int(C.shape[0])
             solution.set_scores(float(m), f"|C|={m}")
         except Exception as e:
-            solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("-inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution) -> Solution:
diff --git a/iohblade/benchmarks/geometry/min_max_distance_ratio.py b/iohblade/benchmarks/geometry/min_max_distance_ratio.py
index a86bc49..ba11f82 100644
--- a/iohblade/benchmarks/geometry/min_max_distance_ratio.py
+++ b/iohblade/benchmarks/geometry/min_max_distance_ratio.py
@@ -109,18 +109,20 @@ def _pairwise_d2(P: np.ndarray) -> np.ndarray:
 
     def evaluate(self, solution, explogger=None):
         code = solution.code
+        name = solution.name if solution.name else "MinMaxDistanceSolver"
         try:
             local_ns = {}
             safe = prepare_namespace(code, self.dependencies)
-            exec(code, safe, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe)
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
+            compiled_code = compile(code, name, "exec")
+            exec(compiled_code, safe, local_ns)
+
+            cls = local_ns[name]
+            if self.best_solution is not None:
                 P = cls(self.n_points, self.dim, self.best_solution)()
-            except:
+            else:
                 P = cls(self.n_points, self.dim)()
         except Exception as e:
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(float("inf"), f"exec-error {e}", e)
             return solution
 
         try:
@@ -142,7 +144,7 @@ def evaluate(self, solution, explogger=None):
                 msg += f" Best known score is {self.best_known}."
             solution.set_scores(score, msg)
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution):
diff --git a/iohblade/solution.py b/iohblade/solution.py
index c838ff1..ed5bc83 100644
--- a/iohblade/solution.py
+++ b/iohblade/solution.py
@@ -104,18 +104,28 @@ def set_scores(
         self.fitness = fitness
         self.feedback = feedback
 
-        if error:
-            tb = traceback.extract_tb(error.__traceback__)[-1]
-            line_no = tb.lineno
-            code_line = ""
-            code_lines = self.code.split("\n")
-            if line_no and len(code_lines) >= line_no:
-                code_line = code_lines[line_no - 1]
+        if error is not None:
+            if not isinstance(error, Exception):
+                self.error = str(error)
+                return self
+
             error_type = type(error).__name__
             error_msg = str(error)
-            self.error = f"{error_type}: {error_msg}.\n"
-            if tb.filename != "<string>" or tb.filename != self.name:
-                self.error += f"On line {line_no}: {code_line}.\n"
+            self.error = repr(error)
+
+            # An exception that was never raised carries no traceback frames.
+            frames = traceback.extract_tb(error.__traceback__)
+            tb = frames[-1] if frames else None
+            if tb is not None and tb.filename in ("<string>", self.name):
+                code_lines = self.code.splitlines()
+                line_no = tb.lineno
+                if line_no and 1 <= line_no <= len(code_lines):
+                    code_line = code_lines[line_no - 1]
+                    self.error = (
+                        f"{error_type}: {error_msg}.\n"
+                        f"On line {line_no}: {code_line}.\n"
+                    )
+
         return self
 
     def get_summary(self):
diff --git a/run_benchmarks/erdos-min-overlap.py b/run_benchmarks/erdos-min-overlap.py
index 876327b..943d829 100644
--- a/run_benchmarks/erdos-min-overlap.py
+++ b/run_benchmarks/erdos-min-overlap.py
@@ -11,12 +11,13 @@
 if __name__ == "__main__":
     budget = 10
 
-    api_key = environ.get("GOOGLE_API_KEY")
+    # api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM('gemma3:12b')
+
+    ollama_llm = Ollama_LLM('qwen2.5-coder:14b')
     # gemini_llm = Gemini_LLM(api_key=api_key)
 
-    erdos_min_overlap = get_combinatorics_problems(True)[0]
+    erdos_min_overlap = get_combinatorics_problems(False)[0]
 
     methods = []
     for llm in [ollama_llm]:
diff --git a/run_benchmarks/heilbronn_convex_region.py b/run_benchmarks/heilbronn_convex_region.py
index 64672b3..1f591cf 100644
--- a/run_benchmarks/heilbronn_convex_region.py
+++ b/run_benchmarks/heilbronn_convex_region.py
@@ -13,16 +13,16 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
-    heilbronn_convex_region = get_heilbronn_convex_region_problems(True)
+    heilbronn_convex_region = get_heilbronn_convex_region_problems(False)
     #Pick a Heilbronn problem, with known best solution.
     # heilbronn_convex_region[0] is 13 points problem and a[1] 14.
     heilbronn_convex_region = heilbronn_convex_region[1]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/heilbronn_triangle.py b/run_benchmarks/heilbronn_triangle.py
index ab0508f..cd1491c 100644
--- a/run_benchmarks/heilbronn_triangle.py
+++ b/run_benchmarks/heilbronn_triangle.py
@@ -1,7 +1,7 @@
 from os import environ
 
 from iohblade.experiment import Experiment
-from iohblade.llm import Gemini_LLM
+from iohblade.llm import Gemini_LLM, Ollama_LLM
 from iohblade.methods import LLaMEA
 from iohblade.loggers import ExperimentLogger
 
@@ -13,13 +13,14 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    # gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
 
     # Helibronn n11 benchmark.
     heilbronn_triangle = get_heilbronn_triangle_problems(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
@@ -28,7 +29,7 @@
             minimization=heilbronn_triangle.minimisation,
         )
         methods.append(method)
-    logger = ExperimentLogger(f"results/{heilbronn_triangle.task_name}")
+    logger = ExperimentLogger(f"results/Helibronn_Triangle")
     experiment = Experiment(
         methods,
         [heilbronn_triangle],
diff --git a/run_benchmarks/kissing-11D.py b/run_benchmarks/kissing-11D.py
index 7281b74..3541b79 100644
--- a/run_benchmarks/kissing-11D.py
+++ b/run_benchmarks/kissing-11D.py
@@ -13,14 +13,14 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     # Kissing 11d
-    kissing_11d = get_kissing_number_11D_problems(True)[0]
+    kissing_11d = get_kissing_number_11D_problems(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/min_max_min_distance_ratio.py b/run_benchmarks/min_max_min_distance_ratio.py
index a5a45c1..9b40147 100644
--- a/run_benchmarks/min_max_min_distance_ratio.py
+++ b/run_benchmarks/min_max_min_distance_ratio.py
@@ -12,17 +12,17 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     ## Min max Distance ratio problem;
     # a[0] = 2-D min max distance ration problem.
     # a[1] = 3-D min max distance ration problem.
 
-    min_max_min_distance = get_min_max_dist_ratio_problem(True)[0]
+    min_max_min_distance = get_min_max_dist_ratio_problem(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/uncertainty_inequality.py b/run_benchmarks/uncertainty_inequality.py
index 3f575dd..f36d5cb 100644
--- a/run_benchmarks/uncertainty_inequality.py
+++ b/run_benchmarks/uncertainty_inequality.py
@@ -11,25 +11,26 @@
 if __name__ == "__main__":
     budget = 10
 
-    api_key = environ.get("GOOGLE_API_KEY")
+    # api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     # Helibronn n11 benchmark.
-    uncertain_ineq = get_fourier_problems(use_best=True)[0]
+    uncertain_ineq = get_fourier_problems(use_best=False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
             n_offspring=1,
             budget=budget,
             minimization=uncertain_ineq.minimisation,
+            elitism=True,
         )
         methods.append(method)
-    logger = ExperimentLogger(f"results/{uncertain_ineq.task_name}")
+    logger = ExperimentLogger(f"results/Fourier_Unequality")
     experiment = Experiment(
         methods,
         [uncertain_ineq],

From cbfef65ab59e6b084f231e05e5d88bcf4ace3062 Mon Sep 17 00:00:00 2001
From: "A.A. Shahane" <shahanea@neocortex.liacs.nl>
Date: Mon, 2 Feb 2026 16:52:09 +0000
Subject: [PATCH 4/4] All benchmarks fixed with compile isolation

---
 .../analysis/auto_correlation_base_spec.py    |  6 +-
 .../analysis/auto_correlation_ineq1.py        |  3 +-
 .../combinatorics/erdos_min_overlap.py        |  2 +-
 .../geometry/heilbronn_convex_region.py       |  2 +-
 .../benchmarks/geometry/heilbronn_triangle.py |  6 +-
 .../number_theory/sums_vs_differences.py      | 31 ++++++----
 .../benchmarks/packing/hexagon_packing.py     | 21 ++++---
 .../benchmarks/packing/rectangle_packing.py   | 56 ++++++-------------
 .../benchmarks/packing/unit_square_packing.py | 48 +++++++---------
 iohblade/solution.py                          |  2 +-
 run_benchmarks/hexagon_packing.py             |  8 +--
 run_benchmarks/rectangle_packing.py           |  8 +--
 run_benchmarks/sum_vs_differences.py          |  8 +--
 run_benchmarks/unit_square_packing.py         |  8 +--
 14 files changed, 100 insertions(+), 109 deletions(-)

diff --git a/iohblade/benchmarks/analysis/auto_correlation_base_spec.py b/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
index 8b15a10..12b3fa9 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_base_spec.py
@@ -138,14 +138,16 @@ def make_format_prompt(self):
 
 """
 
-    def _get_time_series(self, code, name) -> tuple[NDArray[np.float64], Optional[Exception]]:
+    def _get_time_series(
+        self, code, name
+    ) -> tuple[NDArray[np.float64], Optional[Exception]]:
         local_parameters = {}
 
         allowed = ["numpy", "scipy"]
 
         try:
             global_parameters = prepare_namespace(code, allowed)
-            compiled_code = compile(code, name, 'exec')
+            compiled_code = compile(code, name, "exec")
             exec(compiled_code, global_parameters, local_parameters)
             cls = local_parameters[name]
             if self.best_solution:
diff --git a/iohblade/benchmarks/analysis/auto_correlation_ineq1.py b/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
index 200ecc4..c060a66 100644
--- a/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
+++ b/iohblade/benchmarks/analysis/auto_correlation_ineq1.py
@@ -70,7 +70,8 @@ def evaluate(self, solution: Solution) -> Solution:
 
             score = float(np.max(g) / (I * I))  # minimize
             solution = solution.set_scores(
-                score, f"C1 ratio = {score:.6g}, best known = {self.best_known:.6g}; soln={f}"
+                score,
+                f"C1 ratio = {score:.6g}, best known = {self.best_known:.6g}; soln={f}",
             )
         except Exception as e:
             solution = solution.set_scores(float("inf"), f"calc-error {e}", e)
diff --git a/iohblade/benchmarks/combinatorics/erdos_min_overlap.py b/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
index aaa6146..0a0dc60 100644
--- a/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
+++ b/iohblade/benchmarks/combinatorics/erdos_min_overlap.py
@@ -126,7 +126,7 @@ def evaluate(self, solution: Solution, explogger=None):
             safe_globals = prepare_namespace(code, self.dependencies)
             compiled = compile(code, filename=name, mode="exec")
             exec(compiled, safe_globals, local_ns)
-            
+
             cls = local_ns[name]
 
             if self.best_solution is not None:
diff --git a/iohblade/benchmarks/geometry/heilbronn_convex_region.py b/iohblade/benchmarks/geometry/heilbronn_convex_region.py
index 750ea25..ba77ebb 100644
--- a/iohblade/benchmarks/geometry/heilbronn_convex_region.py
+++ b/iohblade/benchmarks/geometry/heilbronn_convex_region.py
@@ -112,7 +112,7 @@ def evaluate(self, solution: Solution, explogger=None):
         try:
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            
+
             compiled_code = compile(code, solution.name, "exec")
             exec(compiled_code, safe, local_ns)
             cls = local_ns[solution.name]
diff --git a/iohblade/benchmarks/geometry/heilbronn_triangle.py b/iohblade/benchmarks/geometry/heilbronn_triangle.py
index 320937d..227f98e 100644
--- a/iohblade/benchmarks/geometry/heilbronn_triangle.py
+++ b/iohblade/benchmarks/geometry/heilbronn_triangle.py
@@ -119,7 +119,7 @@ def evaluate(self, solution, explogger=None):
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
             compiled_code = compile(code, filename=name, mode="exec")
-            
+
             exec(compiled_code, safe, local_ns)
             cls = local_ns[solution.name]
             if self.best_solution is None:
@@ -176,4 +176,6 @@ def to_dict(self):
 if __name__ == "__main__":
     hbt = HeilbronnTriangle(n_points=10, best_known=1.11)
     print(hbt.get_prompt())
-    print('------------------------------------------------------------------------------------------------')
+    print(
+        "------------------------------------------------------------------------------------------------"
+    )
diff --git a/iohblade/benchmarks/number_theory/sums_vs_differences.py b/iohblade/benchmarks/number_theory/sums_vs_differences.py
index 367ab49..3cb3ba5 100644
--- a/iohblade/benchmarks/number_theory/sums_vs_differences.py
+++ b/iohblade/benchmarks/number_theory/sums_vs_differences.py
@@ -56,23 +56,26 @@ def evaluate(self, solution: Solution, explogger=None) -> Solution:
         """
 
         code = solution.code
+        name = solution.name if solution.name else "SumDiffCandidate"
 
         try:
             safe_globals = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe_globals, local_ns)
+
+            compiled_code = compile(code, name, "exec")
+            exec(compiled_code, safe_globals, local_ns)
             local_ns = clean_local_namespace(local_ns, safe_globals)
 
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
+            cls = local_ns[name]
             U = []
-            try:
+            if self.best_solution is not None:
                 U = cls(
                     self.max_set_size, best_known_configuration=self.best_solution
                 )()
-            except:
+            else:
                 U = cls(self.max_set_size)()
         except Exception as e:
-            solution.set_scores(-float("inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(-float("inf"), f"exec-error {e}", e)
             return solution
 
         try:
@@ -80,13 +83,13 @@ def evaluate(self, solution: Solution, explogger=None) -> Solution:
             U = sorted({int(x) for x in U})
             ok, msg = self._validate_U(U)
             if not ok:
-                solution.set_scores(-float("inf"), msg, "invalid-U")
+                solution.set_scores(-float("inf"), f"invalid-U: {msg}", "invalid-U")
                 return solution
 
             M = U[-1]
             if M > 1_000_000:
                 solution.set_scores(
-                    -float("inf"), f"max(U) too large: {M}", "range-exceeded"
+                    -float("inf"), f"Range exceeded: max(U) too large: {M}", "range-exceeded"
                 )
                 return solution
 
@@ -112,12 +115,20 @@ def evaluate(self, solution: Solution, explogger=None) -> Solution:
             diff_sz = int((np.round(conv_diff) > 0).sum())
 
             if sum_sz <= 0 or diff_sz <= 0:
-                solution.set_scores(-float("inf"), "degenerate U", "degenerate")
+                solution.set_scores(
+                    -float("inf"),
+                    "degenerate U",
+                    ValueError(f"U degenerated: {sum_sz}, {diff_sz}"),
+                )
                 return solution
 
             denom = math.log(2 * M + 1)
             if denom <= 0:
-                solution.set_scores(-float("inf"), "log(2*max(U)+1) <= 0", "degenerate")
+                solution.set_scores(
+                    -float("inf"),
+                    "log(2*max(U)+1) <= 0",
+                    ValueError(f"Got log(2*max(U) + 1)={denom}"),
+                )
                 return solution
 
             c = 1.0 + math.log(diff_sz / sum_sz) / denom
@@ -127,7 +138,7 @@ def evaluate(self, solution: Solution, explogger=None) -> Solution:
             )
 
         except Exception as e:
-            solution.set_scores(-float("inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(-float("inf"), f"calc-error {e}", e)
         return solution
 
     # --- helpers aligned to single-set U ---
diff --git a/iohblade/benchmarks/packing/hexagon_packing.py b/iohblade/benchmarks/packing/hexagon_packing.py
index 2b1b37e..6429f66 100644
--- a/iohblade/benchmarks/packing/hexagon_packing.py
+++ b/iohblade/benchmarks/packing/hexagon_packing.py
@@ -109,20 +109,21 @@ def axes(poly):
     # ---------- evaluation ----------
     def evaluate(self, solution, explogger=None):
         code = solution.code
+        name = solution.name if solution.name else "HexagonPackingSolver"
 
         try:
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe)
+            compiled_code = compile(code, name, "exec")
+            exec(compiled_code, safe, local_ns)
 
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
-                arr = cls(self.n_hex, best_known_configuration=self.best_known)()
-            except:
+            cls = local_ns[name]
+            if self.best_solution is not None:
+                arr = cls(self.n_hex, best_known_configuration=self.best_solution)()
+            else:
                 arr = cls(self.n_hex)()
         except Exception as e:
-            solution.set_scores(float("inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(float("inf"), f"exec-error {e}", e)
             return solution
 
         try:
@@ -141,7 +142,9 @@ def evaluate(self, solution, explogger=None):
             for i in range(self.n_hex):
                 for j in range(i + 1, self.n_hex):
                     if self._overlap_strict(polys[i], polys[j], self.tolerance):
-                        raise ValueError(f"hexagons {i} and {j} overlap")
+                        raise ValueError(
+                            f"hexagons {i} @ {polys[i]} and {j} @ {polys[j]} overlap"
+                        )
 
             V = np.vstack(polys)
             side = self._outer_side_from_vertices(V)
@@ -151,7 +154,7 @@ def evaluate(self, solution, explogger=None):
                 f"outer_side_length={side:.6g}, best known side length={self.best_known}",
             )
         except Exception as e:
-            solution.set_scores(float("inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("inf"), f"calc-error {e}", e)
         return solution
 
     def test(self, solution: Solution):
diff --git a/iohblade/benchmarks/packing/rectangle_packing.py b/iohblade/benchmarks/packing/rectangle_packing.py
index 623b629..38ded39 100644
--- a/iohblade/benchmarks/packing/rectangle_packing.py
+++ b/iohblade/benchmarks/packing/rectangle_packing.py
@@ -32,7 +32,7 @@ def __init__(
         self.best_solution = best_solution
 
         task_name = f"rectangle_packing_n{self.n_circles}_perim{self.perimeter:g}"
-        PackingBase.__init__(self, task_name)
+        PackingBase.__init__(self, task_name, best_solution=best_solution)
         Problem.__init__(self, name=task_name)
 
         print(
@@ -76,16 +76,17 @@ def __init__(
 
     def evaluate(self, solution: Solution, explogger=None):
         code = solution.code
+        name = solution.name if solution.name else "RectanglePackingSolver"
         try:
             safe = prepare_namespace(code, self.dependencies)
             local_ns = {}
-            exec(code, safe, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe)
+            compiled_code = compile(code, name, "exec")
+            exec(compiled_code, safe, local_ns)
 
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            try:
+            cls = local_ns[name]
+            if self.best_solution is not None:
                 result = cls(self.n_circles, self.best_solution)()
-            except:
+            else:
                 result = cls(self.n_circles)()
 
             if isinstance(result, tuple) and len(result) == 3:
@@ -96,44 +97,29 @@ def evaluate(self, solution: Solution, explogger=None):
                 U = result
                 width = height = self.perimeter / 4.0
         except Exception as e:
-            solution.set_scores(float("-inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(float("-inf"), f"exec-error {e}", e)
             return solution
 
         try:
             # perimeter equality
             if width <= 0 or height <= 0:
-                solution.set_scores(
-                    float("-inf"),
-                    f"non-positive rectangle dimensions {width}×{height}",
-                    "invalid-dimensions",
+                raise ValueError(
+                    f"non-positive rectangle dimensions {width} x {height}"
                 )
-                return solution
             if abs(2 * (width + height) - self.perimeter) > self.tolerance:
-                solution.set_scores(
-                    float("-inf"),
-                    f"perimeter mismatch: 2*(w+h)={2*(width+height):.12f}",
-                    "perimeter-mismatch",
+                raise ValueError(
+                    f"perimeter mismatch {2*(width+height):.12f}, expected {self.perimeter:.12f}"
                 )
-                return solution
 
             U = np.asarray(U, dtype=float)
             if U.shape != (self.n_circles, 3):
-                solution.set_scores(
-                    float("-inf"),
-                    f"expected ({self.n_circles},3), got {U.shape}",
-                    "format-error",
-                )
-                return solution
+                raise ValueError(f"expected ({self.n_circles},3), got {U.shape}")
 
             if np.any(U[:, 2] <= 0):
                 idx = int(np.where(U[:, 2] <= 0)[0][0])
-                solution.set_scores(
-                    float("-inf"),
-                    f"non-positive radius at index {idx}",
-                    "invalid-radius",
+                raise ValueError(
+                    f"non-positive radius for circle {idx}: r = {U[idx, 2]}"
                 )
-                return solution
-
             # containment
             x, y, r = U[:, 0], U[:, 1], U[:, 2]
             if (
@@ -142,10 +128,7 @@ def evaluate(self, solution: Solution, explogger=None):
                 or np.any(y - r < -self.tolerance)
                 or np.any(y + r > height + self.tolerance)
             ):
-                solution.set_scores(
-                    float("-inf"), "circle outside rectangle", "out-of-bounds"
-                )
-                return solution
+                raise ValueError("circle(s) exceed rectangle boundary.")
 
             # disjointness
             for i in range(self.n_circles):
@@ -153,10 +136,7 @@ def evaluate(self, solution: Solution, explogger=None):
                     dx = U[i, 0] - U[j, 0]
                     dy = U[i, 1] - U[j, 1]
                     if dx * dx + dy * dy < (U[i, 2] + U[j, 2] - self.tolerance) ** 2:
-                        solution.set_scores(
-                            float("-inf"), f"overlap between {i} and {j}", "overlap"
-                        )
-                        return solution
+                        raise ValueError(f"circles {i} and {j} overlap.")
 
             score = float(np.sum(U[:, 2]))
             solution.set_scores(
@@ -165,7 +145,7 @@ def evaluate(self, solution: Solution, explogger=None):
             )
             return solution
         except Exception as e:
-            solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("-inf"), f"calc-error {e}", e)
             return solution
 
     def test(self, solution: Solution):
diff --git a/iohblade/benchmarks/packing/unit_square_packing.py b/iohblade/benchmarks/packing/unit_square_packing.py
index 9763c91..f3f26c5 100644
--- a/iohblade/benchmarks/packing/unit_square_packing.py
+++ b/iohblade/benchmarks/packing/unit_square_packing.py
@@ -4,7 +4,7 @@
 from iohblade.problem import Problem
 
 from .packing_base import PackingBase
-from iohblade.misc.prepare_namespace import prepare_namespace, clean_local_namespace
+from iohblade.misc.prepare_namespace import prepare_namespace
 
 
 class UnitSquarePacking(PackingBase, Problem):
@@ -23,7 +23,7 @@ def __init__(
         self.best_solution = best_solution
 
         task_name = f"unit_square_packing_n{self.n_circles}"
-        PackingBase.__init__(self, task_name)
+        PackingBase.__init__(self, task_name, best_solution=best_solution)
         Problem.__init__(self, name=task_name)
 
         print(
@@ -48,37 +48,35 @@ def __init__(
     # ---------- evaluation ----------
     def evaluate(self, solution: Solution, explogger=None):
         code = solution.code
-        safe = prepare_namespace(code, self.dependencies)
+        name = solution.name if solution.name else "UnitSquarePackingSolver"
+
         try:
             local_ns = {}
+            safe = prepare_namespace(code, self.dependencies)
+
+            compiled_code = compile(code, name, "exec")
 
-            exec(code, safe, local_ns)
-            local_ns = clean_local_namespace(local_ns, safe)
-            cls = next(v for v in local_ns.values() if isinstance(v, type))
-            circles = cls(self.n_circles)()
+            exec(compiled_code, safe, local_ns)
+            cls = local_ns[name]
+            if self.best_solution is not None:
+                circles = cls(self.n_circles, self.best_solution)()
+            else:
+                circles = cls(self.n_circles)()
         except Exception as e:
-            solution.set_scores(float("-inf"), f"exec-error {e}", "exec-failed")
+            solution.set_scores(float("-inf"), f"exec-error {e}", e)
             return solution
 
         try:
             U = np.asarray(circles, dtype=float)
             if U.shape != (self.n_circles, 3):
-                solution.set_scores(
-                    float("-inf"),
-                    f"expected ({self.n_circles},3), got {U.shape}",
-                    "format-error",
+                raise ValueError(
+                    f"Format error: expected ({self.n_circles},3), got {U.shape}"
                 )
-                return solution
 
             # radii positive
             if np.any(U[:, 2] <= 0):
                 idx = int(np.where(U[:, 2] <= 0)[0][0])
-                solution.set_scores(
-                    float("-inf"),
-                    f"non-positive radius at index {idx}",
-                    "invalid-radius",
-                )
-                return solution
+                raise ValueError(f"non-positive radius at index {idx}.")
 
             # containment in unit square
             x, y, r = U[:, 0], U[:, 1], U[:, 2]
@@ -88,10 +86,7 @@ def evaluate(self, solution: Solution, explogger=None):
                 or np.any(y - r < -self.tolerance)
                 or np.any(y + r > 1 + self.tolerance)
             ):
-                solution.set_scores(
-                    float("-inf"), "circle outside the unit square", "out-of-bounds"
-                )
-                return solution
+                raise ValueError("Circle outside the unit square.")
 
             # pairwise disjoint
             for i in range(self.n_circles):
@@ -99,10 +94,7 @@ def evaluate(self, solution: Solution, explogger=None):
                     dx = U[i, 0] - U[j, 0]
                     dy = U[i, 1] - U[j, 1]
                     if dx * dx + dy * dy < (U[i, 2] + U[j, 2] - self.tolerance) ** 2:
-                        solution.set_scores(
-                            float("-inf"), f"overlap between {i} and {j}", "overlap"
-                        )
-                        return solution
+                        raise ValueError(f"circles {i} and {j} overlap.")
 
             score = float(np.sum(U[:, 2]))
             solution.set_scores(
@@ -111,7 +103,7 @@ def evaluate(self, solution: Solution, explogger=None):
             )
             return solution
         except Exception as e:
-            solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
+            solution.set_scores(float("-inf"), f"calc-error {e}", e)
             return solution
 
     def test(self, solution: Solution):
diff --git a/iohblade/solution.py b/iohblade/solution.py
index ed5bc83..355acc1 100644
--- a/iohblade/solution.py
+++ b/iohblade/solution.py
@@ -90,7 +90,7 @@ def set_scores(self, fitness, feedback="", error=""):
         self.feedback = feedback
         self.error = error
         return self
-    
+
     def set_scores(
         self, fitness: float, feedback="", error: Optional[Exception] = None
     ):
diff --git a/run_benchmarks/hexagon_packing.py b/run_benchmarks/hexagon_packing.py
index 09a9133..9a13ca4 100644
--- a/run_benchmarks/hexagon_packing.py
+++ b/run_benchmarks/hexagon_packing.py
@@ -13,18 +13,18 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     #----------------------------------------------
     # Helibronn packing problem.
     # * a[0] = n11 problem.
     # * a[1] = n12 problem.
     #----------------------------------------------
-    hexagon_packing = get_hexagon_packing_problems(True)[0]
+    hexagon_packing = get_hexagon_packing_problems(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/rectangle_packing.py b/run_benchmarks/rectangle_packing.py
index 830010c..f7885b4 100644
--- a/run_benchmarks/rectangle_packing.py
+++ b/run_benchmarks/rectangle_packing.py
@@ -13,14 +13,14 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     # RectangleProblem(perimeter=4, circles=21)
-    rectangle_packing = get_rectangle_packing_problems(True)[0]
+    rectangle_packing = get_rectangle_packing_problems(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/sum_vs_differences.py b/run_benchmarks/sum_vs_differences.py
index a62beda..4bb3044 100644
--- a/run_benchmarks/sum_vs_differences.py
+++ b/run_benchmarks/sum_vs_differences.py
@@ -13,14 +13,14 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     # Get sums vs differences benchmark, this one only has one instance.
-    sum_vs_difference = get_sum_vs_difference_problem(True)[0]
+    sum_vs_difference = get_sum_vs_difference_problem(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,
diff --git a/run_benchmarks/unit_square_packing.py b/run_benchmarks/unit_square_packing.py
index 606b4fe..4381d3b 100644
--- a/run_benchmarks/unit_square_packing.py
+++ b/run_benchmarks/unit_square_packing.py
@@ -13,18 +13,18 @@
 
     api_key = environ.get("GOOGLE_API_KEY")
 
-    ollama_llm = Ollama_LLM()
-    gemini_llm = Gemini_LLM(api_key=api_key)
+    ollama_llm = Ollama_LLM('gemma3:12b')
+    # gemini_llm = Gemini_LLM(api_key=api_key)
 
     #----------------------------------------------------------------------------------
     # Gets benchmarks for unit square packing problems:
     #   arr[0]:  Unit Square Packing benchmark for Unit Square, packing 26 circles.
     #   arr[1] = Unit Square Packing benchmark for Unit Square, packing 32 circles.
     #----------------------------------------------------------------------------------
-    unit_square_packing = get_square_packing_problems(True)[0]
+    unit_square_packing = get_square_packing_problems(False)[0]
 
     methods = []
-    for llm in [gemini_llm]:
+    for llm in [ollama_llm]:
         method = LLaMEA(
             llm,
             n_parents=1,