From dc215e55962af03663920b0992530cf351eb1619 Mon Sep 17 00:00:00 2001 From: wniec Date: Sun, 8 Feb 2026 19:11:20 +0100 Subject: [PATCH 01/20] add original RL-DAS agent as a baseline --- .python-version | 1 + .../agents/RLDAS_agent.py | 405 ++++++++++++++++++ .../agents/agent_state.py | 119 ++++- dynamicalgorithmselection/agents/ppo_utils.py | 48 +++ dynamicalgorithmselection/experiments/core.py | 4 +- .../experiments/experiment.py | 2 +- .../experiments/neuroevolution.py | 5 +- dynamicalgorithmselection/main.py | 4 +- dynamicalgorithmselection/optimizers/DE/DE.py | 60 +++ .../optimizers/DE/JDE21.py | 139 ++++++ .../optimizers/DE/MADDE.py | 243 +++++++++++ .../optimizers/DE/NL_SHADE_RSP.py | 180 ++++++++ .../optimizers/DE/__init__.py | 0 .../optimizers/ES/CMAES.py | 13 - 14 files changed, 1200 insertions(+), 23 deletions(-) create mode 100644 .python-version create mode 100644 dynamicalgorithmselection/agents/RLDAS_agent.py create mode 100644 dynamicalgorithmselection/optimizers/DE/DE.py create mode 100644 dynamicalgorithmselection/optimizers/DE/JDE21.py create mode 100644 dynamicalgorithmselection/optimizers/DE/MADDE.py create mode 100644 dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py create mode 100644 dynamicalgorithmselection/optimizers/DE/__init__.py diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py new file mode 100644 index 0000000..0664303 --- /dev/null +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -0,0 +1,405 @@ +import numpy as np +import torch +import copy +import os + +from dynamicalgorithmselection.agents.agent import Agent +from dynamicalgorithmselection.agents.agent_state import get_la_features +from dynamicalgorithmselection.agents.ppo_utils import ( + DEVICE, + RolloutBuffer, + RLDASNetwork, +) +from 
dynamicalgorithmselection.optimizers.Optimizer import Optimizer + +INITIAL_POPSIZE = 170 + + +class RLDASAgent(Agent): + def __init__(self, problem, options): + super().__init__(problem, options) + + self.alg_names = [alg.__name__ for alg in self.actions] + self.n_algorithms = len(self.actions) + self.dim = self.ndim_problem + + self.network = RLDASNetwork( + d_dim=self.dim, num_algorithms=self.n_algorithms + ).to(DEVICE) + + self.optimizer = torch.optim.Adam(self.network.parameters(), lr=3e-5) + self.ah_vectors = np.zeros((self.n_algorithms, 2, self.dim)) + self.alg_usage_counts = np.zeros(self.n_algorithms) + self.context_memory = {name: {} for name in self.alg_names} + self.context_memory["Common"] = {} + self.mean_rewards = options.get("mean_rewards", []) + self.best_50_mean = float("inf") + self.schedule_interval = options.get( + "schedule_interval", int(self.max_function_evaluations / 50) + ) + + expected_trajectory_length = int( + np.ceil(self.max_function_evaluations / self.schedule_interval) + ) + buffer_capacity = expected_trajectory_length + 10 # Safety margin + self.buffer = RolloutBuffer(capacity=buffer_capacity, device=DEVICE) + + def _load_parameters(self, options): + if p := options.get("network_parameters", None): + self.network.load_state_dict(p) + if p := options.get("optimizer", None): + self.optimizer.load_state_dict(p) + + def get_state(self, pop_x, pop_y): + la = get_la_features(self, pop_x, pop_y) + ah = self.ah_vectors.copy() + + return la, ah + + def _update_ah_history( + self, alg_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ): + """ + Updates Shift Vectors (SV) for the selected algorithm. + Eq (8), (9). 
+ """ + sv_best_current = x_best_new - x_best_old + sv_worst_current = x_worst_new - x_worst_old + + H = self.alg_usage_counts[alg_idx] + + self.ah_vectors[alg_idx, 0] = ( + self.ah_vectors[alg_idx, 0] * H + sv_best_current + ) / (H + 1) + self.ah_vectors[alg_idx, 1] = ( + self.ah_vectors[alg_idx, 1] * H + sv_worst_current + ) / (H + 1) + + self.alg_usage_counts[alg_idx] += 1 + + def _save_context(self, optimizer, alg_name): + common_attrs = ["memory_f", "memory_cr", "archive", "archive_fitness"] + for attr in common_attrs: + if hasattr(optimizer, attr): + self.context_memory["Common"][attr] = getattr(optimizer, attr) + + specific_attrs = [] + if "JDE21" in alg_name: + specific_attrs = [ + "tau1", + "tau2", + "ageLmt", + "eps", + "myEqs", + "successful_f", + "successful_cr", + ] + elif "MadDE" in alg_name: + specific_attrs = ["pm", "pbest", "pqBX"] + elif "NL_SHADE" in alg_name: + specific_attrs = ["nA", "pA"] + + for attr in specific_attrs: + if hasattr(optimizer, attr): + self.context_memory[alg_name][attr] = getattr(optimizer, attr) + + def _restore_context(self, optimizer, alg_name): + """ + Restores parameters to the optimizer from self.context_memory. + """ + for attr, val in self.context_memory["Common"].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + if alg_name in self.context_memory: + for attr, val in self.context_memory[alg_name].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + def _select_action(self, state): + """ + Selects action using the shared network with split inputs. 
+ """ + la_state, ah_state = state + la_tensor = torch.FloatTensor(la_state).unsqueeze(0).to(DEVICE) + ah_tensor = torch.FloatTensor(ah_state).unsqueeze(0).to(DEVICE) + + with torch.no_grad(): + probs, value = self.network(la_tensor, ah_tensor) + dist = torch.distributions.Categorical(probs) + action = dist.sample() + log_prob = dist.log_prob(action) + + return action.item(), log_prob, value + + def initialize(self): + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + size=(INITIAL_POPSIZE, self.ndim_problem), + ) + y = np.zeros((INITIAL_POPSIZE,)) + for i in range(INITIAL_POPSIZE): + y[i] = self._evaluate_fitness(x[i]) + return x, y + + def optimize(self, fitness_function=None, args=None): + """ + Main Optimization Loop implementing RL-DAS workflow (Algorithm 1). + Does NOT use checkpoints. Uses interval-based scheduling. + """ + fitness = Optimizer.optimize(self, fitness_function) + population_x, population_y = self.initialize() + self.n_function_evaluations = INITIAL_POPSIZE + + best_idx = np.argmin(population_y) + best_y_global = population_y[best_idx] + best_x_global = population_x[best_idx].copy() + + self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 + + self.ah_vectors.fill(0.0) + self.alg_usage_counts.fill(0.0) + self.context_memory = {name: {} for name in self.alg_names} + self.context_memory["Common"] = {} + + trajectory = [] # To store (s, a, r_raw, log_prob, val, done) + + while self.n_function_evaluations < self.max_function_evaluations: + state = self.get_state(population_x, population_y) + + action_idx, log_prob, value = self._select_action(state) + selected_alg_class = self.actions[action_idx] + alg_name = self.alg_names[action_idx] + + sub_opt = selected_alg_class(self.problem, self.options) + sub_opt.n_function_evaluations = self.n_function_evaluations + sub_opt.max_function_evaluations = self.max_function_evaluations + + self._restore_context(sub_opt, alg_name) + + 
x_best_old = population_x[np.argmin(population_y)].copy() + x_worst_old = population_x[np.argmax(population_y)].copy() + cost_old = np.min(population_y) + + target_fes = min( + self.n_function_evaluations + self.schedule_interval, + self.max_function_evaluations, + ) + sub_opt.max_function_evaluations = target_fes + + sub_opt.population = population_x + sub_opt.fitness = population_y + + res = sub_opt.optimize() + population_x = sub_opt.population + population_y = sub_opt.fitness + + self.n_function_evaluations = sub_opt.n_function_evaluations + + self._save_context(sub_opt, alg_name) + + x_best_new = population_x[np.argmin(population_y)].copy() + x_worst_new = population_x[np.argmax(population_y)].copy() + cost_new = np.min(population_y) + + self._update_ah_history( + action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ) + + adc = (cost_old - cost_new) / self.initial_cost + + done = self.n_function_evaluations >= self.max_function_evaluations + + trajectory.append( + { + "state": state, + "action": action_idx, + "adc": adc, + "log_prob": log_prob, + "value": value, + "done": done, + } + ) + + best_y_global = min(best_y_global, cost_new) + + fes_end = self.n_function_evaluations + speed_factor = self.max_function_evaluations / fes_end + + for step in trajectory: + final_reward = step["adc"] * speed_factor + self.rewards.append(final_reward) + la_state, ah_state = step["state"] + + la_tensor = torch.FloatTensor(la_state).to(DEVICE) + ah_tensor = torch.FloatTensor(ah_state).to(DEVICE) + + self.buffer.add( + (la_tensor, ah_tensor), + step["action"], + final_reward, + step["done"], + step["log_prob"], + step["value"], + ) + + if self.train_mode: + T = len(trajectory) + K = max(1, int(0.3 * T)) + + self.ppo_update( + self.buffer, + epochs=K, + minibatch_size=32, + clip_eps=0.2, + value_coef=0.5, + entropy_coef=0.01, + ) + + self.buffer.clear() + + return self._collect(fitness, self.best_so_far_y) + + def _collect(self, fitness, y=None): + results, _ = 
super()._collect(fitness, y) + self.mean_rewards.append(sum(self.rewards) / len(self.rewards)) + agent_state = { + "network_parameters": self.network.state_dict(), + "optimizer": self.optimizer.state_dict(), + "buffer": self.buffer, + "mean_rewards": self.mean_rewards, + "reward_normalizer": self.reward_normalizer, + "state_normalizer": self.state_normalizer, + } + + last_50_mean = sum(self.mean_rewards[-50:]) / len(self.mean_rewards[-50:]) + if self.best_50_mean > last_50_mean: + self.best_50_mean = last_50_mean + torch.save(agent_state, os.path.join("models", f"{self.name}_best.pth")) + + if self.n_function_evaluations == self.max_function_evaluations: + torch.save(agent_state, os.path.join("models", f"{self.name}_final.pth")) + + return results, agent_state + + def _update_on_minibatch( + self, + mb_la, + mb_ah, + mb_actions, + mb_old_log_probs, + mb_returns, + mb_advantages, + clip_eps, + value_coef, + entropy_coef, + ): + policy_probs, values_pred = self.network(mb_la, mb_ah) + + dist = torch.distributions.Categorical(policy_probs) + dist_log_probs = dist.log_prob(mb_actions) + entropy = dist.entropy().mean() + + ratio = torch.exp(dist_log_probs - mb_old_log_probs) + + values_pred = values_pred.squeeze(1) + value_loss = torch.nn.functional.mse_loss(values_pred, mb_returns) + + surr1 = ratio * mb_advantages + surr2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * mb_advantages + actor_loss = -torch.min(surr1, surr2).mean() + + loss = actor_loss + value_coef * value_loss - entropy_coef * entropy + + self.optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) + self.optimizer.step() + + return actor_loss.detach().item(), value_loss.detach().item() + + def ppo_update( + self, + buffer, + epochs=4, + minibatch_size=256, + clip_eps=0.2, + value_coef=0.5, + entropy_coef=0.01, + ): + la_states, ah_states, actions, old_log_probs, returns, advantages = ( + self._compute_advantages(buffer) + ) + dataset_size = 
la_states.shape[0] + + n_batches = 0 + + actual_minibatch_size = min(minibatch_size, dataset_size) + + for epoch in range(epochs): + indices = np.arange(dataset_size) + np.random.shuffle(indices) + + for start in range(0, dataset_size, actual_minibatch_size): + idx = indices[start : start + actual_minibatch_size] + + self._update_on_minibatch( + la_states[idx], + ah_states[idx], # Pass both + actions[idx], + old_log_probs[idx], + returns[idx], + advantages[idx], + clip_eps, + value_coef, + entropy_coef, + ) + n_batches += 1 + + def _compute_advantages(self, buffer): + """ + Computes GAE handling split (LA, AH) state inputs. + """ + la_list, ah_list = zip(*buffer.states) + + la_states = torch.stack(la_list).to(DEVICE) + ah_states = torch.stack(ah_list).to(DEVICE) + + rewards = torch.tensor(buffer.rewards, dtype=torch.float32).to(DEVICE) + dones = torch.tensor(buffer.dones, dtype=torch.float32).to(DEVICE) + values = torch.stack(buffer.values).squeeze().to(DEVICE) + + with torch.no_grad(): + if buffer.dones[-1]: + next_value = 0.0 + else: + _, last_val_tens = self.network( + la_states[-1].unsqueeze(0), ah_states[-1].unsqueeze(0) + ) + next_value = last_val_tens.item() + + advantages = [] + last_gae_lam = 0 + gamma = 0.99 + lam = 0.95 + + for step in reversed(range(len(rewards))): + next_non_terminal = 1.0 - dones[step] + next_val = next_value if step == len(rewards) - 1 else values[step + 1] + + delta = rewards[step] + gamma * next_val * next_non_terminal - values[step] + last_gae_lam = delta + gamma * lam * next_non_terminal * last_gae_lam + advantages.insert(0, last_gae_lam) + + advantages = torch.tensor(advantages, dtype=torch.float32).to(DEVICE) + returns = advantages + values + + return ( + la_states, + ah_states, + torch.tensor(buffer.actions).to(DEVICE), + torch.stack(buffer.log_probs).to(DEVICE), + returns, + advantages, + ) diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index dc3b896..789a8e1 
100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -11,6 +11,8 @@ calculate_information_content, calculate_ela_distribution, # Information Content ) +from scipy.spatial.distance import pdist +from scipy.stats import spearmanr from dynamicalgorithmselection.NeurELA.NeurELA import feature_embedder from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd @@ -340,20 +342,127 @@ def normalize(self, state, update=True): update (bool): Whether to update the running statistics. Usually True during training, False during testing. """ - # Ensure state is an array state = np.asarray(state) - # If training, update the statistics if update: - # RunningMeanStd expects a batch, so we add a dimension if needed if len(state.shape) == 1: self.rms.update(state.reshape(1, -1)) else: self.rms.update(state) - # Calculate standard deviation std = np.sqrt(self.rms.var) + 1e-8 - # Normalize and Clip to prevent extreme outliers (e.g., -5 to 5) normalized_state = (state - self.rms.mean) / std return np.clip(normalized_state, -5.0, 5.0) + + +def get_la_features(agent, pop_x, pop_y): + """ + Extracts 9 Landscape Analysis features described in Reinforcement Learning Dynamic Algorithm Selection. + Includes sampling-based features (f5-f8) which consume function evaluations. 
+ """ + sorted_idx = np.argsort(pop_y) + pop_x = pop_x[sorted_idx] + pop_y = pop_y[sorted_idx] + + best_y = pop_y[0] + best_x = pop_x[0] + n = len(pop_x) + + norm_factor = ( + agent.initial_cost + if agent.initial_cost and abs(agent.initial_cost) > 1e-9 + else 1.0 + ) + f1 = best_y / norm_factor + + dists_to_best = np.linalg.norm(pop_x - best_x, axis=1) + if np.std(pop_y) < 1e-9 or np.std(dists_to_best) < 1e-9: + f2 = 0.0 + else: + fdc, _ = spearmanr(pop_y, dists_to_best) + f2 = fdc if not np.isnan(fdc) else 0.0 + + n_top = max(2, int(0.1 * n)) + + if n > 1: + dist_matrix_all = pdist(pop_x) + disp_all = np.mean(dist_matrix_all) if len(dist_matrix_all) > 0 else 0.0 + + dist_matrix_top = pdist(pop_x[:n_top]) + disp_top = np.mean(dist_matrix_top) if len(dist_matrix_top) > 0 else 0.0 + + f3 = disp_all - disp_top + f4 = np.max(dist_matrix_all) if len(dist_matrix_all) > 0 else 0.0 + else: + f3, f4 = 0.0, 0.0 + + remaining_fes = agent.max_function_evaluations - agent.n_function_evaluations + cost_per_sample = n # 1 generation of size N + + sampled_pops_y = [] + + if remaining_fes >= (2 * cost_per_sample): + sample_indices = np.random.choice(len(agent.actions), 2, replace=False) + + for idx in sample_indices: + alg_class = agent.actions[idx] + + sub_opt = alg_class(agent.problem, agent.options) + + sub_opt.population = pop_x.copy() + sub_opt.fitness = pop_y.copy() + + sub_opt.n_function_evaluations = 0 + sub_opt.max_function_evaluations = cost_per_sample + + sub_opt.optimize() + + sampled_pops_y.append(sub_opt.fitness) + agent.n_function_evaluations += sub_opt.n_function_evaluations + + f5, f6, f7, f8 = 0.0, 0.0, 0.0, 0.0 + + if len(sampled_pops_y) > 0: + sorted_current = np.sort(pop_y) + sorted_samples = [np.sort(sy) for sy in sampled_pops_y] + avg_sample_y = np.mean(sorted_samples, axis=0) + + # Slopes: (y_{i+1} - y_i) + diff_current = np.diff(sorted_current) + diff_sample = np.diff(avg_sample_y) + + with np.errstate(divide="ignore", invalid="ignore"): + ratios = 
diff_current / diff_sample + ratios[diff_sample == 0] = 0.0 + ratios[np.isnan(ratios)] = 0.0 + + f5 = min(np.sum(ratios), 0.0) + + S = len(sampled_pops_y) + eps = 1e-8 + + neutral_count = 0 + no_improve_counts = np.zeros(n) # For f7 + all_worse_counts = np.zeros(n) # For f8 + + for sy in sampled_pops_y: + neutral_count += np.sum(np.abs(pop_y - sy) < eps) + + improved = sy < pop_y + no_improve_counts += improved.astype(int) # Add 1 if improved + + worse = sy > pop_y + all_worse_counts += worse.astype(int) + + f6 = neutral_count / (n * S) + + alphas = (no_improve_counts == 0).astype(float) + f7 = np.mean(alphas) + + betas = (all_worse_counts == S).astype(float) + f8 = np.mean(betas) + + f9 = agent.n_function_evaluations / agent.max_function_evaluations + + return np.array([f1, f2, f3, f4, f5, f6, f7, f8, f9]) diff --git a/dynamicalgorithmselection/agents/ppo_utils.py b/dynamicalgorithmselection/agents/ppo_utils.py index 651631a..cf1f19f 100644 --- a/dynamicalgorithmselection/agents/ppo_utils.py +++ b/dynamicalgorithmselection/agents/ppo_utils.py @@ -139,3 +139,51 @@ def __init__(self): def forward(self, advantage, log_prob): return -advantage * log_prob + + +class RLDASNetwork(nn.Module): + def __init__(self, d_dim, num_algorithms, la_dim=9): + super(RLDASNetwork, self).__init__() + self.L = num_algorithms + self.D = d_dim + self.la_dim = la_dim + + self.ah_input_flat_dim = self.L * 2 * self.D + + self.ah_embed = nn.Sequential( + nn.Linear(self.ah_input_flat_dim, 64), + nn.ReLU(), + nn.Linear(64, 2 * self.L), # Output size aligned with paper description + nn.ReLU(), + ) + self.fusion_input_dim = self.la_dim + (2 * self.L) + + self.dv_layer = nn.Sequential(nn.Linear(self.fusion_input_dim, 64), nn.Tanh()) + + self.actor_head = nn.Sequential( + nn.Linear(64, 16), nn.Tanh(), nn.Linear(16, self.L), nn.Softmax(dim=-1) + ) + + self.critic_head = nn.Sequential( + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1), # Scalar Value + ) + + def forward(self, la_state, ah_state): 
+ if ah_state.dim() > 2: + batch_size = ah_state.size(0) + ah_flat = ah_state.view(batch_size, -1) + else: + ah_flat = ah_state + + v_ah = self.ah_embed(ah_flat) + + combined = torch.cat([la_state, v_ah], dim=1) + + dv = self.dv_layer(combined) + + probs = self.actor_head(dv) + value = self.critic_head(dv) + + return probs, value diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 357145c..0dd0441 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -19,7 +19,7 @@ def run_testing( problem_ids: list[str], observer: cocoex.Observer, ): - for problem_id in tqdm(problem_ids): + for problem_id in tqdm(problem_ids, smoothing=0.0): problem_instance = problems_suite.get_problem(problem_id) problem_instance.observe_with(observer) max_fe = evaluations_multiplier * problem_instance.dimension @@ -47,7 +47,7 @@ def run_training( problem_ids: list[str], ): agent_state = {} - for problem_id in tqdm(np.random.permutation(problem_ids)): + for problem_id in tqdm(np.random.permutation(problem_ids), smoothing=0.0): problem_instance = problems_suite.get_problem(problem_id) max_fe = evaluations_multiplier * problem_instance.dimension options["max_function_evaluations"] = max_fe diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index 489fd02..a78fd55 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -190,7 +190,7 @@ def run_comparison( # We use the problem_ids from the first suite to iterate _, problem_ids = get_suite("all", False) - for problem_id in tqdm(problem_ids, desc="Evaluating Problems"): + for problem_id in tqdm(problem_ids, desc="Evaluating Problems", smoothing=0.0): stats = {} max_fe = None diff --git a/dynamicalgorithmselection/experiments/neuroevolution.py 
b/dynamicalgorithmselection/experiments/neuroevolution.py index 30420dc..04830b7 100644 --- a/dynamicalgorithmselection/experiments/neuroevolution.py +++ b/dynamicalgorithmselection/experiments/neuroevolution.py @@ -84,7 +84,10 @@ def evaluate(self, genomes, config): jobs.append(self.pool.apply_async(self.eval_function, (genome, config))) for job, (ignored_genome_id, genome) in tqdm( - zip(jobs, genomes), total=len(jobs), desc="Evaluating Genomes" + zip(jobs, genomes), + total=len(jobs), + desc="Evaluating Genomes", + smoothing=0.0, ): # Result is now a tuple: (fitness, log_dict) fitness, log_data = job.get(timeout=self.timeout) diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index c120fa2..1f80d71 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -8,6 +8,7 @@ import torch import wandb +from dynamicalgorithmselection.agents.RLDAS_agent import RLDASAgent from dynamicalgorithmselection.agents.neuroevolution_agent import NeuroevolutionAgent from dynamicalgorithmselection.agents.policy_gradient_agent import PolicyGradientAgent from dynamicalgorithmselection.agents.random_agent import RandomAgent @@ -19,6 +20,7 @@ "random": RandomAgent, "neuroevolution": NeuroevolutionAgent, "policy-gradient": PolicyGradientAgent, + "RL-DAS": RLDASAgent, } @@ -97,7 +99,7 @@ def parse_arguments(): "--agent", type=str, default="policy-gradient", - choices=["random", "neuroevolution", "policy-gradient"], + choices=list(AGENTS_DICT.keys()), help="specify which agent to use", ) diff --git a/dynamicalgorithmselection/optimizers/DE/DE.py b/dynamicalgorithmselection/optimizers/DE/DE.py new file mode 100644 index 0000000..fae8ce9 --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/DE.py @@ -0,0 +1,60 @@ +import numpy as np # engine for numerical computing +from dynamicalgorithmselection.optimizers.Optimizer import Optimizer + + +class DE(Optimizer): + def __init__(self, problem, options): + 
Optimizer.__init__(self, problem, options) + if ( + self.n_individuals is None + ): # number of offspring, aka offspring population size + self.n_individuals = 170 + assert self.n_individuals > 0 + self._n_generations = 0 # number of generations + self._printed_evaluations = self.n_function_evaluations + + def initialize(self): + raise NotImplementedError + + def mutate(self): + raise NotImplementedError + + def crossover(self): + raise NotImplementedError + + def select(self): + raise NotImplementedError + + def iterate(self): + raise NotImplementedError + + def _print_verbose_info(self, fitness, y, is_print=False): + if y is not None and self.saving_fitness: + if not np.isscalar(y): + fitness.extend(y) + else: + fitness.append(y) + if self.verbose: + is_verbose = ( + self._printed_evaluations != self.n_function_evaluations + ) # to avoid repeated printing + is_verbose_1 = (not self._n_generations % self.verbose) and is_verbose + is_verbose_2 = self.termination_signal > 0 and is_verbose + is_verbose_3 = is_print and is_verbose + if is_verbose_1 or is_verbose_2 or is_verbose_3: + info = " * Generation {:d}: best_so_far_y {:7.5e}, min(y) {:7.5e} & Evaluations {:d}" + print( + info.format( + self._n_generations, + self.best_so_far_y, + np.min(y), + self.n_function_evaluations, + ) + ) + self._printed_evaluations = self.n_function_evaluations + + def _collect(self, fitness=None, y=None): + self._print_verbose_info(fitness, y) + results = Optimizer._collect(self, fitness) + results["_n_generations"] = self._n_generations + return results diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py new file mode 100644 index 0000000..32ac219 --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -0,0 +1,139 @@ +import numpy as np + +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class JDE21(DE): + def __init__(self, problem, options): + super().__init__(problem, options) + self.sNP = 10 
+ self.bNP = self.n_individuals - self.sNP + self.age = 0 + self.tao1 = self.tao2 = 0.1 + self.Finit, self.CRinit = 0.5, 0.9 + self.Fu = 1.1 + self.Fl_b, self.CRu_b = 0.1, 1.1 + self.Nmax = self.n_individuals + self.Nmin = 30 + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + if y is None: + y = np.array([self._evaluate_fitness(xi, args) for xi in x]) + self.F = np.full(self.n_individuals, self.Finit) + self.Cr = np.full(self.n_individuals, self.CRinit) + return x, y + + def _mutate_cross_select(self, x, y, indices, args=None): + NP_sub = len(indices) + if NP_sub < 4: + return x, y + + # Self-adaptation + new_F = np.where( + self.rng_optimization.random(NP_sub) < self.tao1, + self.rng_optimization.random(NP_sub) * self.Fu + self.Fl_b, + self.F[indices], + ) + new_Cr = np.where( + self.rng_optimization.random(NP_sub) < self.tao2, + self.rng_optimization.random(NP_sub) * self.CRu_b, + self.Cr[indices], + ) + + # Mutation & Crossover + # Simplified vectorized parent selection + r1, r2, r3 = [self.rng_optimization.choice(indices, NP_sub) for _ in range(3)] + vs = x[r1] + new_F[:, np.newaxis] * (x[r2] - x[r3]) + vs = np.clip(vs, self.lower_boundary, self.upper_boundary) + + mask = ( + self.rng_optimization.random((NP_sub, self.ndim_problem)) + < new_Cr[:, np.newaxis] + ) + us = np.where(mask, vs, x[indices]) + + new_y = np.array([self._evaluate_fitness(ui, args) for ui in us]) + + # Crowding Selection + dists = np.linalg.norm( + x[indices][:, np.newaxis, :] - us[np.newaxis, :, :], axis=2 + ) + closest_sub_idx = np.argmin(dists, axis=0) + closest_global_idx = indices[closest_sub_idx] + + improved = new_y < y[closest_global_idx] + for i, idx in enumerate(closest_global_idx): + if improved[i]: + x[idx], y[idx] = us[i], new_y[i] + self.F[idx], self.Cr[idx] = new_F[i], new_Cr[i] + self.age = 0 + + if not 
np.any(improved): + self.age += NP_sub + return x, y + + def iterate(self, x=None, y=None, args=None): + bNP = x.shape[0] - self.sNP + # Evolution of big population + x, y = self._mutate_cross_select(x, y, np.arange(bNP), args) + + # Evolution of small population (repeated) + small_idx = np.arange(bNP, x.shape[0]) + for _ in range(bNP // self.sNP): + x, y = self._mutate_cross_select(x, y, small_idx, args) + + progress = self.n_function_evaluations / self.max_function_evaluations + self.n_individuals = int(round(self.Nmax - progress * (self.Nmax - self.Nmin))) + + self._n_generations += 1 + return x, y + + def optimize(self, fitness_function=None, args=None): + fitness = super().optimize(fitness_function) + x, y = self.initialize( + args, self.start_conditions.get("x"), self.start_conditions.get("y") + ) + idx = 0 + while True: + old_evals = self.n_function_evaluations + + x, y = self.iterate(x, y, args) + self.results.update( + { + "x": x, + "y": y, + } + ) + if self._check_terminations(): + break + idx += 1 + if self.n_function_evaluations == old_evals: + break + + return self._collect(fitness, y) + + def set_data( + self, + x=None, + y=None, + *args, + **kwargs, + ): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + loc = locals() + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + start_conditions = {} + start_conditions.update({"x": x[indices], "y": y[indices]}) + self.start_conditions = start_conditions + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py new file mode 100644 index 0000000..5a7704f --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -0,0 +1,243 @@ +import numpy as np +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class MADDE(DE): + 
start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pm"] + + def __init__(self, problem, options): + super().__init__(problem, options) + # Constants from MadDE paper/original code + self.Nmax = self.n_individuals if self.n_individuals else 170 + self.Nmin = options.get("Nmin", 4) + self.p = 0.18 + self.PqBX = 0.01 + + # Adaptive strategy probabilities + self.pm = np.ones(3) / 3 + + # Archive and Memory + self.NA = int(self.Nmax * 2.1) + self.archive = np.empty((0, self.ndim_problem)) + + # Memory for F and Cr + self.memory_size = 20 # Standard for SHADE-based + self.MF = np.ones(self.memory_size) * 0.2 + self.MCr = np.ones(self.memory_size) * 0.2 + self.k_idx = 0 + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + if y is None: + y = np.array([self._evaluate_fitness(xi, args) for xi in x]) + return x, y + + def _choose_F_Cr(self, NP): + indices = self.rng_optimization.integers(0, self.memory_size, size=NP) + Cr = self.rng_optimization.normal(loc=self.MCr[indices], scale=0.1, size=NP) + Cr = np.clip(Cr, 0, 1) + + # Cauchy-like sampling for F + F = self.MF[indices] + 0.1 * np.tan( + np.pi * (self.rng_optimization.random(NP) - 0.5) + ) + while np.any(F <= 0): + idx = np.where(F <= 0)[0] + F[idx] = self.MF[indices[idx]] + 0.1 * np.tan( + np.pi * (self.rng_optimization.random(len(idx)) - 0.5) + ) + return Cr, np.minimum(1.0, F) + + def _mutate(self, x, y, F, strategy_idx, q, Fa): + NP = x.shape[0] + dim = self.ndim_problem + v = np.zeros_like(x) + + # Indices for 3 strategies + m0 = strategy_idx == 0 + m1 = strategy_idx == 1 + m2 = strategy_idx == 2 + + # p-best and q-best sets + order = np.argsort(y) + p_best = x[order[: max(int(self.p * NP), 2)]] + q_best = x[order[: max(int(q * NP), 2)]] + + # Strategy 0: Current-to-pbest/1 with archive + if np.any(m0): + v[m0] = 
self._ctb_w_arc(x[m0], p_best, self.archive, F[m0]) + + # Strategy 1: Current-to-rand/1 with archive + if np.any(m1): + v[m1] = self._ctr_w_arc(x[m1], self.archive, F[m1]) + + # Strategy 2: Weighted Rand-to-best + if np.any(m2): + v[m2] = self._weighted_rtb(x[m2], q_best, F[m2], Fa) + + return v + + def iterate(self, x, y, args=None): + NP = x.shape[0] + dim = self.ndim_problem + FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations + + # Linear parameters for MadDE + q = 2 * self.p - self.p * FEs / MaxFEs + Fa = 0.5 + 0.5 * FEs / MaxFEs + + # 1. Parameter sampling + Cr, F = self._choose_F_Cr(NP) + mu = self.rng_optimization.choice(3, size=NP, p=self.pm) + + # 2. Mutation + v = self._mutate(x, y, F, mu, q, Fa) + + # Boundary handling (MadDE specific) + low, high = self.lower_boundary, self.upper_boundary + v = np.where(v < low, (x + low) / 2, v) + v = np.where(v > high, (x + high) / 2, v) + + # 3. Crossover (Binomial + qBX) + u = np.zeros_like(x) + rvs = self.rng_optimization.random(NP) + + # Standard Binomial + bu_idx = rvs > self.PqBX + if np.any(bu_idx): + u[bu_idx] = self._binomial(x[bu_idx], v[bu_idx], Cr[bu_idx]) + + # quasi-Best Crossover (qBX) + qu_idx = rvs <= self.PqBX + if np.any(qu_idx): + # Pick qbest from combined population and archive + combined = np.vstack([x, self.archive]) if len(self.archive) > 0 else x + q_limit = max(int(q * len(combined)), 2) + q_best_combined = combined[ + np.argsort(np.concatenate([y, np.full(len(self.archive), np.inf)]))[ + :q_limit + ] + ] + cross_qbest = q_best_combined[ + self.rng_optimization.integers(0, len(q_best_combined), np.sum(qu_idx)) + ] + u[qu_idx] = self._binomial(cross_qbest, v[qu_idx], Cr[qu_idx]) + + # 4. 
Evaluation and Selection + new_y = np.array([self._evaluate_fitness(ui, args) for ui in u]) + optim = new_y < y + + if np.any(optim): + # Archive update + self.archive = np.vstack([self.archive, x[optim]]) + if len(self.archive) > self.NA: + self.archive = self.archive[ + self.rng_optimization.choice( + len(self.archive), self.NA, replace=False + ) + ] + + # Memory and Strategy probability update + df = np.maximum(0, y - new_y) + self._update_memory(F[optim], Cr[optim], df[optim]) + self._update_pm(df, mu) + + x[optim], y[optim] = u[optim], new_y[optim] + + # 5. NLPSR + x, y = self._nlpsr(x, y) + + self._n_generations += 1 + return x, y + + def _update_pm(self, df, mu): + count_S = np.zeros(3) + for i in range(3): + if np.any(mu == i): + count_S[i] = np.mean(df[mu == i]) + + if np.sum(count_S) > 0: + self.pm = np.maximum( + 0.1, np.minimum(0.9, count_S / (np.sum(count_S) + 1e-15)) + ) + self.pm /= np.sum(self.pm) + else: + self.pm = np.ones(3) / 3 + + def _nlpsr(self, x, y): + FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations + new_NP = int( + np.round( + self.Nmax + + (self.Nmin - self.Nmax) * np.power(FEs / MaxFEs, 1 - FEs / MaxFEs) + ) + ) + if new_NP < x.shape[0]: + idx = np.argsort(y)[:new_NP] + x, y = x[idx], y[idx] + self.n_individuals = new_NP + self.NA = int(max(new_NP * 2.1, self.Nmin)) + return x, y + + # Helper mutation methods (Vectorized) + def _ctb_w_arc(self, x, best, archive, F): + NP = x.shape[0] + xb = best[self.rng_optimization.integers(0, len(best), NP)] + r1 = self.rng_optimization.integers(0, NP, NP) + combined = np.vstack([x, archive]) if len(archive) > 0 else x + r2 = self.rng_optimization.integers(0, len(combined), NP) + return ( + x + F[:, np.newaxis] * (xb - x) + F[:, np.newaxis] * (x[r1] - combined[r2]) + ) + + def _ctr_w_arc(self, x, archive, F): + NP = x.shape[0] + r1 = self.rng_optimization.integers(0, NP, NP) + combined = np.vstack([x, archive]) if len(archive) > 0 else x + r2 = 
self.rng_optimization.integers(0, len(combined), NP) + return x + F[:, np.newaxis] * (x[r1] - combined[r2]) + + def _weighted_rtb(self, x, best, F, Fa): + NP = x.shape[0] + xb = best[self.rng_optimization.integers(0, len(best), NP)] + r1 = self.rng_optimization.integers(0, NP, NP) + r2 = self.rng_optimization.integers(0, NP, NP) + return F[:, np.newaxis] * x[r1] + (F * Fa)[:, np.newaxis] * (xb - x[r2]) + + def _binomial(self, x, v, Cr): + NP, dim = x.shape + jrand = self.rng_optimization.integers(dim, size=NP) + mask = self.rng_optimization.random((NP, dim)) < Cr[:, np.newaxis] + u = np.where(mask, v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def _update_memory(self, SF, SCr, df): + if len(SF) > 0: + w = df / (np.sum(df) + 1e-15) + self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + self.MCr[self.k_idx] = np.sum(w * SCr) + self.k_idx = (self.k_idx + 1) % self.memory_size + + def optimize(self, fitness_function=None, args=None): + fitness = super().optimize(fitness_function) + x, y = self.initialize( + args, self.start_conditions.get("x"), self.start_conditions.get("y") + ) + while True: + self._print_verbose_info(fitness, y) + x, y = self.iterate(x, y, args) + self.results.update( + { + "x": x, + "y": y, + } + ) + if self._check_terminations(): + break + return self._collect(fitness, y) diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py new file mode 100644 index 0000000..2282740 --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -0,0 +1,180 @@ +import numpy as np +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class NL_SHADE_RSP(DE): + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx"] + + def __init__(self, problem, options): + super().__init__(problem, options) + self.Nmax = self.n_individuals if self.n_individuals else 170 + self.Nmin = options.get("Nmin", 30) + 
self.n_individuals = self.Nmax + + self.pb = 0.4 + self.pa = 0.5 + + # Archive + self.NA = int(self.Nmax * 2.1) + self.archive = np.empty((0, self.ndim_problem)) + + # Memory MF and MCr + self.memory_size = self.ndim_problem * 20 + self.MF = np.ones(self.memory_size) * 0.2 + self.MCr = np.ones(self.memory_size) * 0.2 + self.k_idx = 0 + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + if y is None: + y = np.array([self._evaluate_fitness(xi, args) for xi in x]) + return x, y + + def _sample_cauchy(self, loc, scale, size): + """Manual Cauchy sampling: loc + scale * tan(pi * (rand - 0.5))""" + rand = self.rng_optimization.random(size) + return loc + scale * np.tan(np.pi * (rand - 0.5)) + + def _choose_F_Cr(self, NP): + ind_r = self.rng_optimization.integers(0, self.memory_size, size=NP) + # Crossover Rate (Normal) + Cr = self.rng_optimization.normal(loc=self.MCr[ind_r], scale=0.1, size=NP) + Cr = np.clip(Cr, 0, 1) + # Step Length (Cauchy) + cauchy_locs = self.MF[ind_r] + F = self._sample_cauchy(cauchy_locs, 0.1, NP) + # Symmetry correction for negative values + while np.any(F <= 0): + idx = np.where(F <= 0)[0] + F[idx] = self._sample_cauchy(cauchy_locs[idx], 0.1, len(idx)) + return Cr, np.minimum(1, F) + + def _update_memory(self, SF, SCr, df): + if len(SF) > 0: + w = df / np.sum(df) + # Weighted Lehmer Mean for F + self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + # Weighted Arithmetic Mean for Cr + self.MCr[self.k_idx] = np.sum(w * SCr) + self.k_idx = (self.k_idx + 1) % self.memory_size + + def iterate(self, x, y, args=None): + NP = x.shape[0] + Cr, F = self._choose_F_Cr(NP) + + # Mutation: current-to-pbest/1 with archive + pb_upper = int(max(2, NP * self.pb)) + pbest_idx = np.argsort(y)[:pb_upper] + x_pbest = x[self.rng_optimization.choice(pbest_idx, NP)] + + r1 = 
self.rng_optimization.integers(0, NP, size=NP) + # Ensure distinct r1 + for i in range(NP): + while r1[i] == i: + r1[i] = self.rng_optimization.integers(0, NP) + + # Archive vs Population selection for x2 + x2 = np.zeros_like(x) + use_arc = self.rng_optimization.random(NP) < self.pa + arc_idx = np.where(use_arc & (len(self.archive) > 0))[0] + pop_idx = np.where(~use_arc | (len(self.archive) == 0))[0] + + if len(pop_idx) > 0: + r2 = self.rng_optimization.integers(0, NP, size=len(pop_idx)) + x2[pop_idx] = x[r2] + if len(arc_idx) > 0: + r_arc = self.rng_optimization.integers( + 0, len(self.archive), size=len(arc_idx) + ) + x2[arc_idx] = self.archive[r_arc] + + # Generate Trials + vs = x + F[:, np.newaxis] * (x_pbest - x) + F[:, np.newaxis] * (x[r1] - x2) + vs = np.clip(vs, self.lower_boundary, self.upper_boundary) + + # Binomial Crossover + jrand = self.rng_optimization.integers(self.ndim_problem, size=NP) + mask = self.rng_optimization.random((NP, self.ndim_problem)) < Cr[:, np.newaxis] + us = np.where(mask, vs, x) + us[np.arange(NP), jrand] = vs[np.arange(NP), jrand] + + # Selection + new_y = np.array([self._evaluate_fitness(ui, args) for ui in us]) + better = new_y < y + + if np.any(better): + # Update Archive + success_x = x[better] + self.archive = np.vstack([self.archive, success_x]) + if len(self.archive) > self.NA: + self.archive = self.archive[-self.NA :] + + # Record successes for memory + df = (y[better] - new_y[better]) / (y[better] + 1e-15) + self._update_memory(F[better], Cr[better], df) + + x[better], y[better] = us[better], new_y[better] + + # NLPSR + FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations + new_NP = int( + np.round( + self.Nmax + + (self.Nmin - self.Nmax) * np.power(FEs / MaxFEs, 1 - FEs / MaxFEs) + ) + ) + if new_NP < NP: + idx = np.argsort(y)[:new_NP] + x, y = x[idx], y[idx] + self.n_individuals = new_NP + self.NA = int(max(new_NP * 2.1, self.Nmin)) + + self._n_generations += 1 + return x, y + + def optimize(self, 
fitness_function=None, args=None): + fitness = DE.optimize(self, fitness_function) + + x = self.start_conditions.get("x", None) + y = self.start_conditions.get("y", None) + + x, y = self.initialize(args, x, y) + + while True: + self._print_verbose_info(fitness, y) + x, y = self.iterate(x, y, args) + self.results.update( + { + "x": x, + "y": y, + } + ) + if self._check_terminations(): + break + + return self._collect(fitness, y) + + def set_data( + self, + x=None, + y=None, + *args, + **kwargs, + ): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + loc = locals() + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + start_conditions = {} + start_conditions.update({"x": x[indices], "y": y[indices]}) + self.start_conditions = start_conditions + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/__init__.py b/dynamicalgorithmselection/optimizers/DE/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dynamicalgorithmselection/optimizers/ES/CMAES.py b/dynamicalgorithmselection/optimizers/ES/CMAES.py index 70cb864..7dbb308 100644 --- a/dynamicalgorithmselection/optimizers/ES/CMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/CMAES.py @@ -314,19 +314,6 @@ def optimize( "mean": mean, } ) - self.results.update( - { - "p_c": p_c, - "p_s": p_s, - "cm": cm, - "e_va": e_va, - "e_ve": e_ve, - "d": d, - "x": x, - "y": y, - "mean": mean, - } - ) results = self._collect(fitness, y, mean) # by default do *NOT* save eigenvalues and eigenvectors (with *quadratic* space complexity) if self._save_eig: From 3d4060f165a17fcb0d7ed5b769e8cb58ec910b69 Mon Sep 17 00:00:00 2001 From: wniec Date: Sun, 8 Feb 2026 20:37:30 +0100 Subject: [PATCH 02/20] add early stopping for policy-gradient agent --- .../agents/policy_gradient_agent.py | 31 ++++++++++++------- 1 file 
changed, 19 insertions(+), 12 deletions(-) diff --git a/dynamicalgorithmselection/agents/policy_gradient_agent.py b/dynamicalgorithmselection/agents/policy_gradient_agent.py index 036ab9f..6461341 100644 --- a/dynamicalgorithmselection/agents/policy_gradient_agent.py +++ b/dynamicalgorithmselection/agents/policy_gradient_agent.py @@ -269,9 +269,15 @@ def _update_history(self, iteration_result): if historic_val is None: self.iterations_history[variable_name] = appended_val else: - self.iterations_history[variable_name] = np.concatenate( - (historic_val, appended_val) - ) + if appended_val.shape != (0,): + self.iterations_history[variable_name] = np.concatenate( + (historic_val, appended_val) + ) + else: + self.iterations_history[variable_name] = historic_val + self._counter_early_stopping = self.early_stopping_evaluations + # Population has collapsed - further optimization makes no sense + # case when sub-optimizer didn't run - DE variants case return iteration_result @@ -306,18 +312,18 @@ def optimize(self, fitness_function=None, args=None): iteration_result = {"x": x, "y": y} idx = 0 last_used_params = [] - while not self._check_terminations(): + while True: full_buffer = self.buffer.size() >= self.buffer.capacity - # 1. Prepare State (uses self.iterations_history internally) + # Prepare State (uses self.iterations_history internally) state = self._prepare_state_tensor(x, y, full_buffer) - # 2. Select Action + # Select Action action, log_prob, value = self._select_action(state, full_buffer) self.choices_history.append(action) - # 3. Execute Optimization Step best_parent = self.best_so_far_y + # Execute Optimization Step iteration_result, optimizer = self._execute_action(action, iteration_result) if len(last_used_params) > 0: @@ -327,14 +333,14 @@ def optimize(self, fitness_function=None, args=None): last_used_params = optimizer.start_condition_parameters x, y = iteration_result.get("x"), iteration_result.get("y") - # 4. 
Update and Deduplicate History (updates self.iterations_history internally) + # Update History iteration_result = self._update_history(iteration_result) - # 5. Process Reward + # Process Reward reward = self._process_step_reward(best_parent, idx, full_buffer) - # 6. Store in Buffer + # Store in Buffer self.n_function_evaluations = optimizer.n_function_evaluations is_done = self.n_function_evaluations >= self.max_function_evaluations self.buffer.add( @@ -346,7 +352,7 @@ def optimize(self, fitness_function=None, args=None): value.detach(), ) - # 7. PPO Update if needed + # PPO Update if self.train_mode and self.buffer.size() >= batch_size: self.ppo_update( self.buffer, @@ -356,7 +362,6 @@ def optimize(self, fitness_function=None, args=None): entropy_coef=entropy_coef, ) - # 8. Post-step updates entropy_coef = max(entropy_coef * 0.99, 0.001) self._print_verbose_info(fitness, y) @@ -370,5 +375,7 @@ def optimize(self, fitness_function=None, args=None): self.n_function_evaluations = optimizer.n_function_evaluations idx += 1 + if self._check_terminations(): + break return self._collect(fitness, self.best_so_far_y) From 7f49db2def0dccf0e1aaed2684ae4832be59bbe5 Mon Sep 17 00:00:00 2001 From: wniec Date: Sun, 8 Feb 2026 21:53:27 +0100 Subject: [PATCH 03/20] fix logging with early stopping and add hooks --- .pre-commit-config.yaml | 5 ++ dynamicalgorithmselection/agents/agent.py | 2 +- pyproject.toml | 1 + tests/test_experiments_core.py | 10 +++- tests/test_policy_gradient_agent.py | 23 +------- uv.lock | 68 +++++++++++++++++++++++ 6 files changed, 86 insertions(+), 23 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d286e02 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.0 + hooks: + - id: ruff-format \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/agent.py 
b/dynamicalgorithmselection/agents/agent.py index 0ed6827..a5cb7a8 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -196,7 +196,7 @@ def _log_run_metrics(self): 1 if self.choices_history[i] == action_id else 0 ) for i, (action_id, action) in product( - range(self.n_checkpoints), enumerate(self.actions) + range(len(self.choices_history)), enumerate(self.actions) ) } self.run.log(checkpoint_choices) diff --git a/pyproject.toml b/pyproject.toml index 0e54b8a..3a4ac3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ + "pre-commit>=4.5.1", "pytest>=9.0.2", "ruff>=0.14.5", ] diff --git a/tests/test_experiments_core.py b/tests/test_experiments_core.py index b2b6b25..b2d57c4 100644 --- a/tests/test_experiments_core.py +++ b/tests/test_experiments_core.py @@ -24,7 +24,10 @@ def setUp(self): self.suite_mock.get_problem.return_value = self.problem_mock self.observer_mock = MagicMock() - @patch("dynamicalgorithmselection.experiments.core.tqdm", side_effect=lambda x: x) + @patch( + "dynamicalgorithmselection.experiments.core.tqdm", + side_effect=lambda x, smoothing: x, + ) @patch("dynamicalgorithmselection.experiments.core.dump_stats") @patch("dynamicalgorithmselection.experiments.core.coco_bbob_single_function") def test_run_testing(self, mock_single_func, mock_dump_stats, mock_tqdm): @@ -50,7 +53,10 @@ def test_run_testing(self, mock_single_func, mock_dump_stats, mock_tqdm): self.assertEqual(self.options["max_function_evaluations"], expected_max_fe) self.assertFalse(self.options["train_mode"]) - @patch("dynamicalgorithmselection.experiments.core.tqdm", side_effect=lambda x: x) + @patch( + "dynamicalgorithmselection.experiments.core.tqdm", + side_effect=lambda x, smoothing: x, + ) @patch("dynamicalgorithmselection.experiments.core.coco_bbob_single_function") def test_run_training(self, mock_single_func, mock_tqdm): fake_state_1 = { diff --git 
a/tests/test_policy_gradient_agent.py b/tests/test_policy_gradient_agent.py index 802004f..9e2d024 100644 --- a/tests/test_policy_gradient_agent.py +++ b/tests/test_policy_gradient_agent.py @@ -49,19 +49,7 @@ def test_select_action_tensor_shape(self, mock_problem, ppo_options): return_value=(MagicMock(), 10), ): agent = PolicyGradientAgent(mock_problem, ppo_options) - - # Przygotuj dummy state (tensor) - state = torch.randn(1, 10).to(torch.float32) # Zakładamy wymiar stanu 10 - - # Wymuszamy, żeby sieci zwracały poprawne kształty (jeśli nie używamy prawdziwych wag) - # Ale PolicyGradientAgent tworzy prawdziwe sieci w __init__, więc powinny działać "z pudełka" - # o ile ppo_utils.Actor/Critic są poprawne. - - # Jeśli ppo_utils wymaga GPU, a testujesz na CPU, upewnij się że DEVICE w ppo_utils.py to 'cpu' - # lub nadpisz go w teście. - - # Testujemy metodę - # full_buffer=False -> exploration mode (losowe lub uniform) + state = torch.randn(1, 10).to(torch.float32) action, log_prob, value = agent._select_action(state, full_buffer=True) assert isinstance(action, (int, np.integer)) @@ -112,7 +100,6 @@ def test_execute_action_instantiation(self, mock_problem, ppo_options): mock_optimizer_instance = MagicMock() MockOptimizerClass.return_value = mock_optimizer_instance - # Konfigurujemy instancję mock_optimizer_instance.n_function_evaluations = 100 mock_optimizer_instance.best_so_far_y = 5.0 # iterate zwraca słownik wyników @@ -125,12 +112,8 @@ def test_execute_action_instantiation(self, mock_problem, ppo_options): return_value=(MagicMock(), 10), ): agent = PolicyGradientAgent(mock_problem, ppo_options) - agent.actions = [ - MockOptimizerClass - ] # Podmieniamy akcje na naszą klasę mocka - agent.iterate = MagicMock( - return_value={"result": "ok"} - ) # Mockujemy wywołanie iterate + agent.actions = [MockOptimizerClass] + agent.iterate = MagicMock(return_value={"result": "ok"}) iteration_result = {"x": None, "y": None} result, optimizer = agent._execute_action(0, 
iteration_result) diff --git a/uv.lock b/uv.lock index c4bc2bc..f0879fa 100644 --- a/uv.lock +++ b/uv.lock @@ -52,6 +52,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, ] +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.4" @@ -279,6 +288,15 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/42/b0/876bc174ff34a0b2e3b75f10d7c3c9a267a1f56dbac59e943b6f682f6aa8/directsearch-1.0.tar.gz", hash = "sha256:8093ecc401a3d5eff28f053d4ef1b726f5a9c577bd33d6a8b2413a5ba753c734", size = 13605, upload-time = "2022-04-01T03:06:11.488Z" } +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + [[package]] name = "dynamicalgorithmselection" version = "0.1.0" @@ -299,6 +317,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "pre-commit" }, { name = "pytest" }, { name = "ruff" }, ] @@ -320,6 +339,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "pre-commit", specifier = ">=4.5.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "ruff", specifier = ">=0.14.5" }, ] @@ -444,6 +464,15 @@ classic-control = [ { name = "pygame" }, ] +[[package]] +name = "identify" +version = "2.6.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload-time = "2026-01-12T18:58:58.201Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -811,6 +840,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/13/8267afdb84a890d7fc3e6f0eef170b0323915c28879e79e8184f7257cf8a/nevergrad-1.0.12-py3-none-any.whl", hash = "sha256:56ff65d6a2f497ecd79af5a796968ee946c05705a6a69ca616eae5988cc5d999", size = 506324, upload-time = "2025-04-23T15:34:16.012Z" }, ] +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, +] + [[package]] name = "numba" version = "0.62.1" @@ -1151,6 +1189,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, +] + [[package]] name = "protobuf" version = "6.33.0" @@ -1740,6 +1794,20 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, ] +[[package]] +name = "virtualenv" +version = "20.35.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/28/e6f1a6f655d620846bd9df527390ecc26b3805a0c5989048c210e22c5ca9/virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c", size = 6028799, upload-time = "2025-10-29T06:57:40.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/0c/c05523fa3181fdf0c9c52a6ba91a23fbf3246cc095f26f6516f9c60e6771/virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b", size = 6005095, upload-time = "2025-10-29T06:57:37.598Z" }, +] + [[package]] name = "wandb" version = "0.22.2" From f1f65e0aadfc92b80f48b040461f3149e5d31bbf Mon Sep 17 00:00:00 2001 From: wniec Date: Mon, 9 Feb 2026 21:47:06 +0100 Subject: [PATCH 04/20] fix ELA agent --- dynamicalgorithmselection/agents/agent.py | 24 +++++++++---- .../agents/agent_state.py | 35 ++++++++++++++----- .../experiments/experiment.py | 2 -- 3 files changed, 44 insertions(+), 17 deletions(-) diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index a5cb7a8..5c66aac 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -8,6 +8,7 @@ from dynamicalgorithmselection.agents.agent_utils import ( get_checkpoints, StepwiseRewardNormalizer, + MAX_DIM, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer from dynamicalgorithmselection.optimizers.RestartOptimizer import 
restart_optimizer @@ -72,11 +73,15 @@ def get_partial_state( ) if x is None or y is None: - state_representation = self.state_representation( - np.zeros((50, self.ndim_problem)), - np.zeros((50,)), - sr_additional_params, - ) + if self.options.get("state_representation") != "ELA": + state_representation = self.state_representation( + np.zeros((50, self.ndim_problem)), + np.zeros((50,)), + sr_additional_params, + ) + else: + state_representation = (np.zeros((43,)),) + return np.append(state_representation, (0, 0) if optimization_state else ()) used_fe = self.n_function_evaluations / self.max_function_evaluations stagnation_coef = self.stagnation_count / self.max_function_evaluations @@ -106,7 +111,14 @@ def get_state( optimization_state = self.get_partial_state(x, y, True).flatten() state = np.concatenate((landscape_state, optimization_state)) else: - state = self.get_partial_state(x_history, y_history, True).flatten() + partial_state = self.get_partial_state(x_history, y_history, True).flatten() + state = np.append( + partial_state, + ( + self.ndim_problem / MAX_DIM, + self.n_function_evaluations / self.max_function_evaluations, + ), + ) return self.state_normalizer.normalize(state, update) def _print_verbose_info(self, fitness, y): diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 789a8e1..70d9b52 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -34,9 +34,7 @@ def get_state_representation( x[-MAX_CONSIDERED_POPSIZE:], y[-MAX_CONSIDERED_POPSIZE:] )[0].mean(axis=0), 34 elif name == "ELA": - return lambda x, y, *args: ela_state_representation( - x[-MAX_CONSIDERED_POPSIZE:], y[-MAX_CONSIDERED_POPSIZE:] - ), 45 + return lambda x, y, *args: ela_state_representation(x, y), 47 elif name == "custom": return lambda x, y, args: AgentState( x, y, n_actions, *args @@ -48,20 +46,39 @@ def get_state_representation( def 
ela_state_representation(x, y, *args): with warnings.catch_warnings(): warnings.simplefilter("ignore") - x_norm, y_norm = ( - (x - x.mean()) / (x.std() + 1e-8), - (y - y.mean()) / (y.std() + 1e-8), + + _, unique_indices = np.unique(x, axis=0, return_index=True) + unique_indices = np.sort(unique_indices) + x_deduplicated = x[unique_indices][-MAX_CONSIDERED_POPSIZE:] + y_deduplicated = y[unique_indices][-MAX_CONSIDERED_POPSIZE:] + + x_raw = np.ascontiguousarray(x_deduplicated - x_deduplicated.mean()) / ( + x_deduplicated.std() + 1e-8 ) + y_raw = np.ascontiguousarray(y_deduplicated - y_deduplicated.mean()) / ( + y_deduplicated.std() + 1e-8 + ) + + x_norm = pd.DataFrame(x_raw).reset_index(drop=True) + x_norm.columns = [f"x_{i}" for i in range(x_norm.shape[1])] + y_norm = pd.Series(y_raw).reset_index(drop=True) + + is_unique = ~x_norm.duplicated() + + # If we lost data, re-slice to ensure alignment + if not is_unique.all(): + x_norm = x_norm[is_unique].reset_index(drop=True) + y_norm = y_norm[is_unique].reset_index(drop=True) + meta_feats = calculate_ela_meta(x_norm, y_norm) ela_distr = ( calculate_ela_distribution(x_norm, y_norm) - if (y**2).sum() > 0 + if ((y**2).sum() > 0 and np.var(y_norm) > 1e-8) else {str(i): 0 for i in range(4)} ) nbc_feats = calculate_nbc(x_norm, y_norm) disp_feats = calculate_dispersion(x_norm, y_norm) - df_temp = pd.DataFrame(x_norm) - df_temp.columns = [f"x_{i}" for i in range(df_temp.shape[1])] + ic_feats = calculate_information_content(x_norm, y_norm) all_features = { diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index a78fd55..650df59 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -2,8 +2,6 @@ import os from typing import Type, Optional -import cocopp - from dynamicalgorithmselection.experiments.core import run_testing, run_training from 
dynamicalgorithmselection.experiments.cross_validation import run_cross_validation from dynamicalgorithmselection.experiments.neuroevolution import ( From b282d866429f8d5dd83c3d45fc2e0c510ed9ea78 Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 10 Feb 2026 00:06:17 +0100 Subject: [PATCH 05/20] fix random_agent --- dynamicalgorithmselection/agents/random_agent.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dynamicalgorithmselection/agents/random_agent.py b/dynamicalgorithmselection/agents/random_agent.py index a8e5551..3c73df0 100644 --- a/dynamicalgorithmselection/agents/random_agent.py +++ b/dynamicalgorithmselection/agents/random_agent.py @@ -41,7 +41,7 @@ def _update_history(self, iteration_result): if historic_val is None: self.iterations_history[variable_name] = appended_val - else: + elif appended_val.shape != (0,): self.iterations_history[variable_name] = np.concatenate( (historic_val, appended_val) ) @@ -54,7 +54,7 @@ def optimize(self, fitness_function=None, args=None): self.iterations_history = {"x": None, "y": None} iteration_result = {"x": x, "y": y} last_used_params = [] - while not self._check_terminations(): + while True: action = self._select_action() iteration_result, optimizer = self._execute_action(action, iteration_result) if len(last_used_params) > 0: @@ -67,4 +67,6 @@ def optimize(self, fitness_function=None, args=None): self.n_function_evaluations = optimizer.n_function_evaluations self._print_verbose_info(fitness, y) self.n_function_evaluations = optimizer.n_function_evaluations + if self._check_terminations() or self._n_generations == self.n_checkpoints: + break return self._collect(fitness, self.best_so_far_y) From df479a04bc29944e481af9e8e94c31c7f393703d Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 10 Feb 2026 07:11:31 +0100 Subject: [PATCH 06/20] improve RLDAS_agent wandb logging --- .../agents/RLDAS_agent.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git 
a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 0664303..a5adf16 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -129,6 +129,10 @@ def _select_action(self, state): dist = torch.distributions.Categorical(probs) action = dist.sample() log_prob = dist.log_prob(action) + probs = probs.detach().cpu().numpy() + if self.run is not None: + entropy = -np.sum(probs * np.log(probs + 1e-12)) / np.log(len(probs)) + self.run.log({"normalized entropy": entropy}) return action.item(), log_prob, value @@ -156,6 +160,14 @@ def optimize(self, fitness_function=None, args=None): best_y_global = population_y[best_idx] best_x_global = population_x[best_idx].copy() + self.best_so_far_y = best_y_global + self.best_so_far_x = best_x_global + + self.history.append(self.best_so_far_y) + self.fitness_history.append(self.best_so_far_y) + if self.saving_fitness: + fitness.append(self.best_so_far_y) + self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 self.ah_vectors.fill(0.0) @@ -169,6 +181,8 @@ def optimize(self, fitness_function=None, args=None): state = self.get_state(population_x, population_y) action_idx, log_prob, value = self._select_action(state) + self.choices_history.append(action_idx) + selected_alg_class = self.actions[action_idx] alg_name = self.alg_names[action_idx] @@ -208,6 +222,8 @@ def optimize(self, fitness_function=None, args=None): ) adc = (cost_old - cost_new) / self.initial_cost + if self.run: + self.run.log({"adc": adc}) done = self.n_function_evaluations >= self.max_function_evaluations @@ -224,6 +240,19 @@ def optimize(self, fitness_function=None, args=None): best_y_global = min(best_y_global, cost_new) + # Update Agent Best State and History + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + self.history.append(self.best_so_far_y) + 
self.fitness_history.append(self.best_so_far_y) + if self.saving_fitness: + fitness.append(self.best_so_far_y) + + self._n_generations += 1 + self._print_verbose_info(fitness, self.best_so_far_y) + fes_end = self.n_function_evaluations speed_factor = self.max_function_evaluations / fes_end From 3339c5e921e3f2e8e7768b130746cf6b5eda0d4c Mon Sep 17 00:00:00 2001 From: wniec Date: Sun, 15 Feb 2026 12:04:30 +0100 Subject: [PATCH 07/20] fix RL-DAS agent behaviour --- LOIO_train_set.json | 1444 ----------------- .../agents/RLDAS_agent.py | 36 +- .../agents/agent_state.py | 4 +- .../agents/agent_utils.py | 17 +- .../agents/policy_gradient_agent.py | 25 +- dynamicalgorithmselection/experiments/core.py | 9 +- .../experiments/cross_validation.py | 11 +- .../experiments/experiment.py | 22 +- .../experiments/neuroevolution.py | 8 +- .../experiments/utils.py | 24 +- dynamicalgorithmselection/main.py | 37 +- dynamicalgorithmselection/optimizers/DE/DE.py | 1 + .../optimizers/Optimizer.py | 10 +- .../optimizers/RestartOptimizer.py | 14 +- pyproject.toml | 1 + tests/test_cross_validation.py | 2 +- tests/test_experiments_core.py | 1 + tests/test_policy_gradient_agent.py | 4 + uv.lock | 132 ++ 19 files changed, 292 insertions(+), 1510 deletions(-) delete mode 100644 LOIO_train_set.json diff --git a/LOIO_train_set.json b/LOIO_train_set.json deleted file mode 100644 index 84afe0c..0000000 --- a/LOIO_train_set.json +++ /dev/null @@ -1,1444 +0,0 @@ -{ - "data": [ - "bbob_f021_i78_d02", - "bbob_f001_i03_d05", - "bbob_f021_i05_d02", - "bbob_f021_i05_d03", - "bbob_f002_i71_d03", - "bbob_f010_i04_d03", - "bbob_f012_i74_d10", - "bbob_f003_i73_d03", - "bbob_f002_i04_d03", - "bbob_f013_i75_d20", - "bbob_f005_i05_d40", - "bbob_f004_i80_d10", - "bbob_f021_i78_d10", - "bbob_f008_i01_d10", - "bbob_f009_i04_d03", - "bbob_f009_i01_d05", - "bbob_f020_i79_d10", - "bbob_f015_i73_d02", - "bbob_f013_i74_d40", - "bbob_f017_i78_d20", - "bbob_f023_i03_d03", - "bbob_f005_i02_d40", - "bbob_f017_i75_d05", - 
"bbob_f003_i73_d02", - "bbob_f022_i05_d02", - "bbob_f011_i05_d03", - "bbob_f012_i02_d10", - "bbob_f024_i04_d05", - "bbob_f003_i72_d02", - "bbob_f013_i02_d02", - "bbob_f011_i72_d05", - "bbob_f021_i80_d40", - "bbob_f014_i78_d40", - "bbob_f024_i01_d02", - "bbob_f018_i02_d10", - "bbob_f012_i03_d20", - "bbob_f002_i71_d02", - "bbob_f002_i75_d20", - "bbob_f007_i04_d40", - "bbob_f005_i73_d40", - "bbob_f022_i75_d20", - "bbob_f024_i77_d02", - "bbob_f017_i74_d20", - "bbob_f011_i02_d03", - "bbob_f004_i04_d40", - "bbob_f009_i02_d40", - "bbob_f020_i03_d20", - "bbob_f023_i78_d10", - "bbob_f004_i71_d05", - "bbob_f020_i72_d20", - "bbob_f018_i77_d10", - "bbob_f010_i02_d20", - "bbob_f009_i04_d05", - "bbob_f023_i73_d02", - "bbob_f019_i03_d10", - "bbob_f011_i04_d40", - "bbob_f014_i71_d05", - "bbob_f010_i02_d05", - "bbob_f022_i05_d20", - "bbob_f005_i03_d05", - "bbob_f002_i79_d03", - "bbob_f024_i80_d03", - "bbob_f001_i73_d02", - "bbob_f024_i74_d20", - "bbob_f013_i74_d10", - "bbob_f005_i74_d03", - "bbob_f003_i71_d02", - "bbob_f018_i80_d02", - "bbob_f008_i75_d02", - "bbob_f015_i74_d02", - "bbob_f020_i73_d05", - "bbob_f007_i73_d05", - "bbob_f014_i04_d40", - "bbob_f009_i77_d02", - "bbob_f004_i76_d40", - "bbob_f019_i80_d02", - "bbob_f013_i76_d40", - "bbob_f018_i71_d20", - "bbob_f009_i71_d02", - "bbob_f011_i75_d40", - "bbob_f012_i05_d10", - "bbob_f012_i01_d10", - "bbob_f019_i73_d40", - "bbob_f005_i71_d40", - "bbob_f021_i77_d02", - "bbob_f014_i02_d05", - "bbob_f001_i04_d10", - "bbob_f023_i80_d02", - "bbob_f009_i73_d10", - "bbob_f015_i80_d40", - "bbob_f024_i72_d20", - "bbob_f012_i75_d20", - "bbob_f009_i77_d10", - "bbob_f008_i03_d02", - "bbob_f011_i75_d20", - "bbob_f016_i01_d10", - "bbob_f012_i72_d05", - "bbob_f004_i02_d03", - "bbob_f017_i77_d40", - "bbob_f012_i79_d02", - "bbob_f022_i75_d03", - "bbob_f007_i03_d05", - "bbob_f017_i76_d10", - "bbob_f010_i75_d40", - "bbob_f010_i72_d20", - "bbob_f024_i77_d03", - "bbob_f011_i73_d20", - "bbob_f007_i80_d40", - "bbob_f004_i78_d40", - "bbob_f019_i79_d05", 
- "bbob_f014_i05_d05", - "bbob_f020_i76_d20", - "bbob_f024_i79_d03", - "bbob_f019_i77_d05", - "bbob_f016_i05_d02", - "bbob_f024_i79_d05", - "bbob_f001_i71_d02", - "bbob_f015_i05_d20", - "bbob_f002_i77_d05", - "bbob_f001_i75_d03", - "bbob_f022_i78_d03", - "bbob_f011_i80_d20", - "bbob_f013_i72_d05", - "bbob_f002_i01_d10", - "bbob_f013_i02_d03", - "bbob_f008_i78_d05", - "bbob_f019_i04_d20", - "bbob_f011_i03_d03", - "bbob_f014_i05_d40", - "bbob_f019_i79_d40", - "bbob_f022_i76_d10", - "bbob_f022_i74_d03", - "bbob_f023_i78_d40", - "bbob_f023_i76_d03", - "bbob_f007_i80_d05", - "bbob_f002_i02_d20", - "bbob_f013_i74_d20", - "bbob_f021_i03_d10", - "bbob_f013_i80_d05", - "bbob_f007_i73_d02", - "bbob_f010_i77_d05", - "bbob_f022_i79_d40", - "bbob_f004_i80_d02", - "bbob_f014_i03_d20", - "bbob_f015_i02_d02", - "bbob_f011_i01_d03", - "bbob_f006_i75_d40", - "bbob_f024_i71_d03", - "bbob_f021_i73_d02", - "bbob_f012_i05_d20", - "bbob_f008_i73_d20", - "bbob_f024_i04_d40", - "bbob_f017_i76_d02", - "bbob_f021_i80_d03", - "bbob_f002_i78_d02", - "bbob_f023_i72_d10", - "bbob_f019_i75_d03", - "bbob_f005_i71_d02", - "bbob_f001_i80_d05", - "bbob_f006_i04_d05", - "bbob_f001_i03_d10", - "bbob_f023_i71_d02", - "bbob_f005_i75_d03", - "bbob_f004_i78_d03", - "bbob_f010_i71_d05", - "bbob_f021_i71_d20", - "bbob_f011_i78_d20", - "bbob_f008_i77_d10", - "bbob_f004_i72_d20", - "bbob_f018_i78_d05", - "bbob_f003_i73_d05", - "bbob_f010_i73_d40", - "bbob_f003_i75_d03", - "bbob_f020_i04_d05", - "bbob_f011_i76_d20", - "bbob_f004_i80_d05", - "bbob_f015_i71_d02", - "bbob_f012_i74_d40", - "bbob_f010_i04_d40", - "bbob_f016_i04_d05", - "bbob_f020_i79_d05", - "bbob_f009_i72_d10", - "bbob_f015_i74_d40", - "bbob_f019_i80_d10", - "bbob_f007_i74_d10", - "bbob_f009_i02_d03", - "bbob_f007_i04_d05", - "bbob_f017_i72_d10", - "bbob_f024_i78_d10", - "bbob_f015_i71_d20", - "bbob_f001_i01_d40", - "bbob_f008_i02_d03", - "bbob_f005_i76_d05", - "bbob_f015_i03_d20", - "bbob_f008_i80_d05", - "bbob_f017_i02_d03", - 
"bbob_f020_i72_d40", - "bbob_f021_i02_d02", - "bbob_f008_i05_d05", - "bbob_f001_i74_d02", - "bbob_f022_i76_d40", - "bbob_f012_i02_d02", - "bbob_f006_i74_d05", - "bbob_f021_i73_d20", - "bbob_f017_i78_d02", - "bbob_f016_i02_d03", - "bbob_f005_i74_d10", - "bbob_f010_i03_d20", - "bbob_f008_i80_d20", - "bbob_f005_i04_d05", - "bbob_f002_i78_d20", - "bbob_f018_i73_d02", - "bbob_f013_i05_d02", - "bbob_f010_i74_d02", - "bbob_f009_i03_d20", - "bbob_f021_i04_d10", - "bbob_f015_i05_d03", - "bbob_f021_i04_d40", - "bbob_f003_i04_d20", - "bbob_f021_i78_d20", - "bbob_f008_i02_d05", - "bbob_f015_i80_d10", - "bbob_f021_i76_d03", - "bbob_f013_i02_d10", - "bbob_f008_i03_d03", - "bbob_f017_i05_d20", - "bbob_f020_i03_d02", - "bbob_f004_i02_d02", - "bbob_f017_i80_d05", - "bbob_f013_i73_d05", - "bbob_f022_i04_d20", - "bbob_f020_i78_d10", - "bbob_f008_i74_d10", - "bbob_f020_i05_d10", - "bbob_f003_i71_d20", - "bbob_f007_i05_d02", - "bbob_f019_i72_d20", - "bbob_f013_i79_d03", - "bbob_f014_i72_d03", - "bbob_f015_i03_d05", - "bbob_f005_i71_d03", - "bbob_f001_i04_d02", - "bbob_f010_i80_d03", - "bbob_f023_i78_d05", - "bbob_f005_i72_d20", - "bbob_f020_i76_d10", - "bbob_f009_i80_d20", - "bbob_f016_i04_d10", - "bbob_f006_i74_d10", - "bbob_f017_i75_d40", - "bbob_f021_i71_d02", - "bbob_f002_i72_d40", - "bbob_f014_i05_d03", - "bbob_f008_i78_d03", - "bbob_f007_i02_d10", - "bbob_f022_i75_d05", - "bbob_f016_i77_d40", - "bbob_f002_i75_d10", - "bbob_f020_i02_d05", - "bbob_f019_i01_d20", - "bbob_f017_i73_d03", - "bbob_f018_i05_d40", - "bbob_f015_i77_d05", - "bbob_f017_i72_d20", - "bbob_f010_i01_d40", - "bbob_f001_i80_d02", - "bbob_f006_i77_d03", - "bbob_f009_i74_d10", - "bbob_f012_i79_d20", - "bbob_f017_i73_d20", - "bbob_f006_i75_d05", - "bbob_f013_i03_d20", - "bbob_f008_i79_d02", - "bbob_f010_i73_d20", - "bbob_f003_i02_d05", - "bbob_f023_i05_d02", - "bbob_f011_i04_d03", - "bbob_f006_i74_d40", - "bbob_f011_i79_d02", - "bbob_f011_i05_d10", - "bbob_f011_i03_d20", - "bbob_f012_i02_d05", - "bbob_f018_i71_d05", 
- "bbob_f020_i78_d40", - "bbob_f012_i75_d05", - "bbob_f009_i80_d40", - "bbob_f004_i71_d40", - "bbob_f004_i04_d10", - "bbob_f004_i74_d03", - "bbob_f016_i72_d40", - "bbob_f024_i78_d03", - "bbob_f003_i79_d10", - "bbob_f023_i75_d05", - "bbob_f015_i77_d40", - "bbob_f010_i75_d02", - "bbob_f024_i02_d10", - "bbob_f008_i01_d05", - "bbob_f017_i79_d40", - "bbob_f003_i74_d02", - "bbob_f005_i72_d05", - "bbob_f008_i05_d10", - "bbob_f019_i78_d03", - "bbob_f019_i01_d03", - "bbob_f011_i75_d10", - "bbob_f004_i79_d03", - "bbob_f017_i80_d10", - "bbob_f008_i77_d03", - "bbob_f017_i71_d20", - "bbob_f019_i05_d40", - "bbob_f004_i73_d10", - "bbob_f006_i73_d40", - "bbob_f016_i05_d10", - "bbob_f002_i05_d20", - "bbob_f009_i05_d20", - "bbob_f019_i80_d05", - "bbob_f020_i79_d40", - "bbob_f019_i05_d03", - "bbob_f011_i01_d20", - "bbob_f009_i71_d03", - "bbob_f021_i72_d10", - "bbob_f006_i80_d10", - "bbob_f001_i04_d05", - "bbob_f022_i71_d05", - "bbob_f008_i73_d40", - "bbob_f006_i72_d05", - "bbob_f019_i02_d20", - "bbob_f011_i02_d40", - "bbob_f001_i73_d03", - "bbob_f015_i76_d05", - "bbob_f014_i04_d05", - "bbob_f009_i73_d40", - "bbob_f019_i76_d02", - "bbob_f024_i80_d10", - "bbob_f003_i75_d02", - "bbob_f016_i79_d20", - "bbob_f010_i78_d20", - "bbob_f018_i05_d02", - "bbob_f015_i80_d03", - "bbob_f016_i05_d40", - "bbob_f021_i04_d05", - "bbob_f019_i02_d40", - "bbob_f022_i72_d40", - "bbob_f015_i73_d10", - "bbob_f011_i77_d40", - "bbob_f023_i80_d40", - "bbob_f012_i75_d40", - "bbob_f012_i78_d03", - "bbob_f010_i76_d02", - "bbob_f021_i75_d40", - "bbob_f007_i74_d40", - "bbob_f012_i75_d03", - "bbob_f018_i02_d03", - "bbob_f009_i03_d02", - "bbob_f002_i79_d10", - "bbob_f022_i03_d20", - "bbob_f018_i73_d05", - "bbob_f004_i76_d03", - "bbob_f016_i04_d20", - "bbob_f020_i76_d02", - "bbob_f002_i02_d03", - "bbob_f006_i72_d10", - "bbob_f015_i71_d40", - "bbob_f019_i71_d02", - "bbob_f019_i04_d40", - "bbob_f021_i01_d10", - "bbob_f019_i76_d20", - "bbob_f023_i75_d10", - "bbob_f016_i72_d03", - "bbob_f016_i78_d03", - 
"bbob_f022_i01_d40", - "bbob_f017_i75_d10", - "bbob_f016_i01_d05", - "bbob_f023_i04_d10", - "bbob_f004_i79_d20", - "bbob_f002_i72_d02", - "bbob_f019_i05_d02", - "bbob_f023_i80_d10", - "bbob_f017_i03_d05", - "bbob_f019_i80_d40", - "bbob_f002_i05_d02", - "bbob_f002_i73_d03", - "bbob_f015_i04_d02", - "bbob_f013_i05_d20", - "bbob_f022_i02_d40", - "bbob_f017_i02_d05", - "bbob_f018_i71_d03", - "bbob_f022_i01_d03", - "bbob_f014_i03_d05", - "bbob_f001_i03_d20", - "bbob_f023_i73_d10", - "bbob_f013_i76_d05", - "bbob_f006_i03_d05", - "bbob_f006_i76_d20", - "bbob_f003_i01_d20", - "bbob_f022_i74_d05", - "bbob_f003_i01_d10", - "bbob_f015_i72_d40", - "bbob_f015_i74_d10", - "bbob_f008_i02_d02", - "bbob_f004_i74_d20", - "bbob_f007_i77_d40", - "bbob_f017_i75_d20", - "bbob_f015_i78_d40", - "bbob_f021_i74_d03", - "bbob_f023_i72_d03", - "bbob_f016_i03_d10", - "bbob_f002_i71_d05", - "bbob_f023_i76_d40", - "bbob_f012_i73_d10", - "bbob_f012_i80_d40", - "bbob_f008_i04_d40", - "bbob_f006_i75_d20", - "bbob_f001_i79_d05", - "bbob_f004_i04_d20", - "bbob_f002_i05_d03", - "bbob_f019_i75_d02", - "bbob_f019_i72_d02", - "bbob_f002_i03_d02", - "bbob_f024_i01_d20", - "bbob_f020_i75_d40", - "bbob_f019_i74_d10", - "bbob_f001_i72_d05", - "bbob_f022_i77_d20", - "bbob_f011_i72_d02", - "bbob_f003_i03_d10", - "bbob_f004_i75_d40", - "bbob_f010_i77_d10", - "bbob_f014_i80_d40", - "bbob_f023_i01_d10", - "bbob_f009_i71_d40", - "bbob_f011_i79_d10", - "bbob_f011_i77_d03", - "bbob_f018_i77_d05", - "bbob_f004_i72_d05", - "bbob_f020_i79_d02", - "bbob_f001_i77_d03", - "bbob_f015_i73_d20", - "bbob_f020_i04_d10", - "bbob_f001_i02_d02", - "bbob_f020_i80_d03", - "bbob_f011_i79_d40", - "bbob_f001_i05_d02", - "bbob_f016_i72_d10", - "bbob_f008_i77_d40", - "bbob_f020_i73_d02", - "bbob_f024_i74_d03", - "bbob_f007_i76_d20", - "bbob_f020_i71_d03", - "bbob_f008_i79_d05", - "bbob_f002_i05_d05", - "bbob_f002_i74_d03", - "bbob_f013_i76_d03", - "bbob_f019_i71_d03", - "bbob_f010_i04_d05", - "bbob_f011_i79_d03", - "bbob_f023_i05_d03", 
- "bbob_f001_i72_d03", - "bbob_f010_i76_d03", - "bbob_f009_i80_d02", - "bbob_f013_i71_d40", - "bbob_f009_i77_d05", - "bbob_f007_i71_d03", - "bbob_f024_i77_d05", - "bbob_f007_i05_d40", - "bbob_f003_i77_d05", - "bbob_f018_i76_d03", - "bbob_f010_i04_d20", - "bbob_f007_i05_d03", - "bbob_f017_i01_d40", - "bbob_f015_i73_d05", - "bbob_f016_i01_d03", - "bbob_f022_i03_d05", - "bbob_f014_i02_d03", - "bbob_f001_i02_d40", - "bbob_f004_i74_d40", - "bbob_f003_i74_d10", - "bbob_f002_i04_d10", - "bbob_f001_i73_d20", - "bbob_f017_i73_d10", - "bbob_f008_i74_d05", - "bbob_f004_i05_d10", - "bbob_f012_i77_d10", - "bbob_f010_i74_d03", - "bbob_f022_i01_d02", - "bbob_f009_i76_d02", - "bbob_f009_i80_d05", - "bbob_f009_i04_d20", - "bbob_f007_i05_d10", - "bbob_f007_i76_d40", - "bbob_f022_i73_d02", - "bbob_f020_i03_d05", - "bbob_f012_i72_d03", - "bbob_f007_i80_d02", - "bbob_f021_i02_d40", - "bbob_f023_i71_d03", - "bbob_f011_i77_d20", - "bbob_f011_i02_d02", - "bbob_f002_i73_d20", - "bbob_f011_i71_d10", - "bbob_f009_i02_d05", - "bbob_f014_i72_d05", - "bbob_f017_i72_d40", - "bbob_f021_i03_d20", - "bbob_f022_i77_d10", - "bbob_f011_i04_d10", - "bbob_f010_i79_d20", - "bbob_f007_i74_d20", - "bbob_f019_i01_d05", - "bbob_f003_i05_d03", - "bbob_f021_i05_d10", - "bbob_f017_i73_d05", - "bbob_f005_i04_d03", - "bbob_f004_i73_d20", - "bbob_f016_i05_d05", - "bbob_f005_i72_d03", - "bbob_f002_i74_d20", - "bbob_f014_i75_d02", - "bbob_f004_i76_d10", - "bbob_f019_i01_d10", - "bbob_f024_i05_d03", - "bbob_f014_i76_d03", - "bbob_f022_i75_d10", - "bbob_f009_i79_d05", - "bbob_f011_i71_d03", - "bbob_f014_i04_d02", - "bbob_f003_i03_d40", - "bbob_f008_i75_d03", - "bbob_f004_i05_d05", - "bbob_f022_i02_d10", - "bbob_f014_i04_d10", - "bbob_f003_i03_d03", - "bbob_f013_i71_d10", - "bbob_f004_i78_d02", - "bbob_f002_i74_d05", - "bbob_f012_i72_d02", - "bbob_f012_i76_d10", - "bbob_f010_i77_d40", - "bbob_f015_i05_d10", - "bbob_f007_i03_d20", - "bbob_f009_i72_d20", - "bbob_f019_i78_d20", - "bbob_f010_i72_d02", - 
"bbob_f005_i80_d05", - "bbob_f022_i04_d02", - "bbob_f008_i04_d03", - "bbob_f004_i75_d05", - "bbob_f013_i75_d05", - "bbob_f009_i78_d03", - "bbob_f022_i74_d10", - "bbob_f024_i05_d10", - "bbob_f009_i78_d40", - "bbob_f012_i77_d03", - "bbob_f002_i79_d20", - "bbob_f001_i01_d20", - "bbob_f016_i80_d02", - "bbob_f017_i74_d05", - "bbob_f024_i02_d20", - "bbob_f010_i80_d20", - "bbob_f016_i71_d05", - "bbob_f019_i05_d10", - "bbob_f022_i71_d40", - "bbob_f015_i75_d05", - "bbob_f002_i78_d40", - "bbob_f004_i71_d10", - "bbob_f012_i80_d10", - "bbob_f017_i75_d03", - "bbob_f020_i04_d02", - "bbob_f023_i02_d40", - "bbob_f018_i02_d02", - "bbob_f020_i79_d03", - "bbob_f002_i80_d10", - "bbob_f020_i73_d20", - "bbob_f013_i02_d20", - "bbob_f017_i74_d10", - "bbob_f018_i04_d03", - "bbob_f013_i80_d20", - "bbob_f006_i01_d20", - "bbob_f020_i75_d05", - "bbob_f007_i79_d10", - "bbob_f015_i76_d10", - "bbob_f017_i77_d10", - "bbob_f022_i77_d05", - "bbob_f003_i79_d02", - "bbob_f015_i04_d03", - "bbob_f018_i75_d05", - "bbob_f002_i78_d05", - "bbob_f024_i76_d20", - "bbob_f008_i78_d40", - "bbob_f008_i05_d40", - "bbob_f008_i71_d03", - "bbob_f022_i03_d40", - "bbob_f001_i05_d40", - "bbob_f005_i79_d10", - "bbob_f007_i04_d10", - "bbob_f009_i74_d05", - "bbob_f003_i77_d40", - "bbob_f020_i77_d20", - "bbob_f021_i78_d40", - "bbob_f023_i72_d02", - "bbob_f002_i03_d20", - "bbob_f006_i03_d10", - "bbob_f003_i72_d20", - "bbob_f014_i03_d03", - "bbob_f002_i77_d20", - "bbob_f012_i05_d03", - "bbob_f004_i76_d05", - "bbob_f001_i78_d10", - "bbob_f012_i04_d02", - "bbob_f010_i01_d20", - "bbob_f016_i02_d02", - "bbob_f018_i75_d02", - "bbob_f007_i75_d05", - "bbob_f017_i02_d20", - "bbob_f005_i05_d03", - "bbob_f021_i79_d10", - "bbob_f005_i71_d05", - "bbob_f007_i71_d20", - "bbob_f020_i72_d05", - "bbob_f014_i03_d40", - "bbob_f005_i03_d20", - "bbob_f022_i73_d20", - "bbob_f012_i79_d03", - "bbob_f020_i75_d03", - "bbob_f003_i72_d40", - "bbob_f016_i04_d02", - "bbob_f002_i72_d20", - "bbob_f007_i71_d02", - "bbob_f009_i74_d02", - "bbob_f011_i76_d40", 
- "bbob_f020_i01_d02", - "bbob_f006_i71_d05", - "bbob_f014_i76_d10", - "bbob_f014_i80_d10", - "bbob_f017_i74_d40", - "bbob_f012_i04_d05", - "bbob_f023_i75_d03", - "bbob_f002_i80_d05", - "bbob_f009_i05_d10", - "bbob_f005_i76_d02", - "bbob_f017_i72_d03", - "bbob_f014_i04_d03", - "bbob_f011_i80_d02", - "bbob_f022_i71_d03", - "bbob_f009_i75_d20", - "bbob_f020_i74_d40", - "bbob_f015_i79_d40", - "bbob_f014_i75_d20", - "bbob_f024_i75_d03", - "bbob_f021_i02_d03", - "bbob_f010_i75_d10", - "bbob_f013_i78_d03", - "bbob_f003_i01_d05", - "bbob_f009_i78_d10", - "bbob_f010_i79_d02", - "bbob_f013_i73_d20", - "bbob_f011_i78_d05", - "bbob_f015_i80_d02", - "bbob_f006_i03_d03", - "bbob_f012_i71_d02", - "bbob_f008_i76_d03", - "bbob_f014_i03_d10", - "bbob_f014_i79_d10", - "bbob_f022_i80_d05", - "bbob_f015_i05_d05", - "bbob_f013_i02_d40", - "bbob_f003_i72_d10", - "bbob_f021_i80_d05", - "bbob_f023_i74_d10", - "bbob_f009_i74_d20", - "bbob_f015_i72_d20", - "bbob_f018_i73_d20", - "bbob_f010_i72_d10", - "bbob_f004_i02_d05", - "bbob_f009_i03_d40", - "bbob_f016_i73_d03", - "bbob_f011_i72_d10", - "bbob_f012_i80_d05", - "bbob_f017_i80_d20", - "bbob_f005_i71_d20", - "bbob_f007_i80_d03", - "bbob_f002_i03_d40", - "bbob_f018_i79_d40", - "bbob_f013_i80_d02", - "bbob_f013_i74_d05", - "bbob_f021_i01_d20", - "bbob_f006_i73_d20", - "bbob_f020_i77_d02", - "bbob_f014_i72_d10", - "bbob_f022_i01_d05", - "bbob_f007_i79_d40", - "bbob_f006_i79_d40", - "bbob_f015_i77_d02", - "bbob_f010_i05_d40", - "bbob_f001_i72_d20", - "bbob_f002_i73_d10", - "bbob_f009_i76_d03", - "bbob_f019_i79_d02", - "bbob_f002_i76_d20", - "bbob_f008_i04_d05", - "bbob_f010_i78_d05", - "bbob_f013_i71_d20", - "bbob_f014_i76_d05", - "bbob_f009_i79_d40", - "bbob_f001_i79_d03", - "bbob_f021_i71_d05", - "bbob_f002_i74_d02", - "bbob_f024_i73_d05", - "bbob_f016_i71_d20", - "bbob_f002_i76_d02", - "bbob_f023_i05_d20", - "bbob_f020_i01_d10", - "bbob_f023_i03_d05", - "bbob_f016_i74_d10", - "bbob_f008_i79_d10", - "bbob_f018_i04_d05", - 
"bbob_f002_i72_d05", - "bbob_f018_i04_d10", - "bbob_f022_i76_d02", - "bbob_f011_i77_d02", - "bbob_f004_i75_d03", - "bbob_f021_i71_d10", - "bbob_f007_i72_d10", - "bbob_f016_i01_d20", - "bbob_f007_i01_d40", - "bbob_f018_i78_d10", - "bbob_f012_i04_d20", - "bbob_f024_i71_d40", - "bbob_f003_i78_d02", - "bbob_f023_i78_d20", - "bbob_f019_i03_d40", - "bbob_f009_i01_d40", - "bbob_f021_i72_d02", - "bbob_f020_i74_d05", - "bbob_f014_i01_d40", - "bbob_f007_i72_d03", - "bbob_f006_i72_d03", - "bbob_f018_i71_d02", - "bbob_f018_i74_d40", - "bbob_f019_i73_d20", - "bbob_f007_i03_d03", - "bbob_f006_i01_d03", - "bbob_f004_i75_d02", - "bbob_f009_i75_d05", - "bbob_f004_i75_d10", - "bbob_f016_i04_d40", - "bbob_f024_i02_d40", - "bbob_f004_i77_d03", - "bbob_f021_i76_d05", - "bbob_f004_i79_d02", - "bbob_f017_i02_d02", - "bbob_f009_i05_d05", - "bbob_f007_i73_d03", - "bbob_f009_i71_d20", - "bbob_f020_i74_d10", - "bbob_f010_i71_d20", - "bbob_f008_i04_d20", - "bbob_f007_i75_d20", - "bbob_f017_i79_d03", - "bbob_f003_i01_d40", - "bbob_f024_i03_d40", - "bbob_f023_i76_d20", - "bbob_f006_i04_d40", - "bbob_f009_i74_d40", - "bbob_f015_i72_d02", - "bbob_f003_i78_d05", - "bbob_f012_i79_d40", - "bbob_f007_i79_d05", - "bbob_f006_i74_d03", - "bbob_f006_i01_d10", - "bbob_f016_i74_d02", - "bbob_f011_i78_d03", - "bbob_f012_i77_d02", - "bbob_f015_i74_d20", - "bbob_f024_i76_d05", - "bbob_f022_i75_d02", - "bbob_f001_i01_d05", - "bbob_f011_i03_d10", - "bbob_f011_i05_d40", - "bbob_f006_i76_d05", - "bbob_f003_i74_d05", - "bbob_f017_i78_d10", - "bbob_f019_i03_d20", - "bbob_f019_i76_d05", - "bbob_f012_i77_d20", - "bbob_f019_i79_d03", - "bbob_f013_i71_d02", - "bbob_f002_i02_d02", - "bbob_f003_i71_d40", - "bbob_f017_i01_d03", - "bbob_f015_i79_d02", - "bbob_f010_i05_d20", - "bbob_f004_i04_d03", - "bbob_f024_i05_d40", - "bbob_f017_i03_d20", - "bbob_f010_i80_d02", - "bbob_f006_i05_d05", - "bbob_f007_i78_d40", - "bbob_f009_i75_d02", - "bbob_f018_i72_d20", - "bbob_f010_i76_d20", - "bbob_f004_i80_d40", - "bbob_f012_i04_d03", 
- "bbob_f011_i01_d05", - "bbob_f003_i71_d10", - "bbob_f006_i03_d02", - "bbob_f023_i73_d40", - "bbob_f016_i71_d03", - "bbob_f016_i77_d02", - "bbob_f011_i71_d02", - "bbob_f014_i04_d20", - "bbob_f016_i77_d03", - "bbob_f012_i76_d03", - "bbob_f021_i02_d20", - "bbob_f017_i05_d02", - "bbob_f004_i03_d40", - "bbob_f006_i03_d20", - "bbob_f023_i78_d03", - "bbob_f012_i03_d03", - "bbob_f012_i01_d40", - "bbob_f022_i03_d02", - "bbob_f022_i02_d05", - "bbob_f013_i79_d40", - "bbob_f005_i73_d05", - "bbob_f014_i05_d20", - "bbob_f010_i03_d02", - "bbob_f022_i02_d20", - "bbob_f013_i04_d20", - "bbob_f014_i73_d02", - "bbob_f008_i76_d02", - "bbob_f018_i73_d10", - "bbob_f021_i76_d40", - "bbob_f005_i03_d10", - "bbob_f016_i73_d10", - "bbob_f020_i76_d40", - "bbob_f008_i78_d02", - "bbob_f011_i73_d40", - "bbob_f007_i76_d10", - "bbob_f011_i73_d10", - "bbob_f019_i78_d10", - "bbob_f024_i05_d20", - "bbob_f001_i03_d03", - "bbob_f010_i04_d10", - "bbob_f002_i80_d03", - "bbob_f002_i73_d05", - "bbob_f007_i02_d05", - "bbob_f004_i74_d10", - "bbob_f003_i76_d03", - "bbob_f003_i79_d03", - "bbob_f024_i04_d02", - "bbob_f023_i75_d40", - "bbob_f008_i01_d20", - "bbob_f008_i79_d03", - "bbob_f005_i02_d03", - "bbob_f002_i79_d05", - "bbob_f016_i03_d03", - "bbob_f020_i73_d40", - "bbob_f004_i78_d20", - "bbob_f015_i75_d20", - "bbob_f023_i78_d02", - "bbob_f018_i77_d40", - "bbob_f017_i71_d10", - "bbob_f021_i80_d02", - "bbob_f003_i77_d20", - "bbob_f004_i76_d20", - "bbob_f019_i74_d05", - "bbob_f023_i71_d40", - "bbob_f021_i73_d10", - "bbob_f018_i72_d40", - "bbob_f003_i04_d10", - "bbob_f014_i76_d40", - "bbob_f015_i03_d10", - "bbob_f007_i75_d03", - "bbob_f024_i79_d10", - "bbob_f006_i76_d10", - "bbob_f016_i02_d40", - "bbob_f003_i01_d02", - "bbob_f002_i80_d40", - "bbob_f024_i78_d02", - "bbob_f002_i02_d40", - "bbob_f018_i01_d40", - "bbob_f008_i72_d40", - "bbob_f004_i05_d40", - "bbob_f012_i78_d20", - "bbob_f004_i77_d05", - "bbob_f015_i76_d02", - "bbob_f023_i79_d02", - "bbob_f008_i73_d10", - "bbob_f005_i02_d20", - 
"bbob_f020_i80_d05", - "bbob_f024_i01_d03", - "bbob_f015_i78_d05", - "bbob_f010_i74_d10", - "bbob_f010_i78_d10", - "bbob_f001_i72_d40", - "bbob_f020_i05_d40", - "bbob_f004_i77_d40", - "bbob_f018_i76_d40", - "bbob_f006_i73_d02", - "bbob_f009_i01_d10", - "bbob_f023_i04_d03", - "bbob_f008_i73_d05", - "bbob_f022_i80_d02", - "bbob_f018_i72_d03", - "bbob_f019_i01_d02", - "bbob_f020_i78_d05", - "bbob_f007_i80_d20", - "bbob_f016_i02_d10", - "bbob_f019_i79_d20", - "bbob_f018_i73_d40", - "bbob_f015_i79_d20", - "bbob_f017_i77_d03", - "bbob_f007_i77_d02", - "bbob_f003_i03_d05", - "bbob_f001_i01_d02", - "bbob_f016_i73_d05", - "bbob_f006_i79_d02", - "bbob_f015_i01_d03", - "bbob_f018_i77_d20", - "bbob_f007_i05_d05", - "bbob_f014_i05_d02", - "bbob_f023_i01_d40", - "bbob_f022_i04_d03", - "bbob_f018_i80_d20", - "bbob_f013_i01_d40", - "bbob_f020_i71_d20", - "bbob_f011_i05_d20", - "bbob_f020_i79_d20", - "bbob_f005_i05_d10", - "bbob_f006_i05_d40", - "bbob_f014_i75_d03", - "bbob_f012_i75_d02", - "bbob_f003_i77_d03", - "bbob_f001_i02_d10", - "bbob_f008_i77_d05", - "bbob_f020_i74_d02", - "bbob_f002_i78_d10", - "bbob_f014_i71_d20", - "bbob_f020_i77_d05", - "bbob_f013_i72_d20", - "bbob_f001_i74_d10", - "bbob_f001_i76_d03", - "bbob_f008_i05_d20", - "bbob_f011_i02_d05", - "bbob_f023_i74_d40", - "bbob_f024_i73_d03", - "bbob_f012_i73_d40", - "bbob_f014_i75_d05", - "bbob_f008_i80_d02", - "bbob_f004_i72_d03", - "bbob_f003_i76_d02", - "bbob_f023_i77_d10", - "bbob_f023_i03_d40", - "bbob_f013_i05_d03", - "bbob_f015_i77_d10", - "bbob_f022_i78_d20", - "bbob_f005_i74_d20", - "bbob_f024_i02_d05", - "bbob_f015_i75_d02", - "bbob_f009_i74_d03", - "bbob_f010_i78_d02", - "bbob_f008_i76_d40", - "bbob_f012_i01_d05", - "bbob_f005_i79_d03", - "bbob_f008_i76_d05", - "bbob_f008_i77_d02", - "bbob_f007_i78_d20", - "bbob_f003_i04_d02", - "bbob_f015_i72_d10", - "bbob_f004_i75_d20", - "bbob_f003_i77_d10", - "bbob_f016_i80_d05", - "bbob_f024_i78_d20", - "bbob_f011_i79_d20", - "bbob_f013_i72_d02", - "bbob_f023_i72_d20", 
- "bbob_f021_i74_d20", - "bbob_f003_i74_d40", - "bbob_f019_i02_d10", - "bbob_f007_i77_d20", - "bbob_f009_i01_d02", - "bbob_f024_i04_d03", - "bbob_f008_i03_d05", - "bbob_f020_i02_d20", - "bbob_f013_i05_d40", - "bbob_f010_i79_d10", - "bbob_f022_i01_d20", - "bbob_f014_i75_d10", - "bbob_f006_i05_d20", - "bbob_f005_i78_d03", - "bbob_f020_i05_d20", - "bbob_f018_i80_d03", - "bbob_f019_i72_d40", - "bbob_f017_i04_d20", - "bbob_f009_i76_d40", - "bbob_f006_i80_d05", - "bbob_f008_i74_d02", - "bbob_f022_i78_d02", - "bbob_f015_i77_d03", - "bbob_f003_i72_d03", - "bbob_f016_i05_d20", - "bbob_f005_i77_d05", - "bbob_f024_i74_d05", - "bbob_f022_i02_d02", - "bbob_f011_i73_d03", - "bbob_f006_i80_d20", - "bbob_f016_i78_d10", - "bbob_f005_i77_d02", - "bbob_f024_i77_d40", - "bbob_f013_i78_d10", - "bbob_f005_i01_d40", - "bbob_f011_i80_d03", - "bbob_f020_i78_d20", - "bbob_f008_i76_d10", - "bbob_f009_i03_d05", - "bbob_f021_i79_d20", - "bbob_f016_i74_d40", - "bbob_f003_i80_d40", - "bbob_f011_i76_d05", - "bbob_f021_i01_d05", - "bbob_f018_i05_d03", - "bbob_f008_i80_d10", - "bbob_f004_i01_d20", - "bbob_f023_i73_d03", - "bbob_f007_i72_d40", - "bbob_f022_i72_d10", - "bbob_f024_i80_d05", - "bbob_f015_i79_d10", - "bbob_f017_i05_d10", - "bbob_f017_i76_d05", - "bbob_f023_i79_d03", - "bbob_f003_i75_d05", - "bbob_f009_i77_d03", - "bbob_f023_i76_d05", - "bbob_f014_i72_d02", - "bbob_f006_i01_d05", - "bbob_f003_i73_d40", - "bbob_f001_i79_d10", - "bbob_f007_i71_d40", - "bbob_f008_i76_d20", - "bbob_f021_i78_d05", - "bbob_f008_i05_d03", - "bbob_f004_i02_d20", - "bbob_f017_i78_d40", - "bbob_f002_i74_d10", - "bbob_f013_i77_d05", - "bbob_f016_i80_d03", - "bbob_f012_i03_d02", - "bbob_f024_i72_d03", - "bbob_f007_i79_d02", - "bbob_f018_i71_d10", - "bbob_f016_i75_d03", - "bbob_f009_i77_d20", - "bbob_f020_i77_d10", - "bbob_f017_i77_d20", - "bbob_f002_i01_d20", - "bbob_f017_i76_d40", - "bbob_f010_i75_d20", - "bbob_f018_i03_d20", - "bbob_f006_i76_d03", - "bbob_f005_i73_d20", - "bbob_f012_i04_d40", - 
"bbob_f014_i01_d03", - "bbob_f012_i74_d20", - "bbob_f012_i78_d05", - "bbob_f003_i03_d02", - "bbob_f002_i77_d02", - "bbob_f002_i01_d03", - "bbob_f014_i02_d02", - "bbob_f001_i71_d10", - "bbob_f010_i73_d05", - "bbob_f023_i05_d05", - "bbob_f010_i73_d03", - "bbob_f011_i78_d10", - "bbob_f015_i02_d40", - "bbob_f014_i77_d10", - "bbob_f011_i80_d10", - "bbob_f020_i03_d40", - "bbob_f003_i76_d10", - "bbob_f011_i02_d10", - "bbob_f003_i73_d10", - "bbob_f012_i76_d05", - "bbob_f008_i73_d02", - "bbob_f002_i80_d02", - "bbob_f011_i01_d02", - "bbob_f014_i72_d40", - "bbob_f018_i73_d03", - "bbob_f004_i04_d02", - "bbob_f022_i79_d03", - "bbob_f007_i03_d10", - "bbob_f013_i80_d10", - "bbob_f010_i01_d02", - "bbob_f008_i71_d02", - "bbob_f003_i75_d20", - "bbob_f024_i78_d05", - "bbob_f006_i02_d40", - "bbob_f004_i74_d02", - "bbob_f011_i79_d05", - "bbob_f019_i01_d40", - "bbob_f017_i76_d03", - "bbob_f003_i73_d20", - "bbob_f017_i02_d40", - "bbob_f010_i80_d05", - "bbob_f014_i79_d40", - "bbob_f017_i73_d40", - "bbob_f020_i72_d10", - "bbob_f008_i72_d20", - "bbob_f009_i73_d20", - "bbob_f005_i72_d10", - "bbob_f018_i03_d02", - "bbob_f001_i73_d05", - "bbob_f011_i80_d40", - "bbob_f001_i76_d02", - "bbob_f006_i03_d40", - "bbob_f007_i75_d02", - "bbob_f005_i76_d10", - "bbob_f002_i72_d10", - "bbob_f001_i75_d10", - "bbob_f009_i78_d02", - "bbob_f013_i71_d03", - "bbob_f023_i73_d20", - "bbob_f012_i71_d40", - "bbob_f004_i79_d10", - "bbob_f022_i02_d03", - "bbob_f004_i71_d03", - "bbob_f010_i02_d03", - "bbob_f018_i72_d10", - "bbob_f018_i02_d40", - "bbob_f016_i72_d02", - "bbob_f010_i76_d10", - "bbob_f001_i01_d03", - "bbob_f023_i02_d10", - "bbob_f012_i74_d02", - "bbob_f019_i74_d02", - "bbob_f005_i74_d40", - "bbob_f007_i02_d03", - "bbob_f007_i76_d05", - "bbob_f001_i79_d40", - "bbob_f018_i72_d05", - "bbob_f022_i79_d02", - "bbob_f014_i76_d02", - "bbob_f017_i74_d02", - "bbob_f010_i79_d05", - "bbob_f022_i80_d10", - "bbob_f005_i77_d03", - "bbob_f006_i77_d10", - "bbob_f023_i01_d03", - "bbob_f009_i79_d02", - "bbob_f014_i77_d20", 
- "bbob_f006_i75_d10", - "bbob_f003_i75_d10", - "bbob_f021_i05_d20", - "bbob_f018_i78_d02", - "bbob_f019_i72_d10", - "bbob_f009_i72_d02", - "bbob_f015_i01_d02", - "bbob_f015_i75_d40", - "bbob_f024_i02_d03", - "bbob_f020_i02_d03", - "bbob_f002_i03_d10", - "bbob_f021_i74_d02", - "bbob_f015_i80_d05", - "bbob_f014_i77_d02", - "bbob_f020_i04_d20", - "bbob_f024_i03_d10", - "bbob_f002_i04_d02", - "bbob_f018_i76_d10", - "bbob_f005_i78_d40", - "bbob_f011_i73_d05", - "bbob_f023_i79_d05", - "bbob_f002_i05_d40", - "bbob_f023_i77_d05", - "bbob_f004_i03_d03", - "bbob_f004_i71_d02", - "bbob_f001_i71_d03", - "bbob_f021_i77_d10", - "bbob_f016_i80_d20", - "bbob_f010_i73_d02", - "bbob_f011_i01_d10", - "bbob_f009_i04_d02", - "bbob_f024_i78_d40", - "bbob_f011_i77_d05", - "bbob_f011_i01_d40", - "bbob_f009_i02_d20", - "bbob_f007_i75_d40", - "bbob_f006_i04_d02", - "bbob_f019_i05_d20", - "bbob_f004_i72_d40", - "bbob_f021_i05_d40", - "bbob_f024_i01_d40", - "bbob_f005_i75_d05", - "bbob_f003_i76_d05", - "bbob_f002_i76_d05", - "bbob_f008_i01_d02", - "bbob_f006_i71_d10", - "bbob_f018_i77_d02", - "bbob_f016_i01_d02", - "bbob_f010_i74_d40", - "bbob_f004_i73_d03", - "bbob_f014_i73_d05", - "bbob_f006_i79_d20", - "bbob_f006_i05_d02", - "bbob_f006_i01_d02", - "bbob_f016_i79_d05", - "bbob_f012_i72_d10", - "bbob_f015_i76_d20", - "bbob_f019_i04_d05", - "bbob_f001_i04_d03", - "bbob_f005_i77_d40", - "bbob_f022_i75_d40", - "bbob_f002_i01_d02", - "bbob_f019_i78_d05", - "bbob_f024_i80_d40", - "bbob_f008_i74_d03", - "bbob_f020_i01_d05", - "bbob_f013_i04_d05", - "bbob_f007_i02_d20", - "bbob_f010_i71_d03", - "bbob_f006_i78_d03", - "bbob_f021_i05_d05", - "bbob_f020_i72_d02", - "bbob_f003_i02_d40", - "bbob_f014_i79_d05", - "bbob_f017_i71_d03", - "bbob_f012_i76_d02", - "bbob_f011_i04_d05", - "bbob_f005_i79_d40", - "bbob_f023_i01_d02", - "bbob_f018_i71_d40", - "bbob_f018_i05_d05", - "bbob_f023_i02_d02", - "bbob_f021_i74_d05", - "bbob_f011_i74_d20", - "bbob_f006_i72_d02", - "bbob_f021_i03_d02", - 
"bbob_f014_i02_d40", - "bbob_f024_i77_d10", - "bbob_f023_i76_d02", - "bbob_f019_i78_d02", - "bbob_f016_i76_d40", - "bbob_f020_i77_d03", - "bbob_f008_i05_d02", - "bbob_f022_i80_d03", - "bbob_f004_i72_d10", - "bbob_f023_i03_d20", - "bbob_f005_i72_d02", - "bbob_f004_i03_d20", - "bbob_f004_i03_d02", - "bbob_f004_i78_d10", - "bbob_f020_i02_d10", - "bbob_f005_i73_d03", - "bbob_f007_i74_d02", - "bbob_f019_i04_d02", - "bbob_f002_i03_d03", - "bbob_f017_i03_d03", - "bbob_f016_i77_d05", - "bbob_f011_i76_d10", - "bbob_f008_i71_d05", - "bbob_f004_i74_d05", - "bbob_f010_i71_d40", - "bbob_f020_i05_d03", - "bbob_f001_i73_d10", - "bbob_f022_i80_d40", - "bbob_f020_i01_d03", - "bbob_f014_i71_d02", - "bbob_f006_i04_d10", - "bbob_f015_i05_d40", - "bbob_f022_i77_d02", - "bbob_f020_i03_d10", - "bbob_f010_i78_d40", - "bbob_f013_i72_d10", - "bbob_f015_i04_d05", - "bbob_f002_i71_d10", - "bbob_f007_i01_d20", - "bbob_f021_i01_d40", - "bbob_f023_i71_d10", - "bbob_f007_i72_d02", - "bbob_f016_i78_d20", - "bbob_f013_i79_d02", - "bbob_f024_i02_d02", - "bbob_f003_i02_d02", - "bbob_f022_i73_d03", - "bbob_f023_i73_d05", - "bbob_f010_i01_d10", - "bbob_f021_i78_d03", - "bbob_f002_i79_d02", - "bbob_f006_i79_d10", - "bbob_f014_i74_d20", - "bbob_f022_i73_d05", - "bbob_f022_i72_d02", - "bbob_f020_i01_d20", - "bbob_f010_i72_d40", - "bbob_f023_i71_d05", - "bbob_f001_i75_d20", - "bbob_f014_i77_d03", - "bbob_f013_i77_d40", - "bbob_f016_i71_d10", - "bbob_f018_i75_d40", - "bbob_f010_i74_d05", - "bbob_f004_i80_d03", - "bbob_f002_i73_d40", - "bbob_f007_i77_d03", - "bbob_f015_i72_d03", - "bbob_f013_i73_d03", - "bbob_f019_i72_d03", - "bbob_f001_i75_d05", - "bbob_f001_i78_d05", - "bbob_f024_i75_d05", - "bbob_f016_i78_d05", - "bbob_f023_i74_d03", - "bbob_f024_i80_d02", - "bbob_f011_i74_d40", - "bbob_f005_i76_d40", - "bbob_f012_i80_d03", - "bbob_f021_i02_d10", - "bbob_f013_i72_d03", - "bbob_f014_i76_d20", - "bbob_f015_i74_d03", - "bbob_f001_i05_d03", - "bbob_f010_i05_d02", - "bbob_f018_i02_d20", - "bbob_f009_i76_d10", 
- "bbob_f014_i78_d20", - "bbob_f005_i78_d05", - "bbob_f003_i78_d20", - "bbob_f019_i71_d10", - "bbob_f021_i76_d20", - "bbob_f005_i01_d05", - "bbob_f006_i72_d40", - "bbob_f022_i71_d10", - "bbob_f016_i76_d20", - "bbob_f002_i02_d05", - "bbob_f010_i02_d10", - "bbob_f006_i77_d05", - "bbob_f018_i79_d05", - "bbob_f015_i04_d10", - "bbob_f017_i01_d20", - "bbob_f014_i05_d10", - "bbob_f021_i77_d05", - "bbob_f002_i76_d03", - "bbob_f015_i01_d40", - "bbob_f001_i71_d20", - "bbob_f003_i71_d03", - "bbob_f018_i76_d02", - "bbob_f009_i71_d05", - "bbob_f001_i79_d02", - "bbob_f017_i79_d05", - "bbob_f011_i05_d02", - "bbob_f007_i74_d03", - "bbob_f002_i01_d05", - "bbob_f010_i03_d03", - "bbob_f010_i80_d10", - "bbob_f013_i77_d10", - "bbob_f002_i76_d10", - "bbob_f012_i02_d03", - "bbob_f006_i76_d02", - "bbob_f017_i78_d03", - "bbob_f014_i03_d02", - "bbob_f005_i75_d20", - "bbob_f023_i71_d20", - "bbob_f011_i76_d03", - "bbob_f005_i03_d02", - "bbob_f007_i74_d05", - "bbob_f005_i75_d10", - "bbob_f009_i79_d10", - "bbob_f011_i74_d03", - "bbob_f019_i77_d20", - "bbob_f012_i03_d05", - "bbob_f011_i72_d20", - "bbob_f008_i02_d40", - "bbob_f010_i72_d05", - "bbob_f016_i77_d20", - "bbob_f023_i74_d05", - "bbob_f022_i03_d10", - "bbob_f018_i74_d02", - "bbob_f001_i78_d40", - "bbob_f010_i74_d20", - "bbob_f012_i74_d05", - "bbob_f005_i79_d05", - "bbob_f016_i76_d05", - "bbob_f014_i74_d05", - "bbob_f005_i74_d05", - "bbob_f020_i75_d20", - "bbob_f018_i03_d10", - "bbob_f011_i03_d05", - "bbob_f002_i71_d20", - "bbob_f009_i04_d10", - "bbob_f017_i04_d03", - "bbob_f010_i05_d03", - "bbob_f024_i75_d10", - "bbob_f005_i78_d20", - "bbob_f020_i75_d02", - "bbob_f024_i75_d40", - "bbob_f011_i78_d02", - "bbob_f008_i78_d10", - "bbob_f001_i80_d40", - "bbob_f013_i74_d03", - "bbob_f009_i76_d05", - "bbob_f023_i79_d10", - "bbob_f001_i02_d05", - "bbob_f003_i01_d03", - "bbob_f001_i73_d40", - "bbob_f010_i04_d02", - "bbob_f015_i04_d20", - "bbob_f001_i74_d40", - "bbob_f017_i05_d40", - "bbob_f010_i76_d40", - "bbob_f018_i04_d02", - 
"bbob_f011_i75_d05", - "bbob_f018_i74_d20", - "bbob_f024_i76_d10", - "bbob_f024_i73_d10", - "bbob_f015_i77_d20", - "bbob_f009_i72_d05", - "bbob_f024_i73_d40", - "bbob_f019_i73_d02", - "bbob_f016_i03_d05", - "bbob_f017_i01_d05", - "bbob_f014_i02_d20", - "bbob_f008_i72_d10", - "bbob_f004_i02_d10", - "bbob_f018_i75_d20", - "bbob_f001_i02_d20", - "bbob_f017_i04_d40", - "bbob_f022_i79_d20", - "bbob_f007_i04_d20", - "bbob_f004_i80_d20", - "bbob_f016_i75_d05", - "bbob_f012_i78_d10", - "bbob_f006_i78_d02", - "bbob_f022_i74_d02", - "bbob_f005_i77_d10", - "bbob_f017_i80_d02", - "bbob_f017_i01_d10", - "bbob_f018_i76_d20", - "bbob_f011_i72_d03", - "bbob_f023_i01_d05", - "bbob_f018_i02_d05", - "bbob_f005_i79_d20", - "bbob_f001_i76_d10", - "bbob_f015_i78_d20" - ] -} \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index a5adf16..e8e17cf 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -21,14 +21,15 @@ def __init__(self, problem, options): self.alg_names = [alg.__name__ for alg in self.actions] self.n_algorithms = len(self.actions) - self.dim = self.ndim_problem self.network = RLDASNetwork( - d_dim=self.dim, num_algorithms=self.n_algorithms + num_algorithms=self.n_algorithms, d_dim=self.ndim_problem ).to(DEVICE) self.optimizer = torch.optim.Adam(self.network.parameters(), lr=3e-5) - self.ah_vectors = np.zeros((self.n_algorithms, 2, self.dim)) + + self._load_parameters(options) + self.ah_vectors = np.zeros((self.n_algorithms, 2, self.ndim_problem)) self.alg_usage_counts = np.zeros(self.n_algorithms) self.context_memory = {name: {} for name in self.alg_names} self.context_memory["Common"] = {} @@ -41,7 +42,7 @@ def __init__(self, problem, options): expected_trajectory_length = int( np.ceil(self.max_function_evaluations / self.schedule_interval) ) - buffer_capacity = expected_trajectory_length + 10 # Safety margin + 
buffer_capacity = expected_trajectory_length * 10 # Safety margin self.buffer = RolloutBuffer(capacity=buffer_capacity, device=DEVICE) def _load_parameters(self, options): @@ -129,7 +130,8 @@ def _select_action(self, state): dist = torch.distributions.Categorical(probs) action = dist.sample() log_prob = dist.log_prob(action) - probs = probs.detach().cpu().numpy() + probs = probs.detach().cpu().numpy()[0] + if self.run is not None: entropy = -np.sum(probs * np.log(probs + 1e-12)) / np.log(len(probs)) self.run.log({"normalized entropy": entropy}) @@ -194,20 +196,24 @@ def optimize(self, fitness_function=None, args=None): x_best_old = population_x[np.argmin(population_y)].copy() x_worst_old = population_x[np.argmax(population_y)].copy() - cost_old = np.min(population_y) + cost_old = np.copy(np.min(population_y)) target_fes = min( self.n_function_evaluations + self.schedule_interval, self.max_function_evaluations, ) - sub_opt.max_function_evaluations = target_fes - - sub_opt.population = population_x - sub_opt.fitness = population_y + sub_opt.target_FE = target_fes + sub_opt.set_data( + x=population_x, + y=population_y, + best_x=self.best_so_far_x, + best_y=self.best_so_far_y, + ) res = sub_opt.optimize() - population_x = sub_opt.population - population_y = sub_opt.fitness + + population_x = res["x"] + population_y = res["y"] self.n_function_evaluations = sub_opt.n_function_evaluations @@ -339,7 +345,7 @@ def _update_on_minibatch( surr2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * mb_advantages actor_loss = -torch.min(surr1, surr2).mean() - loss = actor_loss + value_coef * value_loss - entropy_coef * entropy + loss = actor_loss + value_coef * value_loss # - entropy_coef * entropy self.optimizer.zero_grad() loss.backward() @@ -410,8 +416,8 @@ def _compute_advantages(self, buffer): advantages = [] last_gae_lam = 0 - gamma = 0.99 - lam = 0.95 + gamma = 0.90 + lam = 0.5 for step in reversed(range(len(rewards))): next_non_terminal = 1.0 - dones[step] diff --git 
a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 70d9b52..8757f8f 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -12,7 +12,7 @@ calculate_ela_distribution, # Information Content ) from scipy.spatial.distance import pdist -from scipy.stats import spearmanr +from scipy.stats import spearmanr # type: ignore from dynamicalgorithmselection.NeurELA.NeurELA import feature_embedder from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd @@ -162,7 +162,7 @@ def get_weighted_central_moment(self, n: int): numerator = min((weights * norms_squared**exponent).sum(), 1e8) inertia_denom_w = np.linalg.norm(weights) inertia_denom_n = np.linalg.norm(norms_squared**exponent) - return numerator / max(1e-5, inertia_denom_w * max(1e-5, inertia_denom_n)) + return numerator / max(1e-5, inertia_denom_w * inertia_denom_n) def normalized_distance(self, x0: np.ndarray, x1: np.ndarray) -> float: return min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0) diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 6b9c1c8..96d2513 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np MAX_DIM = 40 @@ -7,7 +9,7 @@ def get_runtime_stats( fitness_history: list[tuple[int, float]], function_evaluations: int, checkpoints: np.ndarray, -) -> dict[str, float | list[float]]: +) -> dict[str, float | list[Optional[float]]]: """ :param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved :param function_evaluations: max number of function evaluations during run. 
@@ -21,7 +23,10 @@ def get_runtime_stats( checkpoints_fitness = [] for i, fitness in fitness_history: area_under_optimization_curve += fitness * (i - last_i) - while last_i <= checkpoints[checkpoint_idx] < i: + while ( + checkpoint_idx < len(checkpoints) + and last_i <= checkpoints[checkpoint_idx] < i + ): checkpoints_fitness.append(last_fitness) checkpoint_idx += 1 last_i = i @@ -72,17 +77,17 @@ def get_extreme_stats( key=lambda x: (x[0], -x[2]) ) # sort fe - increasing and by fitness - decreasing - current_fitness = { + current_fitnesses = { alg: float("inf") for alg in fitness_histories } # current best fitness for each algorithm current_worst_fitness = float("inf") # worst performance so far for each algorithm worst_history = [] for fe, algorithm, fitness in all_improvements: - if fitness < current_fitness[algorithm]: - current_fitness[algorithm] = fitness + if fitness < current_fitnesses[algorithm]: + current_fitnesses[algorithm] = fitness new_worst_fitness = max( - i for i in current_fitness.values() if i != float("inf") + i for i in current_fitnesses.values() if i != float("inf") ) if new_worst_fitness < current_worst_fitness: worst_history.append((fe, fitness)) diff --git a/dynamicalgorithmselection/agents/policy_gradient_agent.py b/dynamicalgorithmselection/agents/policy_gradient_agent.py index 6461341..98ce440 100644 --- a/dynamicalgorithmselection/agents/policy_gradient_agent.py +++ b/dynamicalgorithmselection/agents/policy_gradient_agent.py @@ -17,8 +17,14 @@ class PolicyGradientAgent(Agent): def __init__(self, problem, options): Agent.__init__(self, problem, options) + buffer_len = int( + options.get("n_problems") + * self.n_checkpoints + * 0.17 + * options.get("n_epochs") + ) self.buffer = options.get("buffer") or RolloutBuffer( - capacity=options.get("ppo_batch_size", 2_500), device=DEVICE + capacity=buffer_len, device=DEVICE ) self.actor = Actor(n_actions=len(self.actions), input_size=self.state_dim).to( DEVICE @@ -34,7 +40,6 @@ def __init__(self, 
problem, options): self.mean_rewards = options.get("mean_rewards", []) self.best_50_mean = float("inf") - self.tau = self.options.get("critic_target_tau", 0.05) self.target_kl = 0.03 # Initialize history dict @@ -59,7 +64,7 @@ def _update_learning_rate(self, mean_kl): elif mean_kl < self.target_kl / 1.5: current_lr *= 1.5 - current_lr = np.clip(current_lr, 3e-6, 1e-4) + current_lr = np.clip(current_lr, 3e-6, 3e-4) for param_group in self.actor_optimizer.param_groups: param_group["lr"] = current_lr @@ -245,13 +250,14 @@ def _select_action(self, state, full_buffer): def _execute_action(self, action_idx, iteration_result): """Instantiates and runs the selected optimizer.""" action_options = {k: v for k, v in self.options.items()} - action_options["max_function_evaluations"] = min( - self.checkpoints[self._n_generations], - self.max_function_evaluations, - ) + action_options["max_function_evaluations"] = self.max_function_evaluations action_options["verbose"] = False optimizer = self.actions[action_idx](self.problem, action_options) + optimizer.target_FE = min( + self.checkpoints[self._n_generations], + self.max_function_evaluations, + ) optimizer.n_function_evaluations = self.n_function_evaluations optimizer._n_generations = 0 @@ -342,7 +348,10 @@ def optimize(self, fitness_function=None, args=None): # Store in Buffer self.n_function_evaluations = optimizer.n_function_evaluations - is_done = self.n_function_evaluations >= self.max_function_evaluations + is_done = ( + self.n_function_evaluations >= self.max_function_evaluations + or idx == self.n_checkpoints - 1 + ) self.buffer.add( state.squeeze(0).to(DEVICE), action, diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 0dd0441..237f636 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Type, Any import cocoex import numpy as np @@ -46,8 
+46,11 @@ def run_training( problems_suite: cocoex.Suite, problem_ids: list[str], ): - agent_state = {} - for problem_id in tqdm(np.random.permutation(problem_ids), smoothing=0.0): + agent_state: dict[str, Any] = {} + n_epochs = options["n_epochs"] + for problem_id in tqdm( + np.random.permutation(problem_ids).tolist() * n_epochs, smoothing=0.0 + ): problem_instance = problems_suite.get_problem(problem_id) max_fe = evaluations_multiplier * problem_instance.dimension options["max_function_evaluations"] = max_fe diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index 584c543..2233575 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -1,6 +1,6 @@ import os from itertools import product -from typing import Type +from typing import Type, Optional import cocoex import numpy as np @@ -24,7 +24,7 @@ def run_cross_validation( if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, cv_folds = _get_cv_folds(4, is_loio) + problems_suite, cv_folds = _get_cv_folds(4, is_loio, options.get("dimensionality")) observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) for i, (train_set, test_set) in enumerate(cv_folds): print(f"Running cross validation training, fold {i + 1}") @@ -53,10 +53,11 @@ def run_cross_validation( return observer.result_folder -def _get_cv_folds(n: int, is_loio: bool): +def _get_cv_folds(n: int, is_loio: bool, dim: Optional[int]): """ :param n: number of cross validation folds :param is_loio: boolean to indicate how train and test sets should be split (leave-instance-out/leave-problem-out). + :param dim: dimensionality of the problems. None indicates all of them. 
:return suite, list of (train set, test set) pairs: """ np.random.seed(1234) @@ -64,7 +65,9 @@ def _get_cv_folds(n: int, is_loio: bool): problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, DIMENSIONS) + for i_id, f_id, dim in product( + INSTANCE_IDS, ALL_FUNCTIONS, (DIMENSIONS if dim is None else [dim]) + ) ] remaining_problem_ids = set(all_problem_ids) remaining_function_ids = {i for i in ALL_FUNCTIONS} diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index 650df59..d50f3ea 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -14,7 +14,7 @@ ) import cocoex -from tqdm import tqdm +from tqdm import tqdm # type: ignore from dynamicalgorithmselection.agents.agent_utils import ( get_extreme_stats, @@ -109,7 +109,8 @@ def _coco_bbob_policy_gradient_train( if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite(mode, True) + problems_suite, problem_ids = get_suite(mode, True, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) run_training( optimizer, options, evaluations_multiplier, problems_suite, problem_ids ) @@ -125,8 +126,9 @@ def _coco_bbob_test( if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite(mode, False) - observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) + problems_suite, problem_ids = get_suite(mode, False, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) + observer = cocoex.Observer("bbob", "result_folder: " + options["name"]) run_testing( optimizer, options, @@ -143,7 +145,10 @@ def _coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode): if not 
os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite("baselines", False) + problems_suite, problem_ids = get_suite( + "baselines", False, options.get("dimensionality") + ) + options["n_problems"] = len(problem_ids) observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) run_testing( optimizer, @@ -176,7 +181,9 @@ def run_comparison( observers[optimizer_name] = observer results_folders.append("exdata/" + optimizer_name) # Adjust path if needed - suites[optimizer_name] = get_suite("all", False)[0] + suites[optimizer_name] = get_suite("all", False, options.get("dimensionality"))[ + 0 + ] # Create directories for best/worst JSON stats portfolio_name = "_".join(i.__name__ for i in optimizer_portfolio) @@ -186,7 +193,8 @@ def run_comparison( cocoex.utilities.MiniPrint() # We use the problem_ids from the first suite to iterate - _, problem_ids = get_suite("all", False) + _, problem_ids = get_suite("all", False, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) for problem_id in tqdm(problem_ids, desc="Evaluating Problems", smoothing=0.0): stats = {} diff --git a/dynamicalgorithmselection/experiments/neuroevolution.py b/dynamicalgorithmselection/experiments/neuroevolution.py index 04830b7..33cc59c 100644 --- a/dynamicalgorithmselection/experiments/neuroevolution.py +++ b/dynamicalgorithmselection/experiments/neuroevolution.py @@ -174,7 +174,8 @@ def _coco_bbob_neuroevolution_train( mode: str = "easy", ): cocoex.utilities.MiniPrint() - _, problem_ids = get_suite(mode, True) + _, problem_ids = get_suite(mode, True, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) batch_size = 30 input_dim = None if options.get("state_representation") == "ELA": @@ -184,9 +185,12 @@ def _coco_bbob_neuroevolution_train( elif options.get("state_representation") == "custom": input_dim = BASE_STATE_SIZE + 2 * len(options.get("action_space")) + 2 + action_space = 
options.get("action_space") + if not action_space: + raise Exception("No action space") adjust_config( input_dim, - len(options.get("action_space")), + len(action_space), ) config = neat.Config( diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index 977065d..033a522 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -1,9 +1,9 @@ import json import os from itertools import islice, product -from typing import Type +from typing import Type, Optional -import cocoex +import cocoex # type: ignore import numpy as np from dynamicalgorithmselection.agents.agent_utils import ( @@ -48,17 +48,20 @@ def coco_bbob_single_function( return results -def get_suite(mode, train): +def get_suite(mode: str, train: bool, dim: Optional[int]): """ :param mode: mode of the training (LOPO: easy and hard) or LOIO :param train: if suite should be for testing or training: + :param dim: dimensionality of suite's problem. 
None indicates all of them :return suite and list of problem ids: """ cocoex.utilities.MiniPrint() problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, DIMENSIONS) + for i_id, f_id, dim in product( + INSTANCE_IDS, ALL_FUNCTIONS, (DIMENSIONS if dim is None else [dim]) + ) ] if mode in ["easy", "hard"]: easy = mode == "easy" @@ -70,17 +73,20 @@ def get_suite(mode, train): problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, function_ids, DIMENSIONS) + for i_id, f_id, dim in product( + INSTANCE_IDS, function_ids, (DIMENSIONS if dim is None else [dim]) + ) ] elif mode == "LOIO": - with open("LOIO_train_set.json") as f: - problem_ids = json.load(f)["data"] np.random.seed(1234) + train_problem_ids = np.random.choice( + all_problem_ids, size=2 * len(all_problem_ids) // 3, replace=False + ) if train: - pass + problem_ids = train_problem_ids else: - problem_ids = list(set(all_problem_ids).difference(problem_ids)) + problem_ids = list(set(all_problem_ids).difference(train_problem_ids)) elif mode == "CV": raise ValueError("CV mode is not suitable for get_suite function") else: diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index 1f80d71..e070b3b 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -3,8 +3,8 @@ import pickle import shutil from typing import List, Type, Optional -import cocopp -import neat +import cocopp # type: ignore +import neat # type: ignore import torch import wandb @@ -14,6 +14,7 @@ from dynamicalgorithmselection.agents.random_agent import RandomAgent from dynamicalgorithmselection.experiments.experiment import coco_bbob_experiment from dynamicalgorithmselection import optimizers +from dynamicalgorithmselection.experiments.utils import DIMENSIONS from dynamicalgorithmselection.optimizers.Optimizer import Optimizer 
AGENTS_DICT = { @@ -42,7 +43,7 @@ def parse_arguments(): parser.add_argument( "-m", "--population_size", - type=Optional[int], + type=int, default=None, help="Population size (default: 20)", ) @@ -136,6 +137,24 @@ def parse_arguments(): default=False, help="Enable selection of forcibly restarting optimizers", ) + + parser.add_argument( + "-D", + "--dimensionality", + type=int, + choices=DIMENSIONS, + default=None, + help="dimensionality of problems", + ) + + parser.add_argument( + "-E", + "--n_epochs", + type=int, + default=1, + help="number of training epochs", + ) + return parser.parse_args() @@ -157,6 +176,8 @@ def print_info(args): print("Exponential checkpoint division base: ", args.cdb) print("State representation variant: ", args.state_representation) print("Forcing restarts: ", args.force_restarts) + print("Dimensionality of problems: ", args.dimensionality) + print("Number of training epochs: ", args.n_epochs) def test(args, action_space): @@ -170,6 +191,8 @@ def test(args, action_space): "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, + "dimensionality": args.dimensionality, + "n_epochs": args.n_epochs, } # agent_state = torch.load(f) if args.agent == "neuroevolution": @@ -224,6 +247,8 @@ def run_training(args, action_space): "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, + "dimensionality": args.dimensionality, + "n_epochs": args.n_epochs, }, name=f"DAS_train_{args.name}", evaluations_multiplier=args.fe_multiplier, @@ -248,6 +273,8 @@ def run_CV(args, action_space): "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, + "dimensionality": args.dimensionality, + "n_epochs": args.n_epochs, }, name=f"DAS_CV_{args.name}", evaluations_multiplier=args.fe_multiplier, @@ -265,8 +292,6 @@ def run_baselines(args, action_space): print(f"--- Running Baseline: {optimizer.__name__} ---") - # 2. 
Run experiment for ONLY this optimizer - # NOTICE: We pass `[optimizer]` instead of `action_space` here. coco_bbob_experiment( None, { @@ -277,6 +302,8 @@ def run_baselines(args, action_space): "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, + "dimensionality": args.dimensionality, + "n_epochs": args.n_epochs, }, name=optimizer.__name__, evaluations_multiplier=args.fe_multiplier, diff --git a/dynamicalgorithmselection/optimizers/DE/DE.py b/dynamicalgorithmselection/optimizers/DE/DE.py index fae8ce9..fdbb78b 100644 --- a/dynamicalgorithmselection/optimizers/DE/DE.py +++ b/dynamicalgorithmselection/optimizers/DE/DE.py @@ -57,4 +57,5 @@ def _collect(self, fitness=None, y=None): self._print_verbose_info(fitness, y) results = Optimizer._collect(self, fitness) results["_n_generations"] = self._n_generations + results.update(self.results) return results diff --git a/dynamicalgorithmselection/optimizers/Optimizer.py b/dynamicalgorithmselection/optimizers/Optimizer.py index 13a8edd..1bac425 100644 --- a/dynamicalgorithmselection/optimizers/Optimizer.py +++ b/dynamicalgorithmselection/optimizers/Optimizer.py @@ -2,7 +2,7 @@ from typing import Optional import numpy as np -from pypop7.optimizers.core import Optimizer as BaseOptimizer +from pypop7.optimizers.core import Optimizer as BaseOptimizer, Terminations # type: ignore class Optimizer(BaseOptimizer): @@ -20,6 +20,7 @@ def __init__(self, problem, options): self.x_history, self.y_history = [], [] # [Added] Dictionary to store histories of generic parameters self.parameter_history = {} + self.target_FE: int | float = float("inf") # [Modified] Accept generic kwargs for history tracking def _evaluate_fitness(self, x, args=None, **kwargs): @@ -117,3 +118,10 @@ def optimize(self, fitness_function=None): self.fitness_function = fitness_function fitness = [] # to store all fitness generated during evolution/optimization return fitness + + def _check_terminations(self): + 
termination_signal = super()._check_terminations() + if not termination_signal: + termination_signal = self.n_function_evaluations >= self.target_FE + self.termination_signal = Terminations.MAX_FUNCTION_EVALUATIONS + return termination_signal diff --git a/dynamicalgorithmselection/optimizers/RestartOptimizer.py b/dynamicalgorithmselection/optimizers/RestartOptimizer.py index b26a8b0..8e6a61a 100644 --- a/dynamicalgorithmselection/optimizers/RestartOptimizer.py +++ b/dynamicalgorithmselection/optimizers/RestartOptimizer.py @@ -1,14 +1,22 @@ -from typing import Type +from typing import Type, TypeVar from dynamicalgorithmselection.optimizers.Optimizer import Optimizer +# Create a TypeVar that is bound to the Optimizer base class +T = TypeVar("T", bound=Optimizer) -def restart_optimizer(base: Type[Optimizer]): - class RestartOptimizer(base): + +def restart_optimizer(base: Type[T]) -> Type[T]: + class RestartOptimizer(base): # type: ignore[misc, valid-type] def set_data(self, x=None, y=None, best_x=None, best_y=None, *args, **kwargs): + # We override this to do nothing, effectively "restarting" + # or ignoring previous state transitions. pass new_name = f"{base.__name__}Restart" RestartOptimizer.__name__ = new_name RestartOptimizer.__qualname__ = new_name + + # Casting or returning as Type[T] ensures mypy sees the + # result as the same category of class as the input. 
return RestartOptimizer diff --git a/pyproject.toml b/pyproject.toml index 3a4ac3a..e624646 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ + "mypy>=1.19.1", "pre-commit>=4.5.1", "pytest>=9.0.2", "ruff>=0.14.5", diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py index 3bd9c79..9ba9f4d 100644 --- a/tests/test_cross_validation.py +++ b/tests/test_cross_validation.py @@ -24,7 +24,7 @@ def test_get_cv_folds_structure(self, mock_suite, mock_miniprint): # so we check if it returns lists of correct length/structure. n_folds = 4 - suite, folds = _get_cv_folds(n_folds, is_loio=True) + suite, folds = _get_cv_folds(n_folds, is_loio=True, dim=10) self.assertIsInstance(suite, MagicMock) # Should return the mocked suite self.assertEqual(len(folds), n_folds) diff --git a/tests/test_experiments_core.py b/tests/test_experiments_core.py index b2d57c4..83a8ee6 100644 --- a/tests/test_experiments_core.py +++ b/tests/test_experiments_core.py @@ -13,6 +13,7 @@ def setUp(self): "n_checkpoints": 5, "n_individuals": 10, "cdb": 0.5, + "n_epochs": 1, } self.eval_multiplier = 10 self.problem_ids = ["p1", "p2"] diff --git a/tests/test_policy_gradient_agent.py b/tests/test_policy_gradient_agent.py index 9e2d024..d0ffebc 100644 --- a/tests/test_policy_gradient_agent.py +++ b/tests/test_policy_gradient_agent.py @@ -24,6 +24,8 @@ def ppo_options(self): "reward_normalizer": MagicMock(), "state_normalizer": MagicMock(), "buffer": MagicMock(), + "n_problems": 1000, + "n_epochs": 1, } @pytest.fixture @@ -158,6 +160,8 @@ def test_stagnation_logic(self, mock_problem, ppo_options): def test_buffer_add(self, mock_problem, ppo_options): mock_buffer = MagicMock() ppo_options["buffer"] = mock_buffer + ppo_options["n_problems"] = 1000 + ppo_options["n_epochs"] = 1 with patch( "dynamicalgorithmselection.agents.agent.get_state_representation", diff --git a/uv.lock b/uv.lock index f0879fa..705286c 100644 
--- a/uv.lock +++ b/uv.lock @@ -317,6 +317,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, { name = "ruff" }, @@ -339,6 +340,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "mypy", specifier = ">=1.19.1" }, { name = "pre-commit", specifier = ">=4.5.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "ruff", specifier = ">=0.14.5" }, @@ -615,6 +617,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, ] +[[package]] +name = "librt" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/3f/4ca7dd7819bf8ff303aca39c3c60e5320e46e766ab7f7dd627d3b9c11bdf/librt-0.8.0.tar.gz", hash = "sha256:cb74cdcbc0103fc988e04e5c58b0b31e8e5dd2babb9182b6f9490488eb36324b", size = 177306, upload-time = "2026-02-12T14:53:54.743Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/e9/42af181c89b65abfd557c1b017cba5b82098eef7bf26d1649d82ce93ccc7/librt-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ce33a9778e294507f3a0e3468eccb6a698b5166df7db85661543eca1cfc5369", size = 65314, upload-time = "2026-02-12T14:52:14.778Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4a/15a847fca119dc0334a4b8012b1e15fdc5fc19d505b71e227eaf1bcdba09/librt-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8070aa3368559de81061ef752770d03ca1f5fc9467d4d512d405bd0483bfffe6", size = 68015, upload-time = "2026-02-12T14:52:15.797Z" }, + { url = "https://files.pythonhosted.org/packages/e1/87/ffc8dbd6ab68dd91b736c88529411a6729649d2b74b887f91f3aaff8d992/librt-0.8.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", 
hash = "sha256:20f73d4fecba969efc15cdefd030e382502d56bb6f1fc66b580cce582836c9fa", size = 194508, upload-time = "2026-02-12T14:52:16.835Z" }, + { url = "https://files.pythonhosted.org/packages/89/92/a7355cea28d6c48ff6ff5083ac4a2a866fb9b07b786aa70d1f1116680cd5/librt-0.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a512c88900bdb1d448882f5623a0b1ad27ba81a9bd75dacfe17080b72272ca1f", size = 205630, upload-time = "2026-02-12T14:52:18.58Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5e/54509038d7ac527828db95b8ba1c8f5d2649bc32fd8f39b1718ec9957dce/librt-0.8.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:015e2dde6e096d27c10238bf9f6492ba6c65822dfb69d2bf74c41a8e88b7ddef", size = 218289, upload-time = "2026-02-12T14:52:20.134Z" }, + { url = "https://files.pythonhosted.org/packages/6d/17/0ee0d13685cefee6d6f2d47bb643ddad3c62387e2882139794e6a5f1288a/librt-0.8.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1c25a131013eadd3c600686a0c0333eb2896483cbc7f65baa6a7ee761017aef9", size = 211508, upload-time = "2026-02-12T14:52:21.413Z" }, + { url = "https://files.pythonhosted.org/packages/4b/a8/1714ef6e9325582e3727de3be27e4c1b2f428ea411d09f1396374180f130/librt-0.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:21b14464bee0b604d80a638cf1ee3148d84ca4cc163dcdcecb46060c1b3605e4", size = 219129, upload-time = "2026-02-12T14:52:22.61Z" }, + { url = "https://files.pythonhosted.org/packages/89/d3/2d9fe353edff91cdc0ece179348054a6fa61f3de992c44b9477cb973509b/librt-0.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:05a3dd3f116747f7e1a2b475ccdc6fb637fd4987126d109e03013a79d40bf9e6", size = 213126, upload-time = "2026-02-12T14:52:23.819Z" }, + { url = "https://files.pythonhosted.org/packages/ad/8e/9f5c60444880f6ad50e3ff7475e5529e787797e7f3ad5432241633733b92/librt-0.8.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = 
"sha256:fa37f99bff354ff191c6bcdffbc9d7cdd4fc37faccfc9be0ef3a4fd5613977da", size = 212279, upload-time = "2026-02-12T14:52:25.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/eb/d4a2cfa647da3022ae977f50d7eda1d91f70d7d1883cf958a4b6ef689eab/librt-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1566dbb9d1eb0987264c9b9460d212e809ba908d2f4a3999383a84d765f2f3f1", size = 234654, upload-time = "2026-02-12T14:52:26.204Z" }, + { url = "https://files.pythonhosted.org/packages/6a/31/26b978861c7983b036a3aea08bdbb2ec32bbaab1ad1d57c5e022be59afc1/librt-0.8.0-cp311-cp311-win32.whl", hash = "sha256:70defb797c4d5402166787a6b3c66dfb3fa7f93d118c0509ffafa35a392f4258", size = 54603, upload-time = "2026-02-12T14:52:27.342Z" }, + { url = "https://files.pythonhosted.org/packages/d0/78/f194ed7c48dacf875677e749c5d0d1d69a9daa7c994314a39466237fb1be/librt-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:db953b675079884ffda33d1dca7189fb961b6d372153750beb81880384300817", size = 61730, upload-time = "2026-02-12T14:52:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/97/ee/ad71095478d02137b6f49469dc808c595cfe89b50985f6b39c5345f0faab/librt-0.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:75d1a8cab20b2043f03f7aab730551e9e440adc034d776f15f6f8d582b0a5ad4", size = 52274, upload-time = "2026-02-12T14:52:29.345Z" }, + { url = "https://files.pythonhosted.org/packages/fb/53/f3bc0c4921adb0d4a5afa0656f2c0fbe20e18e3e0295e12985b9a5dc3f55/librt-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:17269dd2745dbe8e42475acb28e419ad92dfa38214224b1b01020b8cac70b645", size = 66511, upload-time = "2026-02-12T14:52:30.34Z" }, + { url = "https://files.pythonhosted.org/packages/89/4b/4c96357432007c25a1b5e363045373a6c39481e49f6ba05234bb59a839c1/librt-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f4617cef654fca552f00ce5ffdf4f4b68770f18950e4246ce94629b789b92467", size = 68628, upload-time = "2026-02-12T14:52:31.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/16/52d75374d1012e8fc709216b5eaa25f471370e2a2331b8be00f18670a6c7/librt-0.8.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5cb11061a736a9db45e3c1293cfcb1e3caf205912dfa085734ba750f2197ff9a", size = 198941, upload-time = "2026-02-12T14:52:32.489Z" }, + { url = "https://files.pythonhosted.org/packages/fc/11/d5dd89e5a2228567b1228d8602d896736247424484db086eea6b8010bcba/librt-0.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4bb00bd71b448f16749909b08a0ff16f58b079e2261c2e1000f2bbb2a4f0a45", size = 210009, upload-time = "2026-02-12T14:52:33.634Z" }, + { url = "https://files.pythonhosted.org/packages/49/d8/fc1a92a77c3020ee08ce2dc48aed4b42ab7c30fb43ce488d388673b0f164/librt-0.8.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95a719a049f0eefaf1952673223cf00d442952273cbd20cf2ed7ec423a0ef58d", size = 224461, upload-time = "2026-02-12T14:52:34.868Z" }, + { url = "https://files.pythonhosted.org/packages/7f/98/eb923e8b028cece924c246104aa800cf72e02d023a8ad4ca87135b05a2fe/librt-0.8.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bd32add59b58fba3439d48d6f36ac695830388e3da3e92e4fc26d2d02670d19c", size = 217538, upload-time = "2026-02-12T14:52:36.078Z" }, + { url = "https://files.pythonhosted.org/packages/fd/67/24e80ab170674a1d8ee9f9a83081dca4635519dbd0473b8321deecddb5be/librt-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4f764b2424cb04524ff7a486b9c391e93f93dc1bd8305b2136d25e582e99aa2f", size = 225110, upload-time = "2026-02-12T14:52:37.301Z" }, + { url = "https://files.pythonhosted.org/packages/d8/c7/6fbdcbd1a6e5243c7989c21d68ab967c153b391351174b4729e359d9977f/librt-0.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f04ca50e847abc486fa8f4107250566441e693779a5374ba211e96e238f298b9", size = 217758, upload-time = "2026-02-12T14:52:38.89Z" }, + 
{ url = "https://files.pythonhosted.org/packages/4b/bd/4d6b36669db086e3d747434430073e14def032dd58ad97959bf7e2d06c67/librt-0.8.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9ab3a3475a55b89b87ffd7e6665838e8458e0b596c22e0177e0f961434ec474a", size = 218384, upload-time = "2026-02-12T14:52:40.637Z" }, + { url = "https://files.pythonhosted.org/packages/50/2d/afe966beb0a8f179b132f3e95c8dd90738a23e9ebdba10f89a3f192f9366/librt-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e36a8da17134ffc29373775d88c04832f9ecfab1880470661813e6c7991ef79", size = 241187, upload-time = "2026-02-12T14:52:43.55Z" }, + { url = "https://files.pythonhosted.org/packages/02/d0/6172ea4af2b538462785ab1a68e52d5c99cfb9866a7caf00fdf388299734/librt-0.8.0-cp312-cp312-win32.whl", hash = "sha256:4eb5e06ebcc668677ed6389164f52f13f71737fc8be471101fa8b4ce77baeb0c", size = 54914, upload-time = "2026-02-12T14:52:44.676Z" }, + { url = "https://files.pythonhosted.org/packages/d4/cb/ceb6ed6175612a4337ad49fb01ef594712b934b4bc88ce8a63554832eb44/librt-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:0a33335eb59921e77c9acc05d0e654e4e32e45b014a4d61517897c11591094f8", size = 62020, upload-time = "2026-02-12T14:52:45.676Z" }, + { url = "https://files.pythonhosted.org/packages/f1/7e/61701acbc67da74ce06ddc7ba9483e81c70f44236b2d00f6a4bfee1aacbf/librt-0.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:24a01c13a2a9bdad20997a4443ebe6e329df063d1978bbe2ebbf637878a46d1e", size = 52443, upload-time = "2026-02-12T14:52:47.218Z" }, + { url = "https://files.pythonhosted.org/packages/6d/32/3edb0bcb4113a9c8bdcd1750663a54565d255027657a5df9d90f13ee07fa/librt-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7f820210e21e3a8bf8fde2ae3c3d10106d4de9ead28cbfdf6d0f0f41f5b12fa1", size = 66522, upload-time = "2026-02-12T14:52:48.219Z" }, + { url = "https://files.pythonhosted.org/packages/30/ab/e8c3d05e281f5d405ebdcc5bc8ab36df23e1a4b40ac9da8c3eb9928b72b9/librt-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:4831c44b8919e75ca0dfb52052897c1ef59fdae19d3589893fbd068f1e41afbf", size = 68658, upload-time = "2026-02-12T14:52:50.351Z" }, + { url = "https://files.pythonhosted.org/packages/7c/d3/74a206c47b7748bbc8c43942de3ed67de4c231156e148b4f9250869593df/librt-0.8.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:88c6e75540f1f10f5e0fc5e87b4b6c290f0e90d1db8c6734f670840494764af8", size = 199287, upload-time = "2026-02-12T14:52:51.938Z" }, + { url = "https://files.pythonhosted.org/packages/fa/29/ef98a9131cf12cb95771d24e4c411fda96c89dc78b09c2de4704877ebee4/librt-0.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9646178cd794704d722306c2c920c221abbf080fede3ba539d5afdec16c46dad", size = 210293, upload-time = "2026-02-12T14:52:53.128Z" }, + { url = "https://files.pythonhosted.org/packages/5b/3e/89b4968cb08c53d4c2d8b02517081dfe4b9e07a959ec143d333d76899f6c/librt-0.8.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e1af31a710e17891d9adf0dbd9a5fcd94901a3922a96499abdbf7ce658f4e01", size = 224801, upload-time = "2026-02-12T14:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/6d/28/f38526d501f9513f8b48d78e6be4a241e15dd4b000056dc8b3f06ee9ce5d/librt-0.8.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:507e94f4bec00b2f590fbe55f48cd518a208e2474a3b90a60aa8f29136ddbada", size = 218090, upload-time = "2026-02-12T14:52:55.758Z" }, + { url = "https://files.pythonhosted.org/packages/02/ec/64e29887c5009c24dc9c397116c680caffc50286f62bd99c39e3875a2854/librt-0.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f1178e0de0c271231a660fbef9be6acdfa1d596803464706862bef6644cc1cae", size = 225483, upload-time = "2026-02-12T14:52:57.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/16/7850bdbc9f1a32d3feff2708d90c56fc0490b13f1012e438532781aa598c/librt-0.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:71fc517efc14f75c2f74b1f0a5d5eb4a8e06aa135c34d18eaf3522f4a53cd62d", size = 218226, upload-time = "2026-02-12T14:52:58.534Z" }, + { url = "https://files.pythonhosted.org/packages/1c/4a/166bffc992d65ddefa7c47052010a87c059b44a458ebaf8f5eba384b0533/librt-0.8.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0583aef7e9a720dd40f26a2ad5a1bf2ccbb90059dac2b32ac516df232c701db3", size = 218755, upload-time = "2026-02-12T14:52:59.701Z" }, + { url = "https://files.pythonhosted.org/packages/da/5d/9aeee038bcc72a9cfaaee934463fe9280a73c5440d36bd3175069d2cb97b/librt-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5d0f76fc73480d42285c609c0ea74d79856c160fa828ff9aceab574ea4ecfd7b", size = 241617, upload-time = "2026-02-12T14:53:00.966Z" }, + { url = "https://files.pythonhosted.org/packages/64/ff/2bec6b0296b9d0402aa6ec8540aa19ebcb875d669c37800cb43d10d9c3a3/librt-0.8.0-cp313-cp313-win32.whl", hash = "sha256:e79dbc8f57de360f0ed987dc7de7be814b4803ef0e8fc6d3ff86e16798c99935", size = 54966, upload-time = "2026-02-12T14:53:02.042Z" }, + { url = "https://files.pythonhosted.org/packages/08/8d/bf44633b0182996b2c7ea69a03a5c529683fa1f6b8e45c03fe874ff40d56/librt-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:25b3e667cbfc9000c4740b282df599ebd91dbdcc1aa6785050e4c1d6be5329ab", size = 62000, upload-time = "2026-02-12T14:53:03.822Z" }, + { url = "https://files.pythonhosted.org/packages/5c/fd/c6472b8e0eac0925001f75e366cf5500bcb975357a65ef1f6b5749389d3a/librt-0.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:e9a3a38eb4134ad33122a6d575e6324831f930a771d951a15ce232e0237412c2", size = 52496, upload-time = "2026-02-12T14:53:04.889Z" }, + { url = "https://files.pythonhosted.org/packages/e0/13/79ebfe30cd273d7c0ce37a5f14dc489c5fb8b722a008983db2cfd57270bb/librt-0.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = 
"sha256:421765e8c6b18e64d21c8ead315708a56fc24f44075059702e421d164575fdda", size = 66078, upload-time = "2026-02-12T14:53:06.085Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8f/d11eca40b62a8d5e759239a80636386ef88adecb10d1a050b38cc0da9f9e/librt-0.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:48f84830a8f8ad7918afd743fd7c4eb558728bceab7b0e38fd5a5cf78206a556", size = 68309, upload-time = "2026-02-12T14:53:07.121Z" }, + { url = "https://files.pythonhosted.org/packages/9c/b4/f12ee70a3596db40ff3c88ec9eaa4e323f3b92f77505b4d900746706ec6a/librt-0.8.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9f09d4884f882baa39a7e36bbf3eae124c4ca2a223efb91e567381d1c55c6b06", size = 196804, upload-time = "2026-02-12T14:53:08.164Z" }, + { url = "https://files.pythonhosted.org/packages/8b/7e/70dbbdc0271fd626abe1671ad117bcd61a9a88cdc6a10ccfbfc703db1873/librt-0.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:693697133c3b32aa9b27f040e3691be210e9ac4d905061859a9ed519b1d5a376", size = 206915, upload-time = "2026-02-12T14:53:09.333Z" }, + { url = "https://files.pythonhosted.org/packages/79/13/6b9e05a635d4327608d06b3c1702166e3b3e78315846373446cf90d7b0bf/librt-0.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5512aae4648152abaf4d48b59890503fcbe86e85abc12fb9b096fe948bdd816", size = 221200, upload-time = "2026-02-12T14:53:10.68Z" }, + { url = "https://files.pythonhosted.org/packages/35/6c/e19a3ac53e9414de43a73d7507d2d766cd22d8ca763d29a4e072d628db42/librt-0.8.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:995d24caa6bbb34bcdd4a41df98ac6d1af637cfa8975cb0790e47d6623e70e3e", size = 214640, upload-time = "2026-02-12T14:53:12.342Z" }, + { url = "https://files.pythonhosted.org/packages/30/f0/23a78464788619e8c70f090cfd099cce4973eed142c4dccb99fc322283fd/librt-0.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", 
hash = "sha256:b9aef96d7593584e31ef6ac1eb9775355b0099fee7651fae3a15bc8657b67b52", size = 221980, upload-time = "2026-02-12T14:53:13.603Z" }, + { url = "https://files.pythonhosted.org/packages/03/32/38e21420c5d7aa8a8bd2c7a7d5252ab174a5a8aaec8b5551968979b747bf/librt-0.8.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4f6e975377fbc4c9567cb33ea9ab826031b6c7ec0515bfae66a4fb110d40d6da", size = 215146, upload-time = "2026-02-12T14:53:14.8Z" }, + { url = "https://files.pythonhosted.org/packages/bb/00/bd9ecf38b1824c25240b3ad982fb62c80f0a969e6679091ba2b3afb2b510/librt-0.8.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:daae5e955764be8fd70a93e9e5133c75297f8bce1e802e1d3683b98f77e1c5ab", size = 215203, upload-time = "2026-02-12T14:53:16.087Z" }, + { url = "https://files.pythonhosted.org/packages/b9/60/7559bcc5279d37810b98d4a52616febd7b8eef04391714fd6bdf629598b1/librt-0.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7bd68cebf3131bb920d5984f75fe302d758db33264e44b45ad139385662d7bc3", size = 237937, upload-time = "2026-02-12T14:53:17.236Z" }, + { url = "https://files.pythonhosted.org/packages/41/cc/be3e7da88f1abbe2642672af1dc00a0bccece11ca60241b1883f3018d8d5/librt-0.8.0-cp314-cp314-win32.whl", hash = "sha256:1e6811cac1dcb27ca4c74e0ca4a5917a8e06db0d8408d30daee3a41724bfde7a", size = 50685, upload-time = "2026-02-12T14:53:18.888Z" }, + { url = "https://files.pythonhosted.org/packages/38/27/e381d0df182a8f61ef1f6025d8b138b3318cc9d18ad4d5f47c3bf7492523/librt-0.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:178707cda89d910c3b28bf5aa5f69d3d4734e0f6ae102f753ad79edef83a83c7", size = 57872, upload-time = "2026-02-12T14:53:19.942Z" }, + { url = "https://files.pythonhosted.org/packages/c5/0c/ca9dfdf00554a44dea7d555001248269a4bab569e1590a91391feb863fa4/librt-0.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3e8b77b5f54d0937b26512774916041756c9eb3e66f1031971e626eea49d0bf4", size = 48056, upload-time = "2026-02-12T14:53:21.473Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/ed/6cc9c4ad24f90c8e782193c7b4a857408fd49540800613d1356c63567d7b/librt-0.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:789911e8fa40a2e82f41120c936b1965f3213c67f5a483fc5a41f5839a05dcbb", size = 68307, upload-time = "2026-02-12T14:53:22.498Z" }, + { url = "https://files.pythonhosted.org/packages/84/d8/0e94292c6b3e00b6eeea39dd44d5703d1ec29b6dafce7eea19dc8f1aedbd/librt-0.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2b37437e7e4ef5e15a297b36ba9e577f73e29564131d86dd75875705e97402b5", size = 70999, upload-time = "2026-02-12T14:53:23.603Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f4/6be1afcbdeedbdbbf54a7c9d73ad43e1bf36897cebf3978308cd64922e02/librt-0.8.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:671a6152edf3b924d98a5ed5e6982ec9cb30894085482acadce0975f031d4c5c", size = 220782, upload-time = "2026-02-12T14:53:25.133Z" }, + { url = "https://files.pythonhosted.org/packages/f0/8d/f306e8caa93cfaf5c6c9e0d940908d75dc6af4fd856baa5535c922ee02b1/librt-0.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8992ca186a1678107b0af3d0c9303d8c7305981b9914989b9788319ed4d89546", size = 235420, upload-time = "2026-02-12T14:53:27.047Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f2/65d86bd462e9c351326564ca805e8457442149f348496e25ccd94583ffa2/librt-0.8.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:001e5330093d887b8b9165823eca6c5c4db183fe4edea4fdc0680bbac5f46944", size = 246452, upload-time = "2026-02-12T14:53:28.341Z" }, + { url = "https://files.pythonhosted.org/packages/03/94/39c88b503b4cb3fcbdeb3caa29672b6b44ebee8dcc8a54d49839ac280f3f/librt-0.8.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d920789eca7ef71df7f31fd547ec0d3002e04d77f30ba6881e08a630e7b2c30e", size = 238891, upload-time = "2026-02-12T14:53:29.625Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e3/c6/6c0d68190893d01b71b9569b07a1c811e280c0065a791249921c83dc0290/librt-0.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:82fb4602d1b3e303a58bfe6165992b5a78d823ec646445356c332cd5f5bbaa61", size = 250249, upload-time = "2026-02-12T14:53:30.93Z" }, + { url = "https://files.pythonhosted.org/packages/52/7a/f715ed9e039035d0ea637579c3c0155ab3709a7046bc408c0fb05d337121/librt-0.8.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:4d3e38797eb482485b486898f89415a6ab163bc291476bd95712e42cf4383c05", size = 240642, upload-time = "2026-02-12T14:53:32.174Z" }, + { url = "https://files.pythonhosted.org/packages/c2/3c/609000a333debf5992efe087edc6467c1fdbdddca5b610355569bbea9589/librt-0.8.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a905091a13e0884701226860836d0386b88c72ce5c2fdfba6618e14c72be9f25", size = 239621, upload-time = "2026-02-12T14:53:33.39Z" }, + { url = "https://files.pythonhosted.org/packages/b9/df/87b0673d5c395a8f34f38569c116c93142d4dc7e04af2510620772d6bd4f/librt-0.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:375eda7acfce1f15f5ed56cfc960669eefa1ec8732e3e9087c3c4c3f2066759c", size = 262986, upload-time = "2026-02-12T14:53:34.617Z" }, + { url = "https://files.pythonhosted.org/packages/09/7f/6bbbe9dcda649684773aaea78b87fff4d7e59550fbc2877faa83612087a3/librt-0.8.0-cp314-cp314t-win32.whl", hash = "sha256:2ccdd20d9a72c562ffb73098ac411de351b53a6fbb3390903b2d33078ef90447", size = 51328, upload-time = "2026-02-12T14:53:36.15Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f3/e1981ab6fa9b41be0396648b5850267888a752d025313a9e929c4856208e/librt-0.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:25e82d920d4d62ad741592fcf8d0f3bda0e3fc388a184cb7d2f566c681c5f7b9", size = 58719, upload-time = "2026-02-12T14:53:37.183Z" }, + { url = "https://files.pythonhosted.org/packages/94/d1/433b3c06e78f23486fe4fdd19bc134657eb30997d2054b0dbf52bbf3382e/librt-0.8.0-cp314-cp314t-win_arm64.whl", hash 
= "sha256:92249938ab744a5890580d3cb2b22042f0dce71cdaa7c1369823df62bedf7cbc", size = 48753, upload-time = "2026-02-12T14:53:38.539Z" }, +] + [[package]] name = "llvmlite" version = "0.45.1" @@ -805,6 +880,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, ] +[[package]] +name = "mypy" +version = "1.19.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" }, + { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" }, + { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" }, + { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" }, + { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, + { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, + { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, + { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, + { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, + { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, + { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, + { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, + { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, + { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, + { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, + { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, + { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, + { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, + { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, + { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = 
"sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "neat-python" version = "0.92" @@ -1066,6 +1189,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692, upload-time = "2023-06-28T23:17:28.824Z" }, ] +[[package]] +name = "pathspec" +version = "1.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, +] + [[package]] name = "pflacco" version = "1.2.2" From d066ee1827835e4aed5ce48e9dc8cb7395ea04b0 Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 17 Feb 2026 21:11:02 +0100 Subject: [PATCH 08/20] fix RL-DAS agent PPO implementation --- .../agents/RLDAS_agent.py | 161 +++++++----------- 1 file changed, 57 insertions(+), 104 deletions(-) diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index e8e17cf..63e63ab 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -9,6 +9,7 @@ DEVICE, RolloutBuffer, RLDASNetwork, + GAMMA, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer @@ -166,7 +167,6 @@ def optimize(self, fitness_function=None, 
args=None): self.best_so_far_x = best_x_global self.history.append(self.best_so_far_y) - self.fitness_history.append(self.best_so_far_y) if self.saving_fitness: fitness.append(self.best_so_far_y) @@ -252,7 +252,6 @@ def optimize(self, fitness_function=None, args=None): self.best_so_far_x = x_best_new self.history.append(self.best_so_far_y) - self.fitness_history.append(self.best_so_far_y) if self.saving_fitness: fitness.append(self.best_so_far_y) @@ -318,123 +317,77 @@ def _collect(self, fitness, y=None): return results, agent_state - def _update_on_minibatch( - self, - mb_la, - mb_ah, - mb_actions, - mb_old_log_probs, - mb_returns, - mb_advantages, - clip_eps, - value_coef, - entropy_coef, - ): - policy_probs, values_pred = self.network(mb_la, mb_ah) - - dist = torch.distributions.Categorical(policy_probs) - dist_log_probs = dist.log_prob(mb_actions) - entropy = dist.entropy().mean() - - ratio = torch.exp(dist_log_probs - mb_old_log_probs) - - values_pred = values_pred.squeeze(1) - value_loss = torch.nn.functional.mse_loss(values_pred, mb_returns) - - surr1 = ratio * mb_advantages - surr2 = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * mb_advantages - actor_loss = -torch.min(surr1, surr2).mean() - - loss = actor_loss + value_coef * value_loss # - entropy_coef * entropy - - self.optimizer.zero_grad() - loss.backward() - torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) - self.optimizer.step() - - return actor_loss.detach().item(), value_loss.detach().item() - def ppo_update( self, buffer, epochs=4, - minibatch_size=256, + minibatch_size=None, clip_eps=0.2, value_coef=0.5, entropy_coef=0.01, ): - la_states, ah_states, actions, old_log_probs, returns, advantages = ( - self._compute_advantages(buffer) - ) - dataset_size = la_states.shape[0] + la_list, ah_list = zip(*buffer.states) - n_batches = 0 + la_states = torch.stack(la_list).to(DEVICE) + ah_states = torch.stack(ah_list).to(DEVICE) - actual_minibatch_size = min(minibatch_size, dataset_size) + 
actions = torch.tensor(buffer.actions).to(DEVICE) + rewards = buffer.rewards + dones = buffer.dones - for epoch in range(epochs): - indices = np.arange(dataset_size) - np.random.shuffle(indices) + old_logprobs = torch.stack(buffer.log_probs).detach().to(DEVICE).view(-1) - for start in range(0, dataset_size, actual_minibatch_size): - idx = indices[start : start + actual_minibatch_size] + old_values = torch.stack(buffer.values).detach().to(DEVICE).view(-1) - self._update_on_minibatch( - la_states[idx], - ah_states[idx], # Pass both - actions[idx], - old_log_probs[idx], - returns[idx], - advantages[idx], - clip_eps, - value_coef, - entropy_coef, - ) - n_batches += 1 + for _ in range(epochs): + policy_probs, current_values = self.network(la_states, ah_states) - def _compute_advantages(self, buffer): - """ - Computes GAE handling split (LA, AH) state inputs. - """ - la_list, ah_list = zip(*buffer.states) + dist = torch.distributions.Categorical(policy_probs) + logprobs = dist.log_prob(actions) - la_states = torch.stack(la_list).to(DEVICE) - ah_states = torch.stack(ah_list).to(DEVICE) + current_values = current_values.squeeze() - rewards = torch.tensor(buffer.rewards, dtype=torch.float32).to(DEVICE) - dones = torch.tensor(buffer.dones, dtype=torch.float32).to(DEVICE) - values = torch.stack(buffer.values).squeeze().to(DEVICE) + with torch.no_grad(): + if dones[-1]: + R = 0.0 + else: + R = current_values[-1].item() if not dones[-1] else 0.0 - with torch.no_grad(): - if buffer.dones[-1]: - next_value = 0.0 - else: - _, last_val_tens = self.network( - la_states[-1].unsqueeze(0), ah_states[-1].unsqueeze(0) + Returns = [] + for r in reversed(rewards): + R = R * GAMMA + r + Returns.insert(0, R) + + Returns = torch.tensor(Returns).to(DEVICE).float() + advantages = Returns - current_values.detach() + ratios = torch.exp(logprobs - old_logprobs) + + # Actor Loss (Reinforce Loss with Clipping) + surr1 = ratios * advantages + surr2 = torch.clamp(ratios, 1 - clip_eps, 1 + clip_eps) * 
advantages + actor_loss = -torch.min(surr1, surr2).mean() + + vpredclipped = old_values + torch.clamp( + current_values - old_values, -clip_eps, clip_eps + ) + + v_max = torch.max( + ((current_values - Returns) ** 2), ((vpredclipped - Returns) ** 2) + ) + critic_loss = v_max.mean() + + loss = critic_loss + actor_loss + + if self.run is not None: + self.run.log( + { + "Returns": Returns.mean().item(), + "actor_loss": actor_loss.item(), + "critic_loss": critic_loss.item(), + } ) - next_value = last_val_tens.item() - - advantages = [] - last_gae_lam = 0 - gamma = 0.90 - lam = 0.5 - - for step in reversed(range(len(rewards))): - next_non_terminal = 1.0 - dones[step] - next_val = next_value if step == len(rewards) - 1 else values[step + 1] - - delta = rewards[step] + gamma * next_val * next_non_terminal - values[step] - last_gae_lam = delta + gamma * lam * next_non_terminal * last_gae_lam - advantages.insert(0, last_gae_lam) - - advantages = torch.tensor(advantages, dtype=torch.float32).to(DEVICE) - returns = advantages + values - - return ( - la_states, - ah_states, - torch.tensor(buffer.actions).to(DEVICE), - torch.stack(buffer.log_probs).to(DEVICE), - returns, - advantages, - ) + + self.optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) + self.optimizer.step() From 35908b09e7a220a465678a46ca4b37831f94bbbf Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 17 Feb 2026 22:33:11 +0100 Subject: [PATCH 09/20] add all experiment series to runner.slurm --- .../agents/RLDAS_agent.py | 6 +-- dynamicalgorithmselection/agents/agent.py | 2 +- .../agents/agent_state.py | 4 +- .../agents/agent_utils.py | 4 +- dynamicalgorithmselection/experiments/core.py | 4 +- .../experiments/cross_validation.py | 4 +- .../experiments/experiment.py | 6 +-- .../optimizers/ES/CMAES.py | 2 +- .../optimizers/ES/OPOA2015.py | 2 +- runner.slurm | 50 +++++++++++++++++-- 10 files changed, 63 insertions(+), 21 deletions(-) diff --git 
a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 63e63ab..4c94393 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -219,9 +219,9 @@ def optimize(self, fitness_function=None, args=None): self._save_context(sub_opt, alg_name) - x_best_new = population_x[np.argmin(population_y)].copy() - x_worst_new = population_x[np.argmax(population_y)].copy() - cost_new = np.min(population_y) + x_best_new: float = population_x[np.argmin(population_y)].copy() + x_worst_new: float = population_x[np.argmax(population_y)].copy() + cost_new: float = np.min(population_y) self._update_ah_history( action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index 5c66aac..abba830 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -63,7 +63,7 @@ def get_partial_state( x: Optional[np.ndarray], y: Optional[np.ndarray], optimization_state: bool = False, - ) -> np.array: + ) -> np.ndarray: sr_additional_params = ( self.lower_boundary, self.upper_boundary, diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 8757f8f..7609c96 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -3,7 +3,7 @@ from typing import Tuple, Callable, Any import numpy as np -import pandas as pd +import pandas as pd # type: ignore from pflacco.classical_ela_features import ( calculate_ela_meta, # Meta-Model (Linear/Quadratic fit) calculate_nbc, # Nearest Better Clustering @@ -11,7 +11,7 @@ calculate_information_content, calculate_ela_distribution, # Information Content ) -from scipy.spatial.distance import pdist +from scipy.spatial.distance import pdist # type: ignore from scipy.stats import spearmanr # type: ignore from 
dynamicalgorithmselection.NeurELA.NeurELA import feature_embedder diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 96d2513..e60a8ed 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -19,8 +19,8 @@ def get_runtime_stats( area_under_optimization_curve = 0.0 last_i = 0 checkpoint_idx = 0 - last_fitness = None - checkpoints_fitness = [] + last_fitness = float("inf") + checkpoints_fitness: list[float] = [] for i, fitness in fitness_history: area_under_optimization_curve += fitness * (i - last_i) while ( diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 237f636..ce4c7b1 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -1,8 +1,8 @@ from typing import Type, Any -import cocoex +import cocoex # type: ignore import numpy as np -from tqdm import tqdm +from tqdm import tqdm # type: ignore from dynamicalgorithmselection.experiments.utils import ( coco_bbob_single_function, diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index 2233575..8d46890 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -2,7 +2,7 @@ from itertools import product from typing import Type, Optional -import cocoex +import cocoex # type: ignore import numpy as np from dynamicalgorithmselection.experiments.core import run_testing, run_training @@ -25,7 +25,7 @@ def run_cross_validation( os.mkdir(results_dir) cocoex.utilities.MiniPrint() problems_suite, cv_folds = _get_cv_folds(4, is_loio, options.get("dimensionality")) - observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) + observer = cocoex.Observer("bbob", "result_folder: " + options["name"]) for i, (train_set, 
test_set) in enumerate(cv_folds): print(f"Running cross validation training, fold {i + 1}") run_training( diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index d50f3ea..e561f83 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -13,7 +13,7 @@ dump_stats, ) -import cocoex +import cocoex # type: ignore from tqdm import tqdm # type: ignore from dynamicalgorithmselection.agents.agent_utils import ( @@ -66,7 +66,7 @@ def dump_extreme_stats( def coco_bbob_experiment( - optimizer: Type[Optimizer], + optimizer: Optional[Type[Optimizer]], options: dict, name: str, evaluations_multiplier: int = 1_000, @@ -85,7 +85,7 @@ def coco_bbob_experiment( elif options.get("baselines"): # running only baselines return run_comparison( - options.get("optimizer_portfolio"), options, evaluations_multiplier + options["optimizer_portfolio"], options, evaluations_multiplier ) elif not train: return _coco_bbob_test(optimizer, options, evaluations_multiplier, mode) diff --git a/dynamicalgorithmselection/optimizers/ES/CMAES.py b/dynamicalgorithmselection/optimizers/ES/CMAES.py index 7dbb308..74b5896 100644 --- a/dynamicalgorithmselection/optimizers/ES/CMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/CMAES.py @@ -365,7 +365,7 @@ def set_data( } mean = x[indices].mean(axis=0) stds = np.std(x[indices], axis=0) - sigma = np.max(stds) + sigma: float = np.max(stds) sigma = max(sigma, 1e-8) start_conditions.update( {"x": x[indices], "y": y[indices], "mean": mean, "sigma": sigma} diff --git a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py index c1b4d08..aecc439 100644 --- a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py +++ b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py @@ -1,7 +1,7 @@ from typing import Optional import numpy as np -import numba as nb +import numba as nb # type: ignore 
from dynamicalgorithmselection.optimizers.ES.ES import ES

diff --git a/runner.slurm b/runner.slurm
index 91d3a60..db4bda6 100644
--- a/runner.slurm
+++ b/runner.slurm
@@ -7,12 +7,54 @@
 #SBATCH --mem=32G
 #SBATCH --time=48:00:00
 #SBATCH --partition=plgrid-gpu-a100
-#SBATCH -A plgautopt26-gpu-a100
+#SBATCH --array=0-15 # 0-6 (LOIO), 7-13 (LOPO), 14-15 (Random and baselines)

 # CONFIGURATION
 ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate"
-
 source "$ENV_PATH"
+mkdir -p logs # Ensure logs directory exists
+
+# Determine the Mode based on the Array ID
+if [[ $SLURM_ARRAY_TASK_ID -le 6 || $SLURM_ARRAY_TASK_ID -ge 14 ]]; then
+    MODE="CV-LOIO"
+    TASK_ID=$SLURM_ARRAY_TASK_ID
+else
+    MODE="CV-LOPO"
+    TASK_ID=$((SLURM_ARRAY_TASK_ID - 7))
+fi
+
+# Map Task IDs 0-5 to dimension-specific runs (ID 6 is the multidimensional run)
+DIMS=(2 3 5 10 20 40)
+
+if [ $TASK_ID -le 5 ]; then
+    # --- DIMENSION SPECIFIC RUNS ---
+    DIM=${DIMS[$TASK_ID]}
+    echo "Running Mode: $MODE | Dimension: $DIM"
+
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS \
+        -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \
+        --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS
+
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG \
+        -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \
+        -r custom --mode $MODE --dimensionality $DIM --cdb 1.5 --n_epochs 3 --agent policy-gradient
+
+elif [ $TASK_ID -eq 6 ]; then
+    # --- MULTIDIMENSIONAL ---
+    echo "Running Mode: $MODE | Multidimensional PG"
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL \
+        -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \
+        -r custom --mode $MODE --cdb 1.5 --agent policy-gradient
+
+elif [ $TASK_ID -eq 14 ]; then
+    # --- RANDOM AGENT ---
+    echo "Running Mode: Random Agent"
+    python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM \
+        -p 
'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb 1.5 --agent random -# Run Experiment -python3 dynamicalgorithmselection/main.py example -p 'LMCMAES' 'SPSO' 'G3PCX' --wandb_project RL-DAS --wandb_entity niecwladek-agh --mode LOIO -s 10 +elif [ $TASK_ID -eq 15 ]; then + # --- BASELINES --- + echo "Running Mode: Baselines" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM \ + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent random --mode baselines +fi \ No newline at end of file From 4897dc3f4f2950c63695aa511c0e5aa21bbd26eb Mon Sep 17 00:00:00 2001 From: wniec Date: Wed, 18 Feb 2026 19:53:29 +0100 Subject: [PATCH 10/20] update runner.slurm and move to ty --- .../agents/RLDAS_agent.py | 48 +++--- dynamicalgorithmselection/agents/agent.py | 12 +- .../agents/agent_state.py | 14 +- .../agents/agent_utils.py | 9 +- .../agents/neuroevolution_agent.py | 2 +- dynamicalgorithmselection/experiments/core.py | 4 +- .../experiments/cross_validation.py | 3 +- .../experiments/experiment.py | 4 +- .../experiments/utils.py | 2 +- dynamicalgorithmselection/main.py | 4 +- .../optimizers/ES/OPOA2015.py | 2 +- .../optimizers/Optimizer.py | 84 +++++----- pyproject.toml | 2 +- runner.slurm | 16 +- tests/test_experiment.py | 22 ++- uv.lock | 158 +++--------------- 16 files changed, 160 insertions(+), 226 deletions(-) diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 4c94393..ada85ba 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -2,6 +2,7 @@ import torch import copy import os +from typing import Any, Dict, List, Optional, Tuple from dynamicalgorithmselection.agents.agent import Agent from dynamicalgorithmselection.agents.agent_state import get_la_features @@ -32,7 +33,9 @@ def __init__(self, problem, options): self._load_parameters(options) self.ah_vectors = np.zeros((self.n_algorithms, 2, self.ndim_problem)) self.alg_usage_counts = 
np.zeros(self.n_algorithms) - self.context_memory = {name: {} for name in self.alg_names} + self.context_memory: Dict[str, Dict[str, Any]] = { + name: {} for name in self.alg_names + } self.context_memory["Common"] = {} self.mean_rewards = options.get("mean_rewards", []) self.best_50_mean = float("inf") @@ -52,8 +55,15 @@ def _load_parameters(self, options): if p := options.get("optimizer", None): self.optimizer.load_state_dict(p) - def get_state(self, pop_x, pop_y): - la = get_la_features(self, pop_x, pop_y) + def get_state( + self, + x: Optional[np.ndarray] = None, + y: Optional[np.ndarray] = None, + x_history: Optional[np.ndarray] = None, + y_history: Optional[np.ndarray] = None, + update: bool = True, + ) -> Tuple[np.ndarray, np.ndarray]: + la = get_la_features(self, x, y) ah = self.ah_vectors.copy() return la, ah @@ -167,8 +177,7 @@ def optimize(self, fitness_function=None, args=None): self.best_so_far_x = best_x_global self.history.append(self.best_so_far_y) - if self.saving_fitness: - fitness.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 @@ -177,7 +186,7 @@ def optimize(self, fitness_function=None, args=None): self.context_memory = {name: {} for name in self.alg_names} self.context_memory["Common"] = {} - trajectory = [] # To store (s, a, r_raw, log_prob, val, done) + trajectory = [] while self.n_function_evaluations < self.max_function_evaluations: state = self.get_state(population_x, population_y) @@ -219,8 +228,8 @@ def optimize(self, fitness_function=None, args=None): self._save_context(sub_opt, alg_name) - x_best_new: float = population_x[np.argmin(population_y)].copy() - x_worst_new: float = population_x[np.argmax(population_y)].copy() + x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() + x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy() cost_new: float = np.min(population_y) self._update_ah_history( @@ -252,8 +261,7 
@@ def optimize(self, fitness_function=None, args=None): self.best_so_far_x = x_best_new self.history.append(self.best_so_far_y) - if self.saving_fitness: - fitness.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) @@ -349,17 +357,18 @@ def ppo_update( with torch.no_grad(): if dones[-1]: - R = 0.0 + return_value = 0.0 else: - R = current_values[-1].item() if not dones[-1] else 0.0 + return_value = current_values[-1].item() if not dones[-1] else 0.0 - Returns = [] + returns_list: List[float] = [] for r in reversed(rewards): - R = R * GAMMA + r - Returns.insert(0, R) + return_value = return_value * GAMMA + r + returns_list.insert(0, return_value) + + returns_tensor: torch.Tensor = torch.tensor(returns_list).to(DEVICE).float() + advantages = returns_tensor - current_values.detach() - Returns = torch.tensor(Returns).to(DEVICE).float() - advantages = Returns - current_values.detach() ratios = torch.exp(logprobs - old_logprobs) # Actor Loss (Reinforce Loss with Clipping) @@ -372,7 +381,8 @@ def ppo_update( ) v_max = torch.max( - ((current_values - Returns) ** 2), ((vpredclipped - Returns) ** 2) + ((current_values - returns_tensor) ** 2), + ((vpredclipped - returns_tensor) ** 2), ) critic_loss = v_max.mean() @@ -381,7 +391,7 @@ def ppo_update( if self.run is not None: self.run.log( { - "Returns": Returns.mean().item(), + "Returns": returns_tensor.mean().item(), "actor_loss": actor_loss.item(), "critic_loss": critic_loss.item(), } diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index abba830..3900af3 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -1,5 +1,5 @@ from itertools import product -from typing import List, Type, Optional +from typing import List, Type, Optional, Dict, Any import numpy as np from dynamicalgorithmselection.agents.agent_state import ( 
get_state_representation, @@ -163,7 +163,13 @@ def _save_fitness(self, best_x, best_y, worst_x, worst_y): self._check_early_stopping(best_y) - def iterate(self, optimizer_input_data=None, optimizer=None): + def iterate( + self, + optimizer_input_data: Optional[Dict] = None, + optimizer: Optional[Optimizer] = None, + ): + if optimizer_input_data is None or optimizer is None: + raise ValueError("Inputs to iterate cannot be None") optimizer_input_data["best_x"] = self.best_so_far_x optimizer_input_data["best_y"] = self.best_so_far_y optimizer.set_data(**optimizer_input_data) @@ -172,7 +178,7 @@ def iterate(self, optimizer_input_data=None, optimizer=None): return optimizer.get_data() self._n_generations += 1 - results = optimizer.optimize() + results: Dict[str, Any] = optimizer.optimize() self.fitness_history.extend(results["fitness_history"]) self._save_fitness( diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 7609c96..9df5349 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -3,7 +3,7 @@ from typing import Tuple, Callable, Any import numpy as np -import pandas as pd # type: ignore +import pandas as pd from pflacco.classical_ela_features import ( calculate_ela_meta, # Meta-Model (Linear/Quadratic fit) calculate_nbc, # Nearest Better Clustering @@ -11,8 +11,8 @@ calculate_information_content, calculate_ela_distribution, # Information Content ) -from scipy.spatial.distance import pdist # type: ignore -from scipy.stats import spearmanr # type: ignore +from scipy.spatial.distance import pdist +from scipy.stats import spearmanr from dynamicalgorithmselection.NeurELA.NeurELA import feature_embedder from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd @@ -165,7 +165,7 @@ def get_weighted_central_moment(self, n: int): return numerator / max(1e-5, inertia_denom_w * inertia_denom_n) def normalized_distance(self, x0: 
np.ndarray, x1: np.ndarray) -> float: - return min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0) + return float(min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0)) def get_fitness_weights(self) -> np.ndarray: weights = ( @@ -175,9 +175,9 @@ def get_fitness_weights(self) -> np.ndarray: ) return weights / weights.sum() - def population_relative_radius(self) -> np.ndarray: + def population_relative_radius(self) -> float: population_radius = np.linalg.norm(self.x.max(axis=0) - self.x.min(axis=0)) - return population_radius / self.max_distance + return float(population_radius / self.max_distance) def slopes_stats(self) -> tuple: return get_list_stats( @@ -330,7 +330,7 @@ def get_state(self, optimization_status=False) -> np.ndarray: def distance(x0: np.ndarray, x1: np.ndarray) -> float: - return np.linalg.norm(x0 - x1) + return float(np.linalg.norm(x0 - x1)) def inverse_scaling(x): diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index e60a8ed..849c941 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -9,7 +9,9 @@ def get_runtime_stats( fitness_history: list[tuple[int, float]], function_evaluations: int, checkpoints: np.ndarray, -) -> dict[str, float | list[Optional[float]]]: +) -> dict[ + str, float | list[float] +]: # Changed from list[Optional[float]] to list[float] """ :param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved :param function_evaluations: max number of function evaluations during run. 
@@ -21,6 +23,7 @@ def get_runtime_stats( checkpoint_idx = 0 last_fitness = float("inf") checkpoints_fitness: list[float] = [] + for i, fitness in fitness_history: area_under_optimization_curve += fitness * (i - last_i) while ( @@ -31,13 +34,16 @@ def get_runtime_stats( checkpoint_idx += 1 last_i = i last_fitness = fitness + area_under_optimization_curve += fitness_history[-1][1] * ( function_evaluations - fitness_history[-1][0] ) final_fitness = fitness_history[-1][1] + if function_evaluations == checkpoints[-1]: while len(checkpoints_fitness) < len(checkpoints): checkpoints_fitness.append(final_fitness) + return { "area_under_optimization_curve": area_under_optimization_curve / function_evaluations, @@ -93,6 +99,7 @@ def get_extreme_stats( worst_history.append((fe, fitness)) current_worst_fitness = new_worst_fitness + # These now match the expected return type of tuple[dict[str, float | list[float]], ...] return ( get_runtime_stats(best_history, function_evaluations, checkpoints), get_runtime_stats(worst_history, function_evaluations, checkpoints), diff --git a/dynamicalgorithmselection/agents/neuroevolution_agent.py b/dynamicalgorithmselection/agents/neuroevolution_agent.py index 0408287..abcd53b 100644 --- a/dynamicalgorithmselection/agents/neuroevolution_agent.py +++ b/dynamicalgorithmselection/agents/neuroevolution_agent.py @@ -42,7 +42,7 @@ def optimize(self, fitness_function=None, args=None): action_options["verbose"] = False optimizer = self.actions[action](self.problem, action_options) optimizer.n_function_evaluations = self.n_function_evaluations - optimizer._n_generations = 0 + setattr(optimizer, "_n_generations", 0) best_parent = self.best_so_far_y iteration_result = self.iterate(iteration_result, optimizer) x, y = iteration_result.get("x"), iteration_result.get("y") diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index ce4c7b1..237f636 100644 --- a/dynamicalgorithmselection/experiments/core.py 
+++ b/dynamicalgorithmselection/experiments/core.py @@ -1,8 +1,8 @@ from typing import Type, Any -import cocoex # type: ignore +import cocoex import numpy as np -from tqdm import tqdm # type: ignore +from tqdm import tqdm from dynamicalgorithmselection.experiments.utils import ( coco_bbob_single_function, diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index 8d46890..c04d1dd 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -2,7 +2,7 @@ from itertools import product from typing import Type, Optional -import cocoex # type: ignore +import cocoex import numpy as np from dynamicalgorithmselection.experiments.core import run_testing, run_training @@ -25,6 +25,7 @@ def run_cross_validation( os.mkdir(results_dir) cocoex.utilities.MiniPrint() problems_suite, cv_folds = _get_cv_folds(4, is_loio, options.get("dimensionality")) + options["n_problems"] = len(cv_folds[0]) observer = cocoex.Observer("bbob", "result_folder: " + options["name"]) for i, (train_set, test_set) in enumerate(cv_folds): print(f"Running cross validation training, fold {i + 1}") diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index e561f83..5bce3d9 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -13,8 +13,8 @@ dump_stats, ) -import cocoex # type: ignore -from tqdm import tqdm # type: ignore +import cocoex +from tqdm import tqdm from dynamicalgorithmselection.agents.agent_utils import ( get_extreme_stats, diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index 033a522..a14f481 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -3,7 +3,7 @@ from itertools import islice, 
product from typing import Type, Optional -import cocoex # type: ignore +import cocoex import numpy as np from dynamicalgorithmselection.agents.agent_utils import ( diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index e070b3b..adde569 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -3,8 +3,8 @@ import pickle import shutil from typing import List, Type, Optional -import cocopp # type: ignore -import neat # type: ignore +import cocopp +import neat import torch import wandb diff --git a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py index aecc439..c1b4d08 100644 --- a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py +++ b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py @@ -1,7 +1,7 @@ from typing import Optional import numpy as np -import numba as nb # type: ignore +import numba as nb from dynamicalgorithmselection.optimizers.ES.ES import ES diff --git a/dynamicalgorithmselection/optimizers/Optimizer.py b/dynamicalgorithmselection/optimizers/Optimizer.py index 1bac425..f989f31 100644 --- a/dynamicalgorithmselection/optimizers/Optimizer.py +++ b/dynamicalgorithmselection/optimizers/Optimizer.py @@ -1,8 +1,8 @@ import time -from typing import Optional +from typing import Optional, Any, Dict, List, Tuple import numpy as np -from pypop7.optimizers.core import Optimizer as BaseOptimizer, Terminations # type: ignore +from pypop7.optimizers.core import Optimizer as BaseOptimizer, Terminations class Optimizer(BaseOptimizer): @@ -10,56 +10,64 @@ class Optimizer(BaseOptimizer): def __init__(self, problem, options): BaseOptimizer.__init__(self, problem, options) - self.fitness_history = [] - self.start_conditions = dict() - self.results = dict() - self.worst_so_far_y, self.worst_so_far_x = ( - options.get("worst_so_far_y", -np.inf), - None, + self.best_so_far_y: float = options.get("best_so_far_y", float("inf")) + self.best_so_far_x: 
Optional[np.ndarray] = None + self._base_early_stopping: float = self.best_so_far_y + self._counter_early_stopping: int = 0 + self.early_stopping_threshold: float = options.get( + "early_stopping_threshold", 1e-10 ) - self.x_history, self.y_history = [], [] - # [Added] Dictionary to store histories of generic parameters - self.parameter_history = {} + self.fitness_history: List[Tuple[int, float]] = [] + + self.start_conditions: Dict[str, Any] = dict() + self.results: Dict[str, Any] = dict() + + self.worst_so_far_y: float = options.get("worst_so_far_y", -np.inf) + self.worst_so_far_x: Optional[np.ndarray] = None + self.x_history: List[np.ndarray] = [] + self.y_history: List[float] = [] + self.parameter_history: Dict[str, List[Any]] = {} self.target_FE: int | float = float("inf") - # [Modified] Accept generic kwargs for history tracking def _evaluate_fitness(self, x, args=None, **kwargs): self.start_function_evaluations = time.time() if args is None: y = self.fitness_function(x) else: y = self.fitness_function(x, args=args) + + y_val = float(y) self.time_function_evaluations += time.time() - self.start_function_evaluations self.n_function_evaluations += 1 - # update best-so-far solution (x) and fitness (y) - if y < self.best_so_far_y: - self.best_so_far_x, self.best_so_far_y = np.copy(x), y - self.fitness_history.append((self.n_function_evaluations, float(y))) - if y > self.worst_so_far_y: - self.worst_so_far_x, self.worst_so_far_y = np.copy(x), y + + # update best-so-far solution + if y_val < self.best_so_far_y: + self.best_so_far_x, self.best_so_far_y = np.copy(x), y_val + self.fitness_history.append((self.n_function_evaluations, y_val)) + + if y_val > self.worst_so_far_y: + self.worst_so_far_x, self.worst_so_far_y = np.copy(x), y_val + # update all settings related to early stopping - if (self._base_early_stopping - y) <= self.early_stopping_threshold: + if (self._base_early_stopping - y_val) <= self.early_stopping_threshold: self._counter_early_stopping += 1 
else: - self._counter_early_stopping, self._base_early_stopping = 0, y + self._counter_early_stopping, self._base_early_stopping = 0, y_val self.x_history.append(np.copy(x)) - self.y_history.append(float(y)) + self.y_history.append(y_val) - # [Added] Generic storage for any extra parameters passed for key, val in kwargs.items(): if key not in self.parameter_history: self.parameter_history[key] = [] - - # Store copy if it's an array to prevent reference issues if isinstance(val, np.ndarray): self.parameter_history[key].append(np.copy(val)) else: self.parameter_history[key].append(val) - return float(y) + return y_val - def _check_success(self): + def _check_success(self) -> bool: if ( (self.upper_boundary is not None) and (self.lower_boundary is not None) @@ -79,7 +87,7 @@ def _check_success(self): return False return True - def _collect(self, fitness): + def _collect(self, fitness: List[float]) -> Dict[str, Any]: # Added type hints result = BaseOptimizer._collect(self, fitness) result.update( { @@ -93,35 +101,35 @@ def _collect(self, fitness): } ) - # [Added] Inject generic parameter histories into result - # Keys will be named like 'v_history', 'p_x_history' automatically for key, history in self.parameter_history.items(): result[f"{key}_history"] = np.array(history, dtype=np.float32) return result def set_data(self, x=None, y=None, best_x=None, best_y=None, *args, **kwargs): + n_ind = getattr(self, "n_individuals", 0) self.start_conditions = { - "x": x[: self.n_individuals] if x is not None else x, - "y": (y[: self.n_individuals] if isinstance(y, np.ndarray) else None), + "x": x[:n_ind] if x is not None else x, + "y": (y[:n_ind] if isinstance(y, np.ndarray) else None), "best_x": best_x, "best_y": best_y, } self.best_so_far_x = best_x - self.best_so_far_y = best_y + self.best_so_far_y = float(best_y) if best_y is not None else float("inf") - def get_data(self, n_individuals: Optional[int] = None): + def get_data(self, n_individuals: Optional[int] = None) -> 
Dict[str, Any]: return self.results or self.start_conditions - def optimize(self, fitness_function=None): + def optimize(self, fitness_function=None) -> List[float]: self.start_time = time.time() if fitness_function is not None: self.fitness_function = fitness_function - fitness = [] # to store all fitness generated during evolution/optimization + fitness: List[float] = [] return fitness - def _check_terminations(self): + def _check_terminations(self) -> bool: termination_signal = super()._check_terminations() if not termination_signal: - termination_signal = self.n_function_evaluations >= self.target_FE - self.termination_signal = Terminations.MAX_FUNCTION_EVALUATIONS + termination_signal = bool(self.n_function_evaluations >= self.target_FE) + if termination_signal: + self.termination_signal = Terminations.MAX_FUNCTION_EVALUATIONS return termination_signal diff --git a/pyproject.toml b/pyproject.toml index e624646..6010a65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "torch>=2.9.0", "numpy>=1.20.0, <2.0", "pflacco>=1.0.0", + "ty>=0.0.17", ] [build-system] @@ -25,7 +26,6 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ - "mypy>=1.19.1", "pre-commit>=4.5.1", "pytest>=9.0.2", "ruff>=0.14.5", diff --git a/runner.slurm b/runner.slurm index db4bda6..ac15cea 100644 --- a/runner.slurm +++ b/runner.slurm @@ -9,6 +9,8 @@ #SBATCH --partition=plgrid-gpu-a100 #SBATCH --array=0-15 # 0-6 (LOIO), 7-13 (LOPO), 14-15 (Random and baselines) +CDB_VAL=${1:-1.5} + # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" @@ -31,30 +33,30 @@ if [ $TASK_ID -le 5 ]; then DIM=${DIMS[$TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ 
--mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${DIM} \ -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ - -r custom --mode $MODE --dimensionality $DIM --cdb 1.5 --n_epochs 3 --agent policy-gradient + -r custom --mode $MODE --dimensionality $DIM --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient elif [ $TASK_ID -eq 6 ]; then # --- MULTIDIMENSIONAL --- echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_$MODE \ -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ - -r custom --mode $MODE --cdb 1.5 --agent policy-gradient + -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient elif [ $TASK_ID -eq 14 ]; then # --- RANDOM AGENT --- echo "Running Mode: Random Agent" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb 1.5 --agent random + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb $CDB_VAL --agent random elif [ $TASK_ID -eq 15 ]; then # --- BASELINES --- echo "Running Mode: Baselines" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM \ + python3 dynamicalgorithmselection/main.py BASELINES \ -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent random --mode baselines fi \ No newline at end of file diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 825ede8..519af5d 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -1,17 +1,22 @@ import unittest from unittest.mock import MagicMock, patch, mock_open -import os +from typing import Any, cast, Type # Added imports from dynamicalgorithmselection.experiments.experiment import ( 
coco_bbob_experiment, run_comparison, dump_extreme_stats, ) +from dynamicalgorithmselection.optimizers.Optimizer import ( + Optimizer, +) # Import the base class class TestExperiment(unittest.TestCase): def setUp(self): - self.optimizer_mock = MagicMock() + self.optimizer_mock = MagicMock( + spec=Type[Optimizer] + ) # Use spec for better type safety self.optimizer_mock.__name__ = "MockOpt" self.options = { "name": "experiment_test", @@ -87,7 +92,7 @@ def test_run_comparison( opt1.__name__ = "Opt1" opt2 = MagicMock() opt2.__name__ = "Opt2" - portfolio = [opt1, opt2] + portfolio = cast(list[Type[Optimizer]], [opt1, opt2]) # Mock Suite mock_suite_obj = MagicMock() @@ -95,11 +100,8 @@ def test_run_comparison( mock_problem.dimension = 2 mock_suite_obj.get_problem.return_value = mock_problem - # get_suite returns (suite, problem_ids) mock_get_suite.return_value = (mock_suite_obj, ["p1"]) - # FIXED: Return a dictionary, NOT a tuple. - # run_comparison treats results as a dictionary directly in one of the lines. 
mock_single_func.return_value = {"fitness_history": [1, 2]} # Execute @@ -118,8 +120,12 @@ def test_run_comparison( def test_dump_extreme_stats( self, mock_json_dump, mock_file, mock_get_extreme, mock_get_checkpoints ): - stats = {"Opt1": [], "Opt2": []} - portfolio = [MagicMock(__name__="Opt1"), MagicMock(__name__="Opt2")] + stats: dict[str, list[Any]] = {"Opt1": [], "Opt2": []} + portfolio = cast( + list[Type[Optimizer]], + [MagicMock(__name__="Opt1"), MagicMock(__name__="Opt2")], + ) + mock_get_extreme.return_value = ({"best": 1}, {"worst": 0}) dump_extreme_stats(portfolio, stats, "p1", 100, 5, 10, 0.5) diff --git a/uv.lock b/uv.lock index 705286c..68aef71 100644 --- a/uv.lock +++ b/uv.lock @@ -311,13 +311,13 @@ dependencies = [ { name = "tenacity" }, { name = "torch" }, { name = "tqdm" }, + { name = "ty" }, { name = "wandb" }, { name = "wandb-workspaces" }, ] [package.dev-dependencies] dev = [ - { name = "mypy" }, { name = "pre-commit" }, { name = "pytest" }, { name = "ruff" }, @@ -334,13 +334,13 @@ requires-dist = [ { name = "tenacity", specifier = ">=9.1.2,<10" }, { name = "torch", specifier = ">=2.9.0" }, { name = "tqdm", specifier = ">=4.67.1,<5" }, + { name = "ty", specifier = ">=0.0.17" }, { name = "wandb", specifier = ">=0.22.2,<0.23" }, { name = "wandb-workspaces", specifier = ">=0.1.19,<0.2" }, ] [package.metadata.requires-dev] dev = [ - { name = "mypy", specifier = ">=1.19.1" }, { name = "pre-commit", specifier = ">=4.5.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "ruff", specifier = ">=0.14.5" }, @@ -617,79 +617,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, ] -[[package]] -name = "librt" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -sdist 
= { url = "https://files.pythonhosted.org/packages/8a/3f/4ca7dd7819bf8ff303aca39c3c60e5320e46e766ab7f7dd627d3b9c11bdf/librt-0.8.0.tar.gz", hash = "sha256:cb74cdcbc0103fc988e04e5c58b0b31e8e5dd2babb9182b6f9490488eb36324b", size = 177306, upload-time = "2026-02-12T14:53:54.743Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/51/e9/42af181c89b65abfd557c1b017cba5b82098eef7bf26d1649d82ce93ccc7/librt-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ce33a9778e294507f3a0e3468eccb6a698b5166df7db85661543eca1cfc5369", size = 65314, upload-time = "2026-02-12T14:52:14.778Z" }, - { url = "https://files.pythonhosted.org/packages/9d/4a/15a847fca119dc0334a4b8012b1e15fdc5fc19d505b71e227eaf1bcdba09/librt-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8070aa3368559de81061ef752770d03ca1f5fc9467d4d512d405bd0483bfffe6", size = 68015, upload-time = "2026-02-12T14:52:15.797Z" }, - { url = "https://files.pythonhosted.org/packages/e1/87/ffc8dbd6ab68dd91b736c88529411a6729649d2b74b887f91f3aaff8d992/librt-0.8.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:20f73d4fecba969efc15cdefd030e382502d56bb6f1fc66b580cce582836c9fa", size = 194508, upload-time = "2026-02-12T14:52:16.835Z" }, - { url = "https://files.pythonhosted.org/packages/89/92/a7355cea28d6c48ff6ff5083ac4a2a866fb9b07b786aa70d1f1116680cd5/librt-0.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a512c88900bdb1d448882f5623a0b1ad27ba81a9bd75dacfe17080b72272ca1f", size = 205630, upload-time = "2026-02-12T14:52:18.58Z" }, - { url = "https://files.pythonhosted.org/packages/ac/5e/54509038d7ac527828db95b8ba1c8f5d2649bc32fd8f39b1718ec9957dce/librt-0.8.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:015e2dde6e096d27c10238bf9f6492ba6c65822dfb69d2bf74c41a8e88b7ddef", size = 218289, upload-time = "2026-02-12T14:52:20.134Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/17/0ee0d13685cefee6d6f2d47bb643ddad3c62387e2882139794e6a5f1288a/librt-0.8.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:1c25a131013eadd3c600686a0c0333eb2896483cbc7f65baa6a7ee761017aef9", size = 211508, upload-time = "2026-02-12T14:52:21.413Z" }, - { url = "https://files.pythonhosted.org/packages/4b/a8/1714ef6e9325582e3727de3be27e4c1b2f428ea411d09f1396374180f130/librt-0.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:21b14464bee0b604d80a638cf1ee3148d84ca4cc163dcdcecb46060c1b3605e4", size = 219129, upload-time = "2026-02-12T14:52:22.61Z" }, - { url = "https://files.pythonhosted.org/packages/89/d3/2d9fe353edff91cdc0ece179348054a6fa61f3de992c44b9477cb973509b/librt-0.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:05a3dd3f116747f7e1a2b475ccdc6fb637fd4987126d109e03013a79d40bf9e6", size = 213126, upload-time = "2026-02-12T14:52:23.819Z" }, - { url = "https://files.pythonhosted.org/packages/ad/8e/9f5c60444880f6ad50e3ff7475e5529e787797e7f3ad5432241633733b92/librt-0.8.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:fa37f99bff354ff191c6bcdffbc9d7cdd4fc37faccfc9be0ef3a4fd5613977da", size = 212279, upload-time = "2026-02-12T14:52:25.034Z" }, - { url = "https://files.pythonhosted.org/packages/fe/eb/d4a2cfa647da3022ae977f50d7eda1d91f70d7d1883cf958a4b6ef689eab/librt-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1566dbb9d1eb0987264c9b9460d212e809ba908d2f4a3999383a84d765f2f3f1", size = 234654, upload-time = "2026-02-12T14:52:26.204Z" }, - { url = "https://files.pythonhosted.org/packages/6a/31/26b978861c7983b036a3aea08bdbb2ec32bbaab1ad1d57c5e022be59afc1/librt-0.8.0-cp311-cp311-win32.whl", hash = "sha256:70defb797c4d5402166787a6b3c66dfb3fa7f93d118c0509ffafa35a392f4258", size = 54603, upload-time = "2026-02-12T14:52:27.342Z" }, - { url = 
"https://files.pythonhosted.org/packages/d0/78/f194ed7c48dacf875677e749c5d0d1d69a9daa7c994314a39466237fb1be/librt-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:db953b675079884ffda33d1dca7189fb961b6d372153750beb81880384300817", size = 61730, upload-time = "2026-02-12T14:52:28.31Z" }, - { url = "https://files.pythonhosted.org/packages/97/ee/ad71095478d02137b6f49469dc808c595cfe89b50985f6b39c5345f0faab/librt-0.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:75d1a8cab20b2043f03f7aab730551e9e440adc034d776f15f6f8d582b0a5ad4", size = 52274, upload-time = "2026-02-12T14:52:29.345Z" }, - { url = "https://files.pythonhosted.org/packages/fb/53/f3bc0c4921adb0d4a5afa0656f2c0fbe20e18e3e0295e12985b9a5dc3f55/librt-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:17269dd2745dbe8e42475acb28e419ad92dfa38214224b1b01020b8cac70b645", size = 66511, upload-time = "2026-02-12T14:52:30.34Z" }, - { url = "https://files.pythonhosted.org/packages/89/4b/4c96357432007c25a1b5e363045373a6c39481e49f6ba05234bb59a839c1/librt-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f4617cef654fca552f00ce5ffdf4f4b68770f18950e4246ce94629b789b92467", size = 68628, upload-time = "2026-02-12T14:52:31.491Z" }, - { url = "https://files.pythonhosted.org/packages/47/16/52d75374d1012e8fc709216b5eaa25f471370e2a2331b8be00f18670a6c7/librt-0.8.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5cb11061a736a9db45e3c1293cfcb1e3caf205912dfa085734ba750f2197ff9a", size = 198941, upload-time = "2026-02-12T14:52:32.489Z" }, - { url = "https://files.pythonhosted.org/packages/fc/11/d5dd89e5a2228567b1228d8602d896736247424484db086eea6b8010bcba/librt-0.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4bb00bd71b448f16749909b08a0ff16f58b079e2261c2e1000f2bbb2a4f0a45", size = 210009, upload-time = "2026-02-12T14:52:33.634Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/d8/fc1a92a77c3020ee08ce2dc48aed4b42ab7c30fb43ce488d388673b0f164/librt-0.8.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:95a719a049f0eefaf1952673223cf00d442952273cbd20cf2ed7ec423a0ef58d", size = 224461, upload-time = "2026-02-12T14:52:34.868Z" }, - { url = "https://files.pythonhosted.org/packages/7f/98/eb923e8b028cece924c246104aa800cf72e02d023a8ad4ca87135b05a2fe/librt-0.8.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bd32add59b58fba3439d48d6f36ac695830388e3da3e92e4fc26d2d02670d19c", size = 217538, upload-time = "2026-02-12T14:52:36.078Z" }, - { url = "https://files.pythonhosted.org/packages/fd/67/24e80ab170674a1d8ee9f9a83081dca4635519dbd0473b8321deecddb5be/librt-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4f764b2424cb04524ff7a486b9c391e93f93dc1bd8305b2136d25e582e99aa2f", size = 225110, upload-time = "2026-02-12T14:52:37.301Z" }, - { url = "https://files.pythonhosted.org/packages/d8/c7/6fbdcbd1a6e5243c7989c21d68ab967c153b391351174b4729e359d9977f/librt-0.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f04ca50e847abc486fa8f4107250566441e693779a5374ba211e96e238f298b9", size = 217758, upload-time = "2026-02-12T14:52:38.89Z" }, - { url = "https://files.pythonhosted.org/packages/4b/bd/4d6b36669db086e3d747434430073e14def032dd58ad97959bf7e2d06c67/librt-0.8.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9ab3a3475a55b89b87ffd7e6665838e8458e0b596c22e0177e0f961434ec474a", size = 218384, upload-time = "2026-02-12T14:52:40.637Z" }, - { url = "https://files.pythonhosted.org/packages/50/2d/afe966beb0a8f179b132f3e95c8dd90738a23e9ebdba10f89a3f192f9366/librt-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e36a8da17134ffc29373775d88c04832f9ecfab1880470661813e6c7991ef79", size = 241187, upload-time = "2026-02-12T14:52:43.55Z" }, - { url = 
"https://files.pythonhosted.org/packages/02/d0/6172ea4af2b538462785ab1a68e52d5c99cfb9866a7caf00fdf388299734/librt-0.8.0-cp312-cp312-win32.whl", hash = "sha256:4eb5e06ebcc668677ed6389164f52f13f71737fc8be471101fa8b4ce77baeb0c", size = 54914, upload-time = "2026-02-12T14:52:44.676Z" }, - { url = "https://files.pythonhosted.org/packages/d4/cb/ceb6ed6175612a4337ad49fb01ef594712b934b4bc88ce8a63554832eb44/librt-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:0a33335eb59921e77c9acc05d0e654e4e32e45b014a4d61517897c11591094f8", size = 62020, upload-time = "2026-02-12T14:52:45.676Z" }, - { url = "https://files.pythonhosted.org/packages/f1/7e/61701acbc67da74ce06ddc7ba9483e81c70f44236b2d00f6a4bfee1aacbf/librt-0.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:24a01c13a2a9bdad20997a4443ebe6e329df063d1978bbe2ebbf637878a46d1e", size = 52443, upload-time = "2026-02-12T14:52:47.218Z" }, - { url = "https://files.pythonhosted.org/packages/6d/32/3edb0bcb4113a9c8bdcd1750663a54565d255027657a5df9d90f13ee07fa/librt-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7f820210e21e3a8bf8fde2ae3c3d10106d4de9ead28cbfdf6d0f0f41f5b12fa1", size = 66522, upload-time = "2026-02-12T14:52:48.219Z" }, - { url = "https://files.pythonhosted.org/packages/30/ab/e8c3d05e281f5d405ebdcc5bc8ab36df23e1a4b40ac9da8c3eb9928b72b9/librt-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4831c44b8919e75ca0dfb52052897c1ef59fdae19d3589893fbd068f1e41afbf", size = 68658, upload-time = "2026-02-12T14:52:50.351Z" }, - { url = "https://files.pythonhosted.org/packages/7c/d3/74a206c47b7748bbc8c43942de3ed67de4c231156e148b4f9250869593df/librt-0.8.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:88c6e75540f1f10f5e0fc5e87b4b6c290f0e90d1db8c6734f670840494764af8", size = 199287, upload-time = "2026-02-12T14:52:51.938Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/29/ef98a9131cf12cb95771d24e4c411fda96c89dc78b09c2de4704877ebee4/librt-0.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9646178cd794704d722306c2c920c221abbf080fede3ba539d5afdec16c46dad", size = 210293, upload-time = "2026-02-12T14:52:53.128Z" }, - { url = "https://files.pythonhosted.org/packages/5b/3e/89b4968cb08c53d4c2d8b02517081dfe4b9e07a959ec143d333d76899f6c/librt-0.8.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e1af31a710e17891d9adf0dbd9a5fcd94901a3922a96499abdbf7ce658f4e01", size = 224801, upload-time = "2026-02-12T14:52:54.367Z" }, - { url = "https://files.pythonhosted.org/packages/6d/28/f38526d501f9513f8b48d78e6be4a241e15dd4b000056dc8b3f06ee9ce5d/librt-0.8.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:507e94f4bec00b2f590fbe55f48cd518a208e2474a3b90a60aa8f29136ddbada", size = 218090, upload-time = "2026-02-12T14:52:55.758Z" }, - { url = "https://files.pythonhosted.org/packages/02/ec/64e29887c5009c24dc9c397116c680caffc50286f62bd99c39e3875a2854/librt-0.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f1178e0de0c271231a660fbef9be6acdfa1d596803464706862bef6644cc1cae", size = 225483, upload-time = "2026-02-12T14:52:57.375Z" }, - { url = "https://files.pythonhosted.org/packages/ee/16/7850bdbc9f1a32d3feff2708d90c56fc0490b13f1012e438532781aa598c/librt-0.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:71fc517efc14f75c2f74b1f0a5d5eb4a8e06aa135c34d18eaf3522f4a53cd62d", size = 218226, upload-time = "2026-02-12T14:52:58.534Z" }, - { url = "https://files.pythonhosted.org/packages/1c/4a/166bffc992d65ddefa7c47052010a87c059b44a458ebaf8f5eba384b0533/librt-0.8.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0583aef7e9a720dd40f26a2ad5a1bf2ccbb90059dac2b32ac516df232c701db3", size = 218755, upload-time = "2026-02-12T14:52:59.701Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/5d/9aeee038bcc72a9cfaaee934463fe9280a73c5440d36bd3175069d2cb97b/librt-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5d0f76fc73480d42285c609c0ea74d79856c160fa828ff9aceab574ea4ecfd7b", size = 241617, upload-time = "2026-02-12T14:53:00.966Z" }, - { url = "https://files.pythonhosted.org/packages/64/ff/2bec6b0296b9d0402aa6ec8540aa19ebcb875d669c37800cb43d10d9c3a3/librt-0.8.0-cp313-cp313-win32.whl", hash = "sha256:e79dbc8f57de360f0ed987dc7de7be814b4803ef0e8fc6d3ff86e16798c99935", size = 54966, upload-time = "2026-02-12T14:53:02.042Z" }, - { url = "https://files.pythonhosted.org/packages/08/8d/bf44633b0182996b2c7ea69a03a5c529683fa1f6b8e45c03fe874ff40d56/librt-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:25b3e667cbfc9000c4740b282df599ebd91dbdcc1aa6785050e4c1d6be5329ab", size = 62000, upload-time = "2026-02-12T14:53:03.822Z" }, - { url = "https://files.pythonhosted.org/packages/5c/fd/c6472b8e0eac0925001f75e366cf5500bcb975357a65ef1f6b5749389d3a/librt-0.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:e9a3a38eb4134ad33122a6d575e6324831f930a771d951a15ce232e0237412c2", size = 52496, upload-time = "2026-02-12T14:53:04.889Z" }, - { url = "https://files.pythonhosted.org/packages/e0/13/79ebfe30cd273d7c0ce37a5f14dc489c5fb8b722a008983db2cfd57270bb/librt-0.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:421765e8c6b18e64d21c8ead315708a56fc24f44075059702e421d164575fdda", size = 66078, upload-time = "2026-02-12T14:53:06.085Z" }, - { url = "https://files.pythonhosted.org/packages/4b/8f/d11eca40b62a8d5e759239a80636386ef88adecb10d1a050b38cc0da9f9e/librt-0.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:48f84830a8f8ad7918afd743fd7c4eb558728bceab7b0e38fd5a5cf78206a556", size = 68309, upload-time = "2026-02-12T14:53:07.121Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/b4/f12ee70a3596db40ff3c88ec9eaa4e323f3b92f77505b4d900746706ec6a/librt-0.8.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9f09d4884f882baa39a7e36bbf3eae124c4ca2a223efb91e567381d1c55c6b06", size = 196804, upload-time = "2026-02-12T14:53:08.164Z" }, - { url = "https://files.pythonhosted.org/packages/8b/7e/70dbbdc0271fd626abe1671ad117bcd61a9a88cdc6a10ccfbfc703db1873/librt-0.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:693697133c3b32aa9b27f040e3691be210e9ac4d905061859a9ed519b1d5a376", size = 206915, upload-time = "2026-02-12T14:53:09.333Z" }, - { url = "https://files.pythonhosted.org/packages/79/13/6b9e05a635d4327608d06b3c1702166e3b3e78315846373446cf90d7b0bf/librt-0.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5512aae4648152abaf4d48b59890503fcbe86e85abc12fb9b096fe948bdd816", size = 221200, upload-time = "2026-02-12T14:53:10.68Z" }, - { url = "https://files.pythonhosted.org/packages/35/6c/e19a3ac53e9414de43a73d7507d2d766cd22d8ca763d29a4e072d628db42/librt-0.8.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:995d24caa6bbb34bcdd4a41df98ac6d1af637cfa8975cb0790e47d6623e70e3e", size = 214640, upload-time = "2026-02-12T14:53:12.342Z" }, - { url = "https://files.pythonhosted.org/packages/30/f0/23a78464788619e8c70f090cfd099cce4973eed142c4dccb99fc322283fd/librt-0.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b9aef96d7593584e31ef6ac1eb9775355b0099fee7651fae3a15bc8657b67b52", size = 221980, upload-time = "2026-02-12T14:53:13.603Z" }, - { url = "https://files.pythonhosted.org/packages/03/32/38e21420c5d7aa8a8bd2c7a7d5252ab174a5a8aaec8b5551968979b747bf/librt-0.8.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4f6e975377fbc4c9567cb33ea9ab826031b6c7ec0515bfae66a4fb110d40d6da", size = 215146, upload-time = "2026-02-12T14:53:14.8Z" }, - { 
url = "https://files.pythonhosted.org/packages/bb/00/bd9ecf38b1824c25240b3ad982fb62c80f0a969e6679091ba2b3afb2b510/librt-0.8.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:daae5e955764be8fd70a93e9e5133c75297f8bce1e802e1d3683b98f77e1c5ab", size = 215203, upload-time = "2026-02-12T14:53:16.087Z" }, - { url = "https://files.pythonhosted.org/packages/b9/60/7559bcc5279d37810b98d4a52616febd7b8eef04391714fd6bdf629598b1/librt-0.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7bd68cebf3131bb920d5984f75fe302d758db33264e44b45ad139385662d7bc3", size = 237937, upload-time = "2026-02-12T14:53:17.236Z" }, - { url = "https://files.pythonhosted.org/packages/41/cc/be3e7da88f1abbe2642672af1dc00a0bccece11ca60241b1883f3018d8d5/librt-0.8.0-cp314-cp314-win32.whl", hash = "sha256:1e6811cac1dcb27ca4c74e0ca4a5917a8e06db0d8408d30daee3a41724bfde7a", size = 50685, upload-time = "2026-02-12T14:53:18.888Z" }, - { url = "https://files.pythonhosted.org/packages/38/27/e381d0df182a8f61ef1f6025d8b138b3318cc9d18ad4d5f47c3bf7492523/librt-0.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:178707cda89d910c3b28bf5aa5f69d3d4734e0f6ae102f753ad79edef83a83c7", size = 57872, upload-time = "2026-02-12T14:53:19.942Z" }, - { url = "https://files.pythonhosted.org/packages/c5/0c/ca9dfdf00554a44dea7d555001248269a4bab569e1590a91391feb863fa4/librt-0.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:3e8b77b5f54d0937b26512774916041756c9eb3e66f1031971e626eea49d0bf4", size = 48056, upload-time = "2026-02-12T14:53:21.473Z" }, - { url = "https://files.pythonhosted.org/packages/f2/ed/6cc9c4ad24f90c8e782193c7b4a857408fd49540800613d1356c63567d7b/librt-0.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:789911e8fa40a2e82f41120c936b1965f3213c67f5a483fc5a41f5839a05dcbb", size = 68307, upload-time = "2026-02-12T14:53:22.498Z" }, - { url = "https://files.pythonhosted.org/packages/84/d8/0e94292c6b3e00b6eeea39dd44d5703d1ec29b6dafce7eea19dc8f1aedbd/librt-0.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:2b37437e7e4ef5e15a297b36ba9e577f73e29564131d86dd75875705e97402b5", size = 70999, upload-time = "2026-02-12T14:53:23.603Z" }, - { url = "https://files.pythonhosted.org/packages/0e/f4/6be1afcbdeedbdbbf54a7c9d73ad43e1bf36897cebf3978308cd64922e02/librt-0.8.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:671a6152edf3b924d98a5ed5e6982ec9cb30894085482acadce0975f031d4c5c", size = 220782, upload-time = "2026-02-12T14:53:25.133Z" }, - { url = "https://files.pythonhosted.org/packages/f0/8d/f306e8caa93cfaf5c6c9e0d940908d75dc6af4fd856baa5535c922ee02b1/librt-0.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8992ca186a1678107b0af3d0c9303d8c7305981b9914989b9788319ed4d89546", size = 235420, upload-time = "2026-02-12T14:53:27.047Z" }, - { url = "https://files.pythonhosted.org/packages/d6/f2/65d86bd462e9c351326564ca805e8457442149f348496e25ccd94583ffa2/librt-0.8.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:001e5330093d887b8b9165823eca6c5c4db183fe4edea4fdc0680bbac5f46944", size = 246452, upload-time = "2026-02-12T14:53:28.341Z" }, - { url = "https://files.pythonhosted.org/packages/03/94/39c88b503b4cb3fcbdeb3caa29672b6b44ebee8dcc8a54d49839ac280f3f/librt-0.8.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d920789eca7ef71df7f31fd547ec0d3002e04d77f30ba6881e08a630e7b2c30e", size = 238891, upload-time = "2026-02-12T14:53:29.625Z" }, - { url = "https://files.pythonhosted.org/packages/e3/c6/6c0d68190893d01b71b9569b07a1c811e280c0065a791249921c83dc0290/librt-0.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:82fb4602d1b3e303a58bfe6165992b5a78d823ec646445356c332cd5f5bbaa61", size = 250249, upload-time = "2026-02-12T14:53:30.93Z" }, - { url = 
"https://files.pythonhosted.org/packages/52/7a/f715ed9e039035d0ea637579c3c0155ab3709a7046bc408c0fb05d337121/librt-0.8.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:4d3e38797eb482485b486898f89415a6ab163bc291476bd95712e42cf4383c05", size = 240642, upload-time = "2026-02-12T14:53:32.174Z" }, - { url = "https://files.pythonhosted.org/packages/c2/3c/609000a333debf5992efe087edc6467c1fdbdddca5b610355569bbea9589/librt-0.8.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a905091a13e0884701226860836d0386b88c72ce5c2fdfba6618e14c72be9f25", size = 239621, upload-time = "2026-02-12T14:53:33.39Z" }, - { url = "https://files.pythonhosted.org/packages/b9/df/87b0673d5c395a8f34f38569c116c93142d4dc7e04af2510620772d6bd4f/librt-0.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:375eda7acfce1f15f5ed56cfc960669eefa1ec8732e3e9087c3c4c3f2066759c", size = 262986, upload-time = "2026-02-12T14:53:34.617Z" }, - { url = "https://files.pythonhosted.org/packages/09/7f/6bbbe9dcda649684773aaea78b87fff4d7e59550fbc2877faa83612087a3/librt-0.8.0-cp314-cp314t-win32.whl", hash = "sha256:2ccdd20d9a72c562ffb73098ac411de351b53a6fbb3390903b2d33078ef90447", size = 51328, upload-time = "2026-02-12T14:53:36.15Z" }, - { url = "https://files.pythonhosted.org/packages/bb/f3/e1981ab6fa9b41be0396648b5850267888a752d025313a9e929c4856208e/librt-0.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:25e82d920d4d62ad741592fcf8d0f3bda0e3fc388a184cb7d2f566c681c5f7b9", size = 58719, upload-time = "2026-02-12T14:53:37.183Z" }, - { url = "https://files.pythonhosted.org/packages/94/d1/433b3c06e78f23486fe4fdd19bc134657eb30997d2054b0dbf52bbf3382e/librt-0.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:92249938ab744a5890580d3cb2b22042f0dce71cdaa7c1369823df62bedf7cbc", size = 48753, upload-time = "2026-02-12T14:53:38.539Z" }, -] - [[package]] name = "llvmlite" version = "0.45.1" @@ -880,54 +807,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, ] -[[package]] -name = "mypy" -version = "1.19.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, - { name = "mypy-extensions" }, - { name = "pathspec" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" }, - { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" }, - { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" }, - { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" }, - { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, - { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, - { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, - { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, - { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, - { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, - { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, - { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, - { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, - { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, - { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, - { url = 
"https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, - { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, - { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, -] - -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = 
"neat-python" version = "0.92" @@ -1189,15 +1068,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692, upload-time = "2023-06-28T23:17:28.824Z" }, ] -[[package]] -name = "pathspec" -version = "1.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" }, -] - [[package]] name = "pflacco" version = "1.2.2" @@ -1887,6 +1757,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ] +[[package]] +name = "ty" +version = "0.0.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/c3/41ae6346443eedb65b96761abfab890a48ce2aa5a8a27af69c5c5d99064d/ty-0.0.17.tar.gz", hash = "sha256:847ed6c120913e280bf9b54d8eaa7a1049708acb8824ad234e71498e8ad09f97", size = 5167209, upload-time = "2026-02-13T13:26:36.835Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c0/01/0ef15c22a1c54b0f728ceff3f62d478dbf8b0dcf8ff7b80b954f79584f3e/ty-0.0.17-py3-none-linux_armv6l.whl", hash = "sha256:64a9a16555cc8867d35c2647c2f1afbd3cae55f68fd95283a574d1bb04fe93e0", size = 10192793, upload-time = "2026-02-13T13:27:13.943Z" }, + { url = "https://files.pythonhosted.org/packages/0f/2c/f4c322d9cded56edc016b1092c14b95cf58c8a33b4787316ea752bb9418e/ty-0.0.17-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:eb2dbd8acd5c5a55f4af0d479523e7c7265a88542efe73ed3d696eb1ba7b6454", size = 10051977, upload-time = "2026-02-13T13:26:57.741Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a5/43746c1ff81e784f5fc303afc61fe5bcd85d0fcf3ef65cb2cef78c7486c7/ty-0.0.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f18f5fd927bc628deb9ea2df40f06b5f79c5ccf355db732025a3e8e7152801f6", size = 9564639, upload-time = "2026-02-13T13:26:42.781Z" }, + { url = "https://files.pythonhosted.org/packages/d6/b8/280b04e14a9c0474af574f929fba2398b5e1c123c1e7735893b4cd73d13c/ty-0.0.17-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5383814d1d7a5cc53b3b07661856bab04bb2aac7a677c8d33c55169acdaa83df", size = 10061204, upload-time = "2026-02-13T13:27:00.152Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d7/493e1607d8dfe48288d8a768a2adc38ee27ef50e57f0af41ff273987cda0/ty-0.0.17-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c20423b8744b484f93e7bf2ef8a9724bca2657873593f9f41d08bd9f83444c9", size = 10013116, upload-time = "2026-02-13T13:26:34.543Z" }, + { url = "https://files.pythonhosted.org/packages/80/ef/22f3ed401520afac90dbdf1f9b8b7755d85b0d5c35c1cb35cf5bd11b59c2/ty-0.0.17-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6f5b1aba97db9af86517b911674b02f5bc310750485dc47603a105bd0e83ddd", size = 10533623, upload-time = "2026-02-13T13:26:31.449Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/ce/744b15279a11ac7138832e3a55595706b4a8a209c9f878e3ab8e571d9032/ty-0.0.17-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:488bce1a9bea80b851a97cd34c4d2ffcd69593d6c3f54a72ae02e5c6e47f3d0c", size = 11069750, upload-time = "2026-02-13T13:26:48.638Z" }, + { url = "https://files.pythonhosted.org/packages/f2/be/1133c91f15a0e00d466c24f80df486d630d95d1b2af63296941f7473812f/ty-0.0.17-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8df66b91ec84239420985ec215e7f7549bfda2ac036a3b3c065f119d1c06825a", size = 10870862, upload-time = "2026-02-13T13:26:54.715Z" }, + { url = "https://files.pythonhosted.org/packages/3e/4a/a2ed209ef215b62b2d3246e07e833081e07d913adf7e0448fc204be443d6/ty-0.0.17-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:002139e807c53002790dfefe6e2f45ab0e04012e76db3d7c8286f96ec121af8f", size = 10628118, upload-time = "2026-02-13T13:26:45.439Z" }, + { url = "https://files.pythonhosted.org/packages/b3/0c/87476004cb5228e9719b98afffad82c3ef1f84334bde8527bcacba7b18cb/ty-0.0.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6c4e01f05ce82e5d489ab3900ca0899a56c4ccb52659453780c83e5b19e2b64c", size = 10038185, upload-time = "2026-02-13T13:27:02.693Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/98f0b3ba9aef53c1f0305519536967a4aa793a69ed72677b0a625c5313ac/ty-0.0.17-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2b226dd1e99c0d2152d218c7e440150d1a47ce3c431871f0efa073bbf899e881", size = 10047644, upload-time = "2026-02-13T13:27:05.474Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/06737bb80aa1a9103b8651d2eb691a7e53f1ed54111152be25f4a02745db/ty-0.0.17-py3-none-musllinux_1_2_i686.whl", hash = "sha256:8b11f1da7859e0ad69e84b3c5ef9a7b055ceed376a432fad44231bdfc48061c2", size = 10231140, upload-time = "2026-02-13T13:27:10.844Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/79/e2a606bd8852383ba9abfdd578f4a227bd18504145381a10a5f886b4e751/ty-0.0.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c04e196809ff570559054d3e011425fd7c04161529eb551b3625654e5f2434cb", size = 10718344, upload-time = "2026-02-13T13:26:51.66Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2d/2663984ac11de6d78f74432b8b14ba64d170b45194312852b7543cf7fd56/ty-0.0.17-py3-none-win32.whl", hash = "sha256:305b6ed150b2740d00a817b193373d21f0767e10f94ac47abfc3b2e5a5aec809", size = 9672932, upload-time = "2026-02-13T13:27:08.522Z" }, + { url = "https://files.pythonhosted.org/packages/de/b5/39be78f30b31ee9f5a585969930c7248354db90494ff5e3d0756560fb731/ty-0.0.17-py3-none-win_amd64.whl", hash = "sha256:531828267527aee7a63e972f54e5eee21d9281b72baf18e5c2850c6b862add83", size = 10542138, upload-time = "2026-02-13T13:27:17.084Z" }, + { url = "https://files.pythonhosted.org/packages/40/b7/f875c729c5d0079640c75bad2c7e5d43edc90f16ba242f28a11966df8f65/ty-0.0.17-py3-none-win_arm64.whl", hash = "sha256:de9810234c0c8d75073457e10a84825b9cd72e6629826b7f01c7a0b266ae25b1", size = 10023068, upload-time = "2026-02-13T13:26:39.637Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" From dd3ac4551da1829d88038e1b79d88e4a07a82efb Mon Sep 17 00:00:00 2001 From: wniec Date: Thu, 19 Feb 2026 15:47:27 +0100 Subject: [PATCH 11/20] update initial_value_range --- dynamicalgorithmselection/agents/agent.py | 13 +++++++------ dynamicalgorithmselection/experiments/experiment.py | 2 -- dynamicalgorithmselection/optimizers/DE/JDE21.py | 2 ++ dynamicalgorithmselection/optimizers/DE/MADDE.py | 4 +++- .../optimizers/DE/NL_SHADE_RSP.py | 4 +++- dynamicalgorithmselection/optimizers/DS/POWELL.py | 2 ++ 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index 3900af3..2243a38 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ 
b/dynamicalgorithmselection/agents/agent.py @@ -1,5 +1,5 @@ from itertools import product -from typing import List, Type, Optional, Dict, Any +from typing import List, Type, Optional, Dict, Any, Tuple import numpy as np from dynamicalgorithmselection.agents.agent_state import ( get_state_representation, @@ -56,7 +56,7 @@ def __init__(self, problem, options): self.state_normalizer = self.options.get( "state_normalizer", StateNormalizer(input_shape=(self.state_dim,)) ) - self.initial_value_range = None + self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None) def get_partial_state( self, @@ -150,8 +150,8 @@ def _check_early_stopping(self, best_y): self._counter_early_stopping, self._base_early_stopping = 0, best_y def _save_fitness(self, best_x, best_y, worst_x, worst_y): - if self.initial_value_range is None: - self.initial_value_range = max(worst_y - best_y, 1e-5) + if self.initial_value_range[0] is None: + self.initial_value_range = best_y, max(worst_y, best_y + 1e-5) self.best_parent = best_y self.history.append(best_y) @@ -240,6 +240,7 @@ def get_reward(self, new_best_y, old_best_y): improvement = old_best_y - new_best_y - # return float(improvement > 1e-3) - reward = improvement / self.initial_value_range + reward = improvement / ( + self.initial_value_range[1] - self.initial_value_range[0] + ) return np.log(np.clip(reward, 0.0, 1.0) + 1e-5) diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index 5bce3d9..2ac1324 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -80,10 +80,8 @@ def coco_bbob_experiment( optimizer, options, evaluations_multiplier, is_loio=mode.endswith("LOIO") ) elif agent == "random": - # running random baseline return _coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode) elif options.get("baselines"): - # running only baselines return run_comparison( 
options["optimizer_portfolio"], options, evaluations_multiplier ) diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py index 32ac219..3042efd 100644 --- a/dynamicalgorithmselection/optimizers/DE/JDE21.py +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -81,6 +81,8 @@ def _mutate_cross_select(self, x, y, indices, args=None): def iterate(self, x=None, y=None, args=None): bNP = x.shape[0] - self.sNP # Evolution of big population + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") x, y = self._mutate_cross_select(x, y, np.arange(bNP), args) # Evolution of small population (repeated) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py index 5a7704f..8ffa06c 100644 --- a/dynamicalgorithmselection/optimizers/DE/MADDE.py +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -82,7 +82,9 @@ def _mutate(self, x, y, F, strategy_idx, q, Fa): return v - def iterate(self, x, y, args=None): + def iterate(self, x=None, y=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") NP = x.shape[0] dim = self.ndim_problem FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index 2282740..e670a2c 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -63,7 +63,9 @@ def _update_memory(self, SF, SCr, df): self.MCr[self.k_idx] = np.sum(w * SCr) self.k_idx = (self.k_idx + 1) % self.memory_size - def iterate(self, x, y, args=None): + def iterate(self, x=None, y=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") NP = x.shape[0] Cr, F = self._choose_F_Cr(NP) diff --git 
a/dynamicalgorithmselection/optimizers/DS/POWELL.py b/dynamicalgorithmselection/optimizers/DS/POWELL.py index decac18..44fb383 100644 --- a/dynamicalgorithmselection/optimizers/DS/POWELL.py +++ b/dynamicalgorithmselection/optimizers/DS/POWELL.py @@ -147,6 +147,8 @@ def _func(alpha): # only for line search return y, x + d, d, yy def iterate(self, x=None, y=None, u=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") xx, yy = np.copy(x), np.copy(y) big_ind, delta, ys = 0, 0.0, [] for i in range(self.ndim_problem): From 2eedf491562d8558d867eed27e67ca7f23d68c60 Mon Sep 17 00:00:00 2001 From: wniec Date: Thu, 19 Feb 2026 17:06:03 +0100 Subject: [PATCH 12/20] add multiple reward choices --- README.md | 27 ++++++- dynamicalgorithmselection/agents/agent.py | 17 ++-- .../agents/agent_reward.py | 79 +++++++++++++++++++ dynamicalgorithmselection/main.py | 62 +++++++-------- runner.slurm | 10 +-- tests/test_agent.py | 2 +- 6 files changed, 145 insertions(+), 52 deletions(-) create mode 100644 dynamicalgorithmselection/agents/agent_reward.py diff --git a/README.md b/README.md index e0833dd..94ddfd5 100644 --- a/README.md +++ b/README.md @@ -61,17 +61,21 @@ uv run das [options] |------------------------------------|-------------|-----------------------------|------------------------------------------------------------------------------------------------------------------| | `name` | `str` | — | **Required.** Name tag for the run or experiment. | | `-p`, `--portfolio` | `list[str]` | `['SPSO', 'IPSO', 'SPSOL']` | Portfolio of sub-optimizers to include. | -| `-m`, `--population_size` | `int` | `20` | Population size for all fixed-population optimizers. | +| `-m`, `--population_size` | `int` | `None` | Population size for all fixed-population optimizers. None means no fixed population size. | | `-f`, `--fe_multiplier` | `int` | `10_000` | Function evaluation multiplier. 
| | `-s`, `--n_checkpoints` | `int` | `10` | Number of checkpoints for sub-optimizer selection. | | `-t`, `--test` / `--no-test` | `bool` | `True` | Whether to execute in test mode. | | `-c`, `--compare` / `--no-compare` | `bool` | `False` | Whether to compare results against standalone optimizers. | | `-e`, `--wandb_entity` | `str` | `None` | Weights and Biases (WandB) entity name. | | `-w`, `--wandb_project` | `str` | `None` | Weights and Biases (WandB) project name. | -| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`. | +| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`, `RL-DAS`. | | `-l`, `--mode` | `str` | `LOIO` | Train/Test split mode (see [Split Strategies](https://www.google.com/search?q=%23-train-test-split-strategies)). | | `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. | | `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. | +| `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. | +| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. | +| `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. | +| `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. | --- @@ -79,8 +83,7 @@ uv run das [options] The `-l` / `--mode` argument determines how the dataset is divided: -* **`LOIO` (Leave One Instance Out):** Uses `LOLO_train_set.json`, a randomly generated subset containing mixed problem - types. +* **`LOIO` (Leave One Instance Out):** Uses a randomly generated subset containing mixed problem types. * **`hard` (Leave One Problem Out):** Splits the dataset by grouping identical problem instances. Contains **twice as many** training functions as test functions. 
* **`easy` (Leave One Problem Out):** Similar to `hard`, but with **inverted** train-test proportions (more test @@ -117,6 +120,22 @@ Below is a comparison of how the checkpoint lengths correspond to different `cdb * **Early Stages (Short):** Allows the agent to make rapid decisions and switch algorithms frequently during the initial exploration phase. * **Later Stages (Long):** Provides longer uninterrupted periods for algorithms to converge (exploitation) without being disrupted by frequent agent switching. +## 🏆 Reward Options + +The `-O` or `--reward-option` argument determines how the agent calculates the reward after each checkpoint. All options compute an `improvement` metric based on the change in the best objective value (`y`), scaled against the initial value range (`initial_value_range[1] - initial_value_range[0]`). + +Here are the available reward strategies: + +* **Option 1 (`1`): Logarithmic Scaled Improvement** +Calculates the improvement between the current checkpoint and the previous one (`old_best_y - new_best_y`), scales it, clips the value between 0.0 and 1.0, and applies a logarithmic transformation (`np.log(reward + 1e-5)`). Useful for smoothing out large variance in improvements. +* **Option 2 (`2`): Linear Clipped Improvement** +Calculates the scaled improvement between the current checkpoint and the previous one (`old_best_y - new_best_y`), and simply clips the result between 0.0 and 1.0 without any logarithmic scaling (`np.clip(reward, 0.0, 1.0)`). +* **Option 3 (`3`): Sparse Total Improvement (Final Checkpoint Only)** +Provides a sparse reward. It returns `0.0` for all intermediate checkpoints. At the final checkpoint, it calculates the *total* improvement from the very start of the optimization run (`initial_value_range[0] - new_best_y`), scales it, and applies a logarithmic transformation. 
+* **Option 4 (`4`): Binary Threshold Reward** +Calculates the scaled improvement between checkpoints and provides a binary outcome: it returns `1.0` if the scaled improvement is greater than or equal to a minimum threshold (`1e-3`), and `0.0` otherwise. + + ## 🧠 State Representation There are three options for representing the optimization state (`-r` flag): diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index 2243a38..3b515d9 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -1,6 +1,8 @@ from itertools import product from typing import List, Type, Optional, Dict, Any, Tuple import numpy as np + +from dynamicalgorithmselection.agents.agent_reward import AgentReward from dynamicalgorithmselection.agents.agent_state import ( get_state_representation, StateNormalizer, @@ -57,6 +59,7 @@ def __init__(self, problem, options): "state_normalizer", StateNormalizer(input_shape=(self.state_dim,)) ) self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None) + self.reward_method = AgentReward(self.options.get("reward_option", 1)) def get_partial_state( self, @@ -234,13 +237,9 @@ def _collect(self, fitness, y=None): def optimize(self, fitness_function=None, args=None): raise NotImplementedError - def get_reward(self, new_best_y, old_best_y): - if old_best_y == float("inf"): - return 0.0 - - improvement = old_best_y - new_best_y - - reward = improvement / ( - self.initial_value_range[1] - self.initial_value_range[0] + def get_reward( + self, new_best_y: float, old_best_y: float, is_final_checkpoint: bool = False + ): + return self.reward_method( + new_best_y, old_best_y, self.initial_value_range, is_final_checkpoint ) - return np.log(np.clip(reward, 0.0, 1.0) + 1e-5) diff --git a/dynamicalgorithmselection/agents/agent_reward.py b/dynamicalgorithmselection/agents/agent_reward.py new file mode 100644 index 0000000..20c6ed4 --- /dev/null +++ 
b/dynamicalgorithmselection/agents/agent_reward.py @@ -0,0 +1,79 @@ +from typing import Tuple + +import numpy as np + + +class AgentReward: + def __init__(self, option: int): + self.reward_method = getattr(self, f"r{option}") + + def __call__( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + return self.reward_method( + new_best_y, old_best_y, initial_value_range, is_final_checkpoint + ) + + def r1( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return np.log(np.clip(reward, 0.0, 1.0) + 1e-5) + + def r2( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return np.clip(reward, 0.0, 1.0) + + def r3( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf") or not is_final_checkpoint: + return 0.0 + + improvement = initial_value_range[0] - new_best_y + scale = initial_value_range[1] - initial_value_range[0] + reward = improvement / scale + return np.log(reward + 1e-5) + + def r4( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return 1.0 if reward >= 1e-3 else 0.0 diff --git a/dynamicalgorithmselection/main.py 
b/dynamicalgorithmselection/main.py index adde569..472b50d 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -2,7 +2,7 @@ import os import pickle import shutil -from typing import List, Type, Optional +from typing import List, Type, Dict, Any import cocopp import neat import torch @@ -108,7 +108,7 @@ def parse_arguments(): "-l", "--mode", type=str, - default="easy", + default="LOIO", choices=["LOIO", "hard", "easy", "CV-LOIO", "CV-LOPO", "baselines"], help="specify which agent to use", ) @@ -155,6 +155,14 @@ def parse_arguments(): help="number of training epochs", ) + parser.add_argument( + "-O", + "--reward-option", + type=int, + default=1, + help="id of method used to compute reward", + ) + return parser.parse_args() @@ -178,22 +186,30 @@ def print_info(args): print("Forcing restarts: ", args.force_restarts) print("Dimensionality of problems: ", args.dimensionality) print("Number of training epochs: ", args.n_epochs) + print("Rewarding option: ", args.reward_option) -def test(args, action_space): - if os.path.exists(os.path.join("exdata", f"DAS_{args.name}")): - shutil.rmtree(os.path.join("exdata", f"DAS_{args.name}")) - +def common_options(args) -> Dict[str, Any]: options = { "n_checkpoints": args.n_checkpoints, "n_individuals": args.population_size, - "action_space": action_space, "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, "dimensionality": args.dimensionality, "n_epochs": args.n_epochs, + "reward_option": args.reward_option, } + return options + + +def test(args, action_space): + if os.path.exists(os.path.join("exdata", f"DAS_{args.name}")): + shutil.rmtree(os.path.join("exdata", f"DAS_{args.name}")) + + options = { + "action_space": action_space, + } | common_options(args) # agent_state = torch.load(f) if args.agent == "neuroevolution": config = neat.Config( @@ -240,16 +256,10 @@ def run_training(args, action_space): coco_bbob_experiment( 
AGENTS_DICT[args.agent], { - "n_checkpoints": args.n_checkpoints, - "n_individuals": args.population_size, "run": run, "action_space": action_space, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - "dimensionality": args.dimensionality, - "n_epochs": args.n_epochs, - }, + } + | common_options(args), name=f"DAS_train_{args.name}", evaluations_multiplier=args.fe_multiplier, train=True, @@ -266,16 +276,10 @@ def run_CV(args, action_space): coco_bbob_experiment( AGENTS_DICT[args.agent], { - "n_checkpoints": args.n_checkpoints, - "n_individuals": args.population_size, "run": None, "action_space": action_space, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - "dimensionality": args.dimensionality, - "n_epochs": args.n_epochs, - }, + } + | common_options(args), name=f"DAS_CV_{args.name}", evaluations_multiplier=args.fe_multiplier, train=True, @@ -295,16 +299,10 @@ def run_baselines(args, action_space): coco_bbob_experiment( None, { - "optimizer_portfolio": [optimizer], # <--- FIXED: List of 1 - "n_individuals": args.population_size, + "optimizer_portfolio": [optimizer], "baselines": True, - "n_checkpoints": args.n_checkpoints, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - "dimensionality": args.dimensionality, - "n_epochs": args.n_epochs, - }, + } + | common_options(args), name=optimizer.__name__, evaluations_multiplier=args.fe_multiplier, train=False, diff --git a/runner.slurm b/runner.slurm index ac15cea..42f89c8 100644 --- a/runner.slurm +++ b/runner.slurm @@ -34,19 +34,17 @@ if [ $TASK_ID -le 5 ]; then echo "Running Mode: $MODE | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ - --mode $MODE --dimensionality $DIM --n_epochs 
40 --agent RL-DAS + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ - -r custom --mode $MODE --dimensionality $DIM --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient elif [ $TASK_ID -eq 6 ]; then # --- MULTIDIMENSIONAL --- echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_$MODE \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --wandb_project RL-DAS --wandb_entity niecwladek-agh \ - -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient elif [ $TASK_ID -eq 14 ]; then # --- RANDOM AGENT --- diff --git a/tests/test_agent.py b/tests/test_agent.py index eecbb27..9545f15 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -50,7 +50,7 @@ def test_get_reward_logic(self, mock_problem, basic_options): return_value=(MagicMock(), 5), ): agent = Agent(mock_problem, basic_options) - agent.initial_value_range = 10.0 + agent.initial_value_range = (10.0, 20.0) reward_good = agent.get_reward(new_best_y=15.0, old_best_y=20.0) # 5.0 / 10.0 = 0.5 -> log(0.5) From 6e2a583b542825091d0da7cb7f587679c13c2ad1 Mon Sep 17 00:00:00 2001 From: wniec Date: Fri, 20 Feb 2026 20:19:59 +0100 Subject: [PATCH 13/20] fix baselines and drop standalone highly-dimensional experiments --- dynamicalgorithmselection/NeurELA/NeurELA.py | 2 - .../agents/RLDAS_random_agent.py | 192 ++++++++++++++++++ .../experiments/cross_validation.py | 1 - .../experiments/experiment.py | 26 +-- .../experiments/utils.py | 1 - dynamicalgorithmselection/main.py | 71 +++++-- 
runner.slurm | 91 ++++++--- tests/test_experiment.py | 11 +- 8 files changed, 321 insertions(+), 74 deletions(-) create mode 100644 dynamicalgorithmselection/agents/RLDAS_random_agent.py diff --git a/dynamicalgorithmselection/NeurELA/NeurELA.py b/dynamicalgorithmselection/NeurELA/NeurELA.py index f141c54..ebc8f37 100644 --- a/dynamicalgorithmselection/NeurELA/NeurELA.py +++ b/dynamicalgorithmselection/NeurELA/NeurELA.py @@ -27,8 +27,6 @@ def load_data(path): return data -seed = 0 - BASE_DIR = os.path.dirname(os.path.abspath(__file__)) load_path = os.path.join(BASE_DIR, "NeurELA.pkl") diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py new file mode 100644 index 0000000..4049a98 --- /dev/null +++ b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -0,0 +1,192 @@ +import numpy as np +import torch +import copy +from typing import Any, Dict + +from dynamicalgorithmselection.agents.agent import Agent +from dynamicalgorithmselection.optimizers.Optimizer import Optimizer + +INITIAL_POPSIZE = 170 + + +class RLDASRandomAgent(Agent): + def __init__(self, problem, options): + super().__init__(problem, options) + + self.alg_names = [alg.__name__ for alg in self.actions] + self.n_algorithms = len(self.actions) + + self.ah_vectors = np.zeros((self.n_algorithms, 2, self.ndim_problem)) + self.alg_usage_counts = np.zeros(self.n_algorithms) + self.context_memory: Dict[str, Dict[str, Any]] = { + name: {} for name in self.alg_names + } + self.context_memory["Common"] = {} + self.mean_rewards = options.get("mean_rewards", []) + self.best_50_mean = float("inf") + self.schedule_interval = options.get( + "schedule_interval", int(self.max_function_evaluations / 50) + ) + + def _update_ah_history( + self, alg_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ): + sv_best_current = x_best_new - x_best_old + sv_worst_current = x_worst_new - x_worst_old + + H = self.alg_usage_counts[alg_idx] + + 
self.ah_vectors[alg_idx, 0] = ( + self.ah_vectors[alg_idx, 0] * H + sv_best_current + ) / (H + 1) + self.ah_vectors[alg_idx, 1] = ( + self.ah_vectors[alg_idx, 1] * H + sv_worst_current + ) / (H + 1) + + self.alg_usage_counts[alg_idx] += 1 + + def _save_context(self, optimizer, alg_name): + common_attrs = ["memory_f", "memory_cr", "archive", "archive_fitness"] + for attr in common_attrs: + if hasattr(optimizer, attr): + self.context_memory["Common"][attr] = getattr(optimizer, attr) + + specific_attrs = [] + if "JDE21" in alg_name: + specific_attrs = [ + "tau1", + "tau2", + "ageLmt", + "eps", + "myEqs", + "successful_f", + "successful_cr", + ] + elif "MadDE" in alg_name: + specific_attrs = ["pm", "pbest", "pqBX"] + elif "NL_SHADE" in alg_name: + specific_attrs = ["nA", "pA"] + + for attr in specific_attrs: + if hasattr(optimizer, attr): + self.context_memory[alg_name][attr] = getattr(optimizer, attr) + + def _restore_context(self, optimizer, alg_name): + """ + Restores parameters to the optimizer from self.context_memory. 
+ """ + for attr, val in self.context_memory["Common"].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + if alg_name in self.context_memory: + for attr, val in self.context_memory[alg_name].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + def _select_action(self): + with torch.no_grad(): + probs = torch.ones(size=(1, len(self.actions))) / len(self.actions) + dist = torch.distributions.Categorical(probs) + action = dist.sample() + + return action.item() + + def initialize(self): + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + size=(INITIAL_POPSIZE, self.ndim_problem), + ) + y = np.zeros((INITIAL_POPSIZE,)) + for i in range(INITIAL_POPSIZE): + y[i] = self._evaluate_fitness(x[i]) + return x, y + + def optimize(self, fitness_function=None, args=None): + """ + Main Optimization Loop implementing RL-DAS workflow (Algorithm 1). + Does NOT use checkpoints. Uses interval-based scheduling. 
+ """ + fitness = Optimizer.optimize(self, fitness_function) + population_x, population_y = self.initialize() + self.n_function_evaluations = INITIAL_POPSIZE + + best_idx = np.argmin(population_y) + best_y_global = population_y[best_idx] + best_x_global = population_x[best_idx].copy() + + self.best_so_far_y = best_y_global + self.best_so_far_x = best_x_global + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self.ah_vectors.fill(0.0) + self.alg_usage_counts.fill(0.0) + self.context_memory = {name: {} for name in self.alg_names} + self.context_memory["Common"] = {} + + while self.n_function_evaluations < self.max_function_evaluations: + action_idx = self._select_action() + self.choices_history.append(action_idx) + + selected_alg_class = self.actions[action_idx] + alg_name = self.alg_names[action_idx] + + sub_opt = selected_alg_class(self.problem, self.options) + sub_opt.n_function_evaluations = self.n_function_evaluations + sub_opt.max_function_evaluations = self.max_function_evaluations + + self._restore_context(sub_opt, alg_name) + + x_best_old = population_x[np.argmin(population_y)].copy() + x_worst_old = population_x[np.argmax(population_y)].copy() + + target_fes = min( + self.n_function_evaluations + self.schedule_interval, + self.max_function_evaluations, + ) + sub_opt.target_FE = target_fes + sub_opt.set_data( + x=population_x, + y=population_y, + best_x=self.best_so_far_x, + best_y=self.best_so_far_y, + ) + + res = sub_opt.optimize() + + population_x = res["x"] + population_y = res["y"] + + self.n_function_evaluations = sub_opt.n_function_evaluations + + self._save_context(sub_opt, alg_name) + + x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() + x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy() + cost_new: float = np.min(population_y) + + self._update_ah_history( + action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ) + + best_y_global = min(best_y_global, 
cost_new) + + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self._n_generations += 1 + self._print_verbose_info(fitness, self.best_so_far_y) + + return self._collect(fitness, self.best_so_far_y) + + def _collect(self, fitness, y=None): + results, _ = super()._collect(fitness, y) + agent_state = {} + return results, agent_state diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index c04d1dd..368cb1f 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -61,7 +61,6 @@ def _get_cv_folds(n: int, is_loio: bool, dim: Optional[int]): :param dim: dimensionality of the problems. None indicates all of them. :return suite, list of (train set, test set) pairs: """ - np.random.seed(1234) cocoex.utilities.MiniPrint() problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index 2ac1324..a296832 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -2,6 +2,8 @@ import os from typing import Type, Optional +import numpy as np + from dynamicalgorithmselection.experiments.core import run_testing, run_training from dynamicalgorithmselection.experiments.cross_validation import run_cross_validation from dynamicalgorithmselection.experiments.neuroevolution import ( @@ -24,7 +26,7 @@ def dump_extreme_stats( - optimizer_portfolio: list[Type[Optimizer]], + name: str, stats, problem_instance, max_function_evaluations, @@ -38,11 +40,10 @@ def dump_extreme_stats( best_case, worst_case = get_extreme_stats( stats, max_function_evaluations, checkpoints ) - portfolio_name = "_".join(i.__name__ 
for i in optimizer_portfolio) with open( os.path.join( "results", - f"{portfolio_name}_best", + f"{name}_best", f"{problem_instance}.json", ), "w", @@ -54,7 +55,7 @@ def dump_extreme_stats( with open( os.path.join( "results", - f"{portfolio_name}_worst", + f"{name}_worst", f"{problem_instance}.json", ), "w", @@ -79,7 +80,7 @@ def coco_bbob_experiment( return run_cross_validation( optimizer, options, evaluations_multiplier, is_loio=mode.endswith("LOIO") ) - elif agent == "random": + elif agent in ["random", "RL-DAS-random"]: return _coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode) elif options.get("baselines"): return run_comparison( @@ -171,22 +172,22 @@ def run_comparison( print("Initializing Observers...") for optimizer in optimizer_portfolio: optimizer_name = optimizer.__name__ + case_name = f"{options['name']}_{optimizer_name}" - results_dir = os.path.join("results", f"{optimizer_name}") + results_dir = os.path.join("results", case_name) os.makedirs(results_dir, exist_ok=True) - observer = cocoex.Observer("bbob", "result_folder: " + optimizer_name) + observer = cocoex.Observer("bbob", "result_folder: " + case_name) observers[optimizer_name] = observer - results_folders.append("exdata/" + optimizer_name) # Adjust path if needed + results_folders.append("exdata/" + case_name) # Adjust path if needed suites[optimizer_name] = get_suite("all", False, options.get("dimensionality"))[ 0 ] # Create directories for best/worst JSON stats - portfolio_name = "_".join(i.__name__ for i in optimizer_portfolio) for ext in ["best", "worst"]: - os.makedirs(os.path.join("results", f"{portfolio_name}_{ext}"), exist_ok=True) + os.makedirs(os.path.join("results", f"{options['name']}_{ext}"), exist_ok=True) cocoex.utilities.MiniPrint() @@ -200,6 +201,7 @@ def run_comparison( for optimizer in optimizer_portfolio: optimizer_name = optimizer.__name__ + result_folder_name = f"{options['name']}_{optimizer_name}" problem_instance = 
suites[optimizer_name].get_problem(problem_id) problem_instance.observe_with(observers[optimizer_name]) @@ -215,7 +217,7 @@ def run_comparison( stats[optimizer_name] = results["fitness_history"] dump_stats( results[0] if isinstance(results, tuple) else results, - optimizer_name, + result_folder_name, problem_id, max_fe, options.get("n_checkpoints"), @@ -224,7 +226,7 @@ def run_comparison( ) dump_extreme_stats( - optimizer_portfolio, + options.get("name"), stats, problem_id, max_fe, diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index a14f481..18e489f 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -79,7 +79,6 @@ def get_suite(mode: str, train: bool, dim: Optional[int]): ] elif mode == "LOIO": - np.random.seed(1234) train_problem_ids = np.random.choice( all_problem_ids, size=2 * len(all_problem_ids) // 3, replace=False ) diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index 472b50d..924b87c 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -2,13 +2,16 @@ import os import pickle import shutil +from random import seed as set_random_seed from typing import List, Type, Dict, Any import cocopp import neat +import numpy as np import torch import wandb from dynamicalgorithmselection.agents.RLDAS_agent import RLDASAgent +from dynamicalgorithmselection.agents.RLDAS_random_agent import RLDASRandomAgent from dynamicalgorithmselection.agents.neuroevolution_agent import NeuroevolutionAgent from dynamicalgorithmselection.agents.policy_gradient_agent import PolicyGradientAgent from dynamicalgorithmselection.agents.random_agent import RandomAgent @@ -22,6 +25,7 @@ "neuroevolution": NeuroevolutionAgent, "policy-gradient": PolicyGradientAgent, "RL-DAS": RLDASAgent, + "RL-DAS-random": RLDASRandomAgent, } @@ -163,6 +167,13 @@ def parse_arguments(): help="id of method used to compute 
reward", ) + parser.add_argument( + "-S", + "--seed", + type=int, + default=42, + help="seed", + ) return parser.parse_args() @@ -199,6 +210,7 @@ def common_options(args) -> Dict[str, Any]: "dimensionality": args.dimensionality, "n_epochs": args.n_epochs, "reward_option": args.reward_option, + "seed": args.seed, } return options @@ -291,28 +303,45 @@ def run_CV(args, action_space): def run_baselines(args, action_space): for optimizer in action_space: - if os.path.exists(os.path.join("exdata", optimizer.__name__)): - shutil.rmtree(os.path.join("exdata", optimizer.__name__)) - - print(f"--- Running Baseline: {optimizer.__name__} ---") - - coco_bbob_experiment( - None, - { - "optimizer_portfolio": [optimizer], - "baselines": True, - } - | common_options(args), - name=optimizer.__name__, - evaluations_multiplier=args.fe_multiplier, - train=False, - agent=None, + if os.path.exists( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") + ): + shutil.rmtree( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") + ) + + coco_bbob_experiment( + None, + { + "optimizer_portfolio": action_space, + "baselines": True, + } + | common_options(args), + name=f"{args.name}_baselines", + evaluations_multiplier=args.fe_multiplier, + train=False, + agent=None, + ) + for optimizer in action_space: + cocopp.main( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") ) - cocopp.main(os.path.join("exdata", optimizer.__name__)) + + +def set_seed(seed): + os.environ["PYTHONHASHSEED"] = str(seed) + # Torch RNG + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + # Python RNG + np.random.seed(seed) + set_random_seed(seed) def main(): args = parse_arguments() + set_seed(args.seed) print_info(args) available_optimizers = optimizers.available_optimizers action_space: List[Type[Optimizer]] = [] @@ -321,14 +350,12 @@ def main(): raise ValueError(f'Unknown optimizer "{optimizer}"') else: 
action_space.append(available_optimizers[optimizer]) - if not os.path.exists("models"): - os.mkdir("models") - if not os.path.exists("results"): - os.mkdir("results") + os.makedirs("models", exist_ok=True) + os.makedirs("results", exist_ok=True) if args.mode.startswith("CV"): run_CV(args, action_space) else: - if args.agent != "random" and args.mode != "baselines": + if args.agent not in ["random", "RL-DAS-random"] and args.mode != "baselines": run_training(args, action_space) if args.test and args.mode != "baselines": test(args, action_space) diff --git a/runner.slurm b/runner.slurm index 42f89c8..0066523 100644 --- a/runner.slurm +++ b/runner.slurm @@ -7,54 +7,87 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-15 # 0-6 (LOIO), 7-13 (LOPO), 14-15 (Random and baselines) +#SBATCH --array=0-15 # 16 tasks total CDB_VAL=${1:-1.5} +if [ "$#" -gt 0 ]; then + shift +fi + +# Store the remaining arguments as an array called PORTFOLIO. +# If no additional arguments were provided, fall back to your default. +if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi + # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" source "$ENV_PATH" -mkdir -p logs # Ensure logs directory exists +mkdir -p logs -Determine the Mode based on the Array ID -if [[ $SLURM_ARRAY_TASK_ID -le 6 || $SLURM_ARRAY_TASK_ID -ge 14 ]]; then +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. 
Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" - TASK_ID=$SLURM_ARRAY_TASK_ID -else - MODE="CV-LOPO" - TASK_ID=$((SLURM_ARRAY_TASK_ID - 7)) -fi + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" -# Map the Task ID (0-6) to specific experiments -DIMS=(2 3 5 10 20 40) + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -if [ $TASK_ID -le 5 ]; then - # --- DIMENSION SPECIFIC RUNS --- - DIM=${DIMS[$TASK_ID]} +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' -r custom --mode $MODE --dimensionality $DIM \ + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -elif [ $TASK_ID -eq 6 ]; then - # --- MULTIDIMENSIONAL --- +# 3. 
Dimension-specific RL-DAS-random (Indices 8-11) +elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_${DIM} \ + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb $CDB_VAL --agent RL-DAS-random --dimensionality $DIM + +# 4. Multidimensional CV-LOIO (Index 12) +elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 5. Multidimensional CV-LOPO (Index 13) +elif [[ $SLURM_ARRAY_TASK_ID -eq 13 ]]; then + MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_$MODE \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient -elif [ $TASK_ID -eq 14 ]; then - # --- RANDOM AGENT --- - echo "Running Mode: Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb $CDB_VAL --agent random +# 6. Global Random Agent (Index 14) +elif [[ $SLURM_ARRAY_TASK_ID -eq 14 ]]; then + echo "Running Mode: Global Random Agent" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -elif [ $TASK_ID -eq 15 ]; then - # --- BASELINES --- +# 7. 
Global Baselines (Index 15) +elif [[ $SLURM_ARRAY_TASK_ID -eq 15 ]]; then echo "Running Mode: Baselines" python3 dynamicalgorithmselection/main.py BASELINES \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent random --mode baselines + -p "${PORTFOLIO[@]}" --agent random --mode baselines fi \ No newline at end of file diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 519af5d..9d88b24 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -121,18 +121,15 @@ def test_dump_extreme_stats( self, mock_json_dump, mock_file, mock_get_extreme, mock_get_checkpoints ): stats: dict[str, list[Any]] = {"Opt1": [], "Opt2": []} - portfolio = cast( - list[Type[Optimizer]], - [MagicMock(__name__="Opt1"), MagicMock(__name__="Opt2")], - ) mock_get_extreme.return_value = ({"best": 1}, {"worst": 0}) + case_name = "OPT1_OPT2_OPT3" - dump_extreme_stats(portfolio, stats, "p1", 100, 5, 10, 0.5) + dump_extreme_stats(case_name, stats, "p1", 100, 5, 10, 0.5) self.assertEqual(mock_file.call_count, 2) self.assertEqual(mock_json_dump.call_count, 2) args_list = mock_file.call_args_list - self.assertIn("Opt1_Opt2_best", args_list[0][0][0]) - self.assertIn("Opt1_Opt2_worst", args_list[1][0][0]) + self.assertIn(f"{case_name}_best", args_list[0][0][0]) + self.assertIn(f"{case_name}_worst", args_list[1][0][0]) From e94f73f4226f4069e17000023e4f079febb25472 Mon Sep 17 00:00:00 2001 From: wniec Date: Sat, 21 Feb 2026 14:41:59 +0100 Subject: [PATCH 14/20] fix paper-implementation differences for RL-DAS and its sub-optimizers --- compare2ELA.slurm | 67 ++++ .../agents/RLDAS_agent.py | 8 +- .../agents/RLDAS_random_agent.py | 8 +- .../optimizers/DE/JDE21.py | 375 ++++++++++++++---- .../optimizers/DE/MADDE.py | 76 +++- .../optimizers/DE/NL_SHADE_RSP.py | 170 +++++--- runner.slurm | 50 ++- 7 files changed, 576 insertions(+), 178 deletions(-) create mode 100644 compare2ELA.slurm diff --git a/compare2ELA.slurm b/compare2ELA.slurm new file mode 100644 index 0000000..18bc73e --- 
/dev/null +++ b/compare2ELA.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-13 # 14 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +# Store the remaining arguments as an array called PORTFOLIO. +# If no additional arguments were provided, fall back to your default. +if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM}_ELA \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM}_ELA \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 4. 
Multidimensional CV-LOIO (Index 12) +elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_ELA \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 5. Multidimensional CV-LOPO (Index 13) +elif [[ $SLURM_ARRAY_TASK_ID -eq 13 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_ELA \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index ada85ba..14835ee 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -90,7 +90,7 @@ def _update_ah_history( self.alg_usage_counts[alg_idx] += 1 def _save_context(self, optimizer, alg_name): - common_attrs = ["memory_f", "memory_cr", "archive", "archive_fitness"] + common_attrs = ["MF", "MCr", "archive"] for attr in common_attrs: if hasattr(optimizer, attr): self.context_memory["Common"][attr] = getattr(optimizer, attr) @@ -103,13 +103,11 @@ def _save_context(self, optimizer, alg_name): "ageLmt", "eps", "myEqs", - "successful_f", - "successful_cr", ] elif "MadDE" in alg_name: - specific_attrs = ["pm", "pbest", "pqBX"] + specific_attrs = ["pm", "pbest", "PqBX"] elif "NL_SHADE" in alg_name: - specific_attrs = ["nA", "pA"] + specific_attrs = ["NA", "pa"] for attr in specific_attrs: if hasattr(optimizer, attr): diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py index 4049a98..74b4d7c 100644 --- a/dynamicalgorithmselection/agents/RLDAS_random_agent.py +++ 
b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -46,7 +46,7 @@ def _update_ah_history( self.alg_usage_counts[alg_idx] += 1 def _save_context(self, optimizer, alg_name): - common_attrs = ["memory_f", "memory_cr", "archive", "archive_fitness"] + common_attrs = ["MF", "MCr", "archive"] for attr in common_attrs: if hasattr(optimizer, attr): self.context_memory["Common"][attr] = getattr(optimizer, attr) @@ -59,13 +59,11 @@ def _save_context(self, optimizer, alg_name): "ageLmt", "eps", "myEqs", - "successful_f", - "successful_cr", ] elif "MadDE" in alg_name: - specific_attrs = ["pm", "pbest", "pqBX"] + specific_attrs = ["pm", "pbest", "PqBX"] elif "NL_SHADE" in alg_name: - specific_attrs = ["nA", "pA"] + specific_attrs = ["NA", "pa"] for attr in specific_attrs: if hasattr(optimizer, attr): diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py index 3042efd..8512eb1 100644 --- a/dynamicalgorithmselection/optimizers/DE/JDE21.py +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -1,20 +1,49 @@ import numpy as np - from dynamicalgorithmselection.optimizers.DE.DE import DE class JDE21(DE): + start_condition_parameters = ["x", "y", "F", "Cr"] + def __init__(self, problem, options): super().__init__(problem, options) + + # Mathematical minimum population limit to survive RL starvation + self.Nmin = 4 + + # Population parameters + # We start with the base sizes defined in the j21 paper, + # though set_data/initialize will override this if the RL agent injects a different size. 
+ self.bNP = 160 self.sNP = 10 - self.bNP = self.n_individuals - self.sNP + self.n_individuals = self.bNP + self.sNP + + # Stagnation and Reset parameters self.age = 0 - self.tao1 = self.tao2 = 0.1 - self.Finit, self.CRinit = 0.5, 0.9 + self.eps = 1e-12 # Tolerance for fitness equality + self.MyEps = 0.25 # Threshold ratio (25%) for reset + self.reductions_done = 0 + + # Self-adaptation probabilities + self.tau1 = 0.1 + self.tau2 = 0.1 + self.Finit = 0.5 + self.CRinit = 0.9 + + # Parameter Limits (Big Population) + self.Fl_b = 0.1 + self.CRl_b = 0.0 + self.CRu_b = 1.1 + + # Parameter Limits (Small Population) + self.Fl_s = 0.17 + self.CRl_s = 0.1 + self.CRu_s = 0.8 + + # Shared Upper Bound for F self.Fu = 1.1 - self.Fl_b, self.CRu_b = 0.1, 1.1 - self.Nmax = self.n_individuals - self.Nmin = 30 + + self.F, self.Cr = None, None def initialize(self, args=None, x=None, y=None): if x is None: @@ -23,75 +52,265 @@ def initialize(self, args=None, x=None, y=None): self.initial_upper_boundary, (self.n_individuals, self.ndim_problem), ) + else: + self.n_individuals = x.shape[0] + self.sNP = min(10, max(1, self.n_individuals // 4)) + self.bNP = self.n_individuals - self.sNP if y is None: y = np.array([self._evaluate_fitness(xi, args) for xi in x]) - self.F = np.full(self.n_individuals, self.Finit) - self.Cr = np.full(self.n_individuals, self.CRinit) + self.F = np.full(self.n_individuals, self.Finit) if self.F is None else self.F + self.Cr = ( + np.full(self.n_individuals, self.CRinit) if self.Cr is None else self.Cr + ) return x, y - def _mutate_cross_select(self, x, y, indices, args=None): - NP_sub = len(indices) - if NP_sub < 4: - return x, y - - # Self-adaptation - new_F = np.where( - self.rng_optimization.random(NP_sub) < self.tao1, - self.rng_optimization.random(NP_sub) * self.Fu + self.Fl_b, - self.F[indices], + def _reflect_bounds(self, v): + v = np.where( + v < self.initial_lower_boundary, 2 * self.initial_lower_boundary - v, v ) - new_Cr = np.where( - 
self.rng_optimization.random(NP_sub) < self.tao2, - self.rng_optimization.random(NP_sub) * self.CRu_b, - self.Cr[indices], + + v = np.where( + v > self.initial_upper_boundary, 2 * self.initial_upper_boundary - v, v ) - # Mutation & Crossover - # Simplified vectorized parent selection - r1, r2, r3 = [self.rng_optimization.choice(indices, NP_sub) for _ in range(3)] - vs = x[r1] + new_F[:, np.newaxis] * (x[r2] - x[r3]) - vs = np.clip(vs, self.lower_boundary, self.upper_boundary) + v = np.clip(v, self.initial_lower_boundary, self.initial_upper_boundary) + return v - mask = ( - self.rng_optimization.random((NP_sub, self.ndim_problem)) - < new_Cr[:, np.newaxis] - ) - us = np.where(mask, vs, x[indices]) + def _check_population_reduction(self, x, y): + # SYNCHRONIZATION + actual_size = len(y) + if actual_size != self.n_individuals: + self.n_individuals = actual_size + self.sNP = min(10, max(1, actual_size // 4)) + self.bNP = self.n_individuals - self.sNP - new_y = np.array([self._evaluate_fitness(ui, args) for ui in us]) + if len(self.F) != actual_size: + self.F = np.full(actual_size, self.Finit) + self.Cr = np.full(actual_size, self.CRinit) - # Crowding Selection - dists = np.linalg.norm( - x[indices][:, np.newaxis, :] - us[np.newaxis, :, :], axis=2 - ) - closest_sub_idx = np.argmin(dists, axis=0) - closest_global_idx = indices[closest_sub_idx] - - improved = new_y < y[closest_global_idx] - for i, idx in enumerate(closest_global_idx): - if improved[i]: - x[idx], y[idx] = us[i], new_y[i] - self.F[idx], self.Cr[idx] = new_F[i], new_Cr[i] - self.age = 0 + # REDUCTION LOGIC + thresholds = [0.25, 0.50, 0.75] + if self.reductions_done < len(thresholds): + progress = self.n_function_evaluations / self.max_function_evaluations + if progress >= thresholds[self.reductions_done]: + # Calculate the standard halved size for the big population + new_bNP = self.bNP // 2 + + min_allowed_bNP = max(1, self.Nmin - self.sNP) + new_bNP = max(new_bNP, min_allowed_bNP) + + # Only perform the 
competition if we are actually shrinking the array + if new_bNP < self.bNP: + part1_idx = np.arange(new_bNP) + part2_idx = np.arange(new_bNP, 2 * new_bNP) + + keep_idx = [] + for i, j in zip(part1_idx, part2_idx): + if j < self.bNP: + keep_idx.append(i if y[i] <= y[j] else j) + else: + keep_idx.append(i) + + keep_b_idx = np.array(keep_idx, dtype=int) + s_idx = np.arange(int(self.bNP), int(self.n_individuals), dtype=int) + + x = np.concatenate([x[keep_b_idx], x[s_idx]], axis=0) + y = np.concatenate([y[keep_b_idx], y[s_idx]], axis=0) + self.F = np.concatenate([self.F[keep_b_idx], self.F[s_idx]], axis=0) + self.Cr = np.concatenate( + [self.Cr[keep_b_idx], self.Cr[s_idx]], axis=0 + ) + + # Update sizes for the newly reduced population + self.bNP = int(len(keep_b_idx)) + self.n_individuals = int(len(y)) + + self.reductions_done += 1 + + return x, y + + def _evolve_population(self, x, y, args, is_big=True): + if self.n_individuals == 0: + return x, y + + start_idx = 0 if is_big else self.bNP + end_idx = self.bNP if is_big else self.n_individuals + + f_low = self.Fl_b if is_big else self.Fl_s + cr_bound = self.CRu_b if is_big else self.CRu_s + cr_low = self.CRl_b if is_big else self.CRl_s + + for i in range(start_idx, end_idx): + # Parameter Adaptation + new_F = ( + self.rng_optimization.random() * self.Fu + f_low + if self.rng_optimization.random() < self.tau1 + else self.F[i] + ) + new_Cr = ( + self.rng_optimization.random() * cr_bound + cr_low + if self.rng_optimization.random() < self.tau2 + else self.Cr[i] + ) + + # Mutation Pool Selection with Extreme RL Fallbacks + if is_big: + progress = self.n_function_evaluations / self.max_function_evaluations + ms_size = 1 if progress <= 1 / 3 else 2 if progress <= 2 / 3 else 3 + + available_sNP = self.n_individuals - self.bNP + ms_size = min(ms_size, available_sNP) + + if ms_size > 0: + ms_indices = self.rng_optimization.choice( + range(self.bNP, self.n_individuals), ms_size, replace=False + ) + else: + ms_indices = 
np.array([], dtype=int) + + pool_r2_r3 = np.concatenate([np.arange(self.bNP), ms_indices]) + + # Helper to safely pick a target or fallback sequentially + def safe_choice(preferred_pool, exclude): + valid = [idx for idx in preferred_pool if idx not in exclude] + if not valid: + valid = [ + idx + for idx in range(self.n_individuals) + if idx not in exclude + ] + return self.rng_optimization.choice(valid) if valid else i + + r1 = safe_choice(range(self.bNP), [i]) + r2 = safe_choice(pool_r2_r3, [i, r1]) + r3 = safe_choice(pool_r2_r3, [i, r1, r2]) + + else: + pool = [idx for idx in range(self.bNP, self.n_individuals) if idx != i] + + # Normal behavior: P_s has enough individuals + if len(pool) >= 3: + r1, r2, r3 = self.rng_optimization.choice(pool, 3, replace=False) + else: + # FALLBACK 1: Try borrowing from the full population without replacement + full_pool = [idx for idx in range(self.n_individuals) if idx != i] + if len(full_pool) >= 3: + r1, r2, r3 = self.rng_optimization.choice( + full_pool, 3, replace=False + ) + else: + # EXTREME FALLBACK: Population is < 4. We MUST allow replacement. + # If population is literally 1, it will just pick `i` three times. 
+ full_pool_with_i = list(range(self.n_individuals)) + r1, r2, r3 = self.rng_optimization.choice( + full_pool_with_i, 3, replace=True + ) + + # Mutation and Reflection + v = x[r1] + new_F * (x[r2] - x[r3]) + v = self._reflect_bounds(v) + + # Crossover (Rotational Invariant Strategy) + if new_Cr > 1.0: + u = v.copy() + else: + u = x[i].copy() + j_rand = self.rng_optimization.integers(0, self.ndim_problem) + mask = self.rng_optimization.random(self.ndim_problem) <= new_Cr + mask[j_rand] = True + u[mask] = v[mask] + + # Evaluate + new_y = self._evaluate_fitness(u, args) + + # Crowding & Selection + if is_big: + # Euclidean distance crowding + dists = np.sum((x[: self.bNP] - u) ** 2, axis=1) + target = np.argmin(dists) + else: + target = i + + if new_y <= y[target]: + x[target], y[target] = u, new_y + self.F[target], self.Cr[target] = new_F, new_Cr + + if is_big and new_y < self.best_so_far_y: + self.best_so_far_y = new_y + self.age = 0 + elif is_big and target == i: + self.age += 1 - if not np.any(improved): - self.age += NP_sub return x, y def iterate(self, x=None, y=None, args=None): - bNP = x.shape[0] - self.sNP - # Evolution of big population - if x is None or y is None: - raise ValueError("x and y must be provided for iteration.") - x, y = self._mutate_cross_select(x, y, np.arange(bNP), args) + x, y = self._check_population_reduction(x, y) + + # P_b Reinitialization Check + if self.bNP > 0: + best_b_y = np.min(y[: self.bNP]) + eqs_b = np.sum(np.abs(y[: self.bNP] - best_b_y) < self.eps) + age_limit = 0.1 * self.max_function_evaluations + + if (eqs_b >= self.bNP * self.MyEps) or (self.age >= age_limit): + x[: self.bNP] = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.bNP, self.ndim_problem), + ) + y[: self.bNP] = np.array( + [self._evaluate_fitness(xi, args) for xi in x[: self.bNP]] + ) + self.F[: self.bNP] = self.Finit + self.Cr[: self.bNP] = self.CRinit + self.age = 0 + + # P_s Reinitialization Check + if 
self.sNP > 0: + # Safely find the best in the small population + best_s_idx = self.bNP + np.argmin(y[self.bNP :]) + eqs_s = np.sum(np.abs(y[self.bNP :] - y[best_s_idx]) < self.eps) + + if eqs_s >= self.sNP * self.MyEps: + best_x_s = x[best_s_idx].copy() + best_y_s = y[best_s_idx] + + x[self.bNP :] = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.sNP, self.ndim_problem), + ) + y[self.bNP :] = np.array( + [self._evaluate_fitness(xi, args) for xi in x[self.bNP :]] + ) + self.F[self.bNP :] = self.Finit + self.Cr[self.bNP :] = self.CRinit - # Evolution of small population (repeated) - small_idx = np.arange(bNP, x.shape[0]) - for _ in range(bNP // self.sNP): - x, y = self._mutate_cross_select(x, y, small_idx, args) + # Elitism: retain the best small-population individual + x[self.bNP], y[self.bNP] = best_x_s, best_y_s - progress = self.n_function_evaluations / self.max_function_evaluations - self.n_individuals = int(round(self.Nmax - progress * (self.Nmax - self.Nmin))) + # Big Population Generation + if self.bNP > 0: + x, y = self._evolve_population(x, y, args, is_big=True) + + # Migration + # The best individual migrates from P_b to P_s + if self.bNP > 0 and self.sNP > 0: + best_overall_idx = np.argmin(y) + if best_overall_idx < self.bNP: + worst_s_idx = self.bNP + np.argmax(y[self.bNP :]) + x[worst_s_idx] = x[best_overall_idx].copy() + y[worst_s_idx] = y[best_overall_idx] + self.F[worst_s_idx] = self.F[best_overall_idx] + self.Cr[worst_s_idx] = self.Cr[best_overall_idx] + + # Small Population Generation (repeats m times) + if self.sNP > 0: + # m is traditionally bNP // sNP, but must fallback cleanly if bNP is 0 + m = self.bNP // self.sNP if self.bNP > 0 else 1 + m = max(1, m) # Ensure it executes at least once if P_s is all we have + for _ in range(m): + x, y = self._evolve_population(x, y, args, is_big=False) self._n_generations += 1 return x, y @@ -101,41 +320,31 @@ def optimize(self, fitness_function=None, 
args=None): x, y = self.initialize( args, self.start_conditions.get("x"), self.start_conditions.get("y") ) - idx = 0 + + self.best_so_far_y = np.min(y) + while True: old_evals = self.n_function_evaluations - x, y = self.iterate(x, y, args) - self.results.update( - { - "x": x, - "y": y, - } - ) - if self._check_terminations(): - break - idx += 1 - if self.n_function_evaluations == old_evals: + self.results.update({"x": x, "y": y}) + if self._check_terminations() or self.n_function_evaluations == old_evals: break return self._collect(fitness, y) - def set_data( - self, - x=None, - y=None, - *args, - **kwargs, - ): + def set_data(self, x=None, y=None, *args, **kwargs): if x is None or y is None: self.start_conditions = {"x": None, "y": None} elif not isinstance(y, np.ndarray): - loc = locals() self.start_conditions = {} else: indices = np.argsort(y)[: self.n_individuals] - start_conditions = {} - start_conditions.update({"x": x[indices], "y": y[indices]}) - self.start_conditions = start_conditions + self.start_conditions = {"x": x[indices], "y": y[indices]} + self.Cr = kwargs.get("Cr") + if self.Cr is not None: + self.Cr = self.Cr[indices] + self.F = kwargs.get("F") + if self.F is not None: + self.F = self.F[indices] self.best_so_far_x = kwargs.get("best_x", None) self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py index 8ffa06c..2d55ac6 100644 --- a/dynamicalgorithmselection/optimizers/DE/MADDE.py +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -7,8 +7,10 @@ class MADDE(DE): def __init__(self, problem, options): super().__init__(problem, options) + D = self.ndim_problem # Constants from MadDE paper/original code - self.Nmax = self.n_individuals if self.n_individuals else 170 + self.Nmax = int(np.round(2 * (D**2))) + # self.Nmax = self.n_individuals if self.n_individuals else 170 self.Nmin = options.get("Nmin", 4) self.p = 0.18 self.PqBX = 
0.01 @@ -17,11 +19,12 @@ def __init__(self, problem, options): self.pm = np.ones(3) / 3 # Archive and Memory - self.NA = int(self.Nmax * 2.1) + self.A_rate = 2.30 + self.NA = int(np.round(self.A_rate * self.Nmax)) self.archive = np.empty((0, self.ndim_problem)) # Memory for F and Cr - self.memory_size = 20 # Standard for SHADE-based + self.memory_size = 10 * D self.MF = np.ones(self.memory_size) * 0.2 self.MCr = np.ones(self.memory_size) * 0.2 self.k_idx = 0 @@ -39,7 +42,14 @@ def initialize(self, args=None, x=None, y=None): def _choose_F_Cr(self, NP): indices = self.rng_optimization.integers(0, self.memory_size, size=NP) - Cr = self.rng_optimization.normal(loc=self.MCr[indices], scale=0.1, size=NP) + + Cr = np.zeros(NP) + for i, idx in enumerate(indices): + if self.MCr[idx] == -1.0: # Check for terminal state + Cr[i] = 0.0 + else: + Cr[i] = self.rng_optimization.normal(loc=self.MCr[idx], scale=0.1) + Cr = np.clip(Cr, 0, 1) # Cauchy-like sampling for F @@ -93,11 +103,11 @@ def iterate(self, x=None, y=None, args=None): q = 2 * self.p - self.p * FEs / MaxFEs Fa = 0.5 + 0.5 * FEs / MaxFEs - # 1. Parameter sampling + # Parameter sampling Cr, F = self._choose_F_Cr(NP) mu = self.rng_optimization.choice(3, size=NP, p=self.pm) - # 2. Mutation + # Mutation v = self._mutate(x, y, F, mu, q, Fa) # Boundary handling (MadDE specific) @@ -105,7 +115,7 @@ def iterate(self, x=None, y=None, args=None): v = np.where(v < low, (x + low) / 2, v) v = np.where(v > high, (x + high) / 2, v) - # 3. Crossover (Binomial + qBX) + # Crossover (Binomial + qBX) u = np.zeros_like(x) rvs = self.rng_optimization.random(NP) @@ -130,7 +140,7 @@ def iterate(self, x=None, y=None, args=None): ] u[qu_idx] = self._binomial(cross_qbest, v[qu_idx], Cr[qu_idx]) - # 4. 
Evaluation and Selection + # Evaluation and Selection new_y = np.array([self._evaluate_fitness(ui, args) for ui in u]) optim = new_y < y @@ -151,8 +161,7 @@ def iterate(self, x=None, y=None, args=None): x[optim], y[optim] = u[optim], new_y[optim] - # 5. NLPSR - x, y = self._nlpsr(x, y) + x, y = self._lpsr(x, y) self._n_generations += 1 return x, y @@ -171,19 +180,35 @@ def _update_pm(self, df, mu): else: self.pm = np.ones(3) / 3 - def _nlpsr(self, x, y): + def _lpsr(self, x, y): FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations - new_NP = int( - np.round( - self.Nmax - + (self.Nmin - self.Nmax) * np.power(FEs / MaxFEs, 1 - FEs / MaxFEs) - ) - ) + + # Prevent the ratio from exceeding 1.0 if FEs overshoots MaxFEs + ratio = min(1.0, FEs / MaxFEs) + + # LPSR formula: N_G = round(N_max - (N_max - N_min) * ratio) + new_NP = int(np.round(self.Nmax - (self.Nmax - self.Nmin) * ratio)) + + # Clamp to ensure population never drops below Nmin + new_NP = max(self.Nmin, new_NP) + if new_NP < x.shape[0]: idx = np.argsort(y)[:new_NP] x, y = x[idx], y[idx] self.n_individuals = new_NP - self.NA = int(max(new_NP * 2.1, self.Nmin)) + + # Dynamically prune the archive size based on the new population + self.NA = int(np.round(self.A_rate * new_NP)) + + # Ensure NA doesn't go negative (redundant with the max clamp above, but safe) + self.NA = max(0, self.NA) + + if len(self.archive) > self.NA: + self.archive = self.archive[ + self.rng_optimization.choice( + len(self.archive), self.NA, replace=False + ) + ] return x, y # Helper mutation methods (Vectorized) @@ -222,9 +247,22 @@ def _binomial(self, x, v, Cr): def _update_memory(self, SF, SCr, df): if len(SF) > 0: w = df / (np.sum(df) + 1e-15) + + # Weighted Lehmer mean for F self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) - self.MCr[self.k_idx] = np.sum(w * SCr) + + # Terminal condition check for Cr + if self.MCr[self.k_idx] == -1.0 or np.max(SCr) == 0: + self.MCr[self.k_idx] = -1.0 # Terminal 
state \perp + else: + # Weighted Lehmer mean for Cr + self.MCr[self.k_idx] = np.sum(w * (SCr**2)) / (np.sum(w * SCr) + 1e-15) + self.k_idx = (self.k_idx + 1) % self.memory_size + else: + # Memory reset rule if no successful trials + self.MF[self.k_idx] = 0.5 + self.MCr[self.k_idx] = 0.5 def optimize(self, fitness_function=None, args=None): fitness = super().optimize(fitness_function) diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index e670a2c..0842d85 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -3,15 +3,14 @@ class NL_SHADE_RSP(DE): - start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx"] + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pa"] def __init__(self, problem, options): super().__init__(problem, options) - self.Nmax = self.n_individuals if self.n_individuals else 170 - self.Nmin = options.get("Nmin", 30) + self.Nmax = options.get("Nmax", 30 * self.ndim_problem) + self.Nmin = options.get("Nmin", 4) self.n_individuals = self.Nmax - self.pb = 0.4 self.pa = 0.5 # Archive @@ -49,9 +48,11 @@ def _choose_F_Cr(self, NP): cauchy_locs = self.MF[ind_r] F = self._sample_cauchy(cauchy_locs, 0.1, NP) # Symmetry correction for negative values - while np.any(F <= 0): + attempts = 0 + while np.any(F <= 0) and attempts < 100: idx = np.where(F <= 0)[0] F[idx] = self._sample_cauchy(cauchy_locs[idx], 0.1, len(idx)) + attempts += 1 return Cr, np.minimum(1, F) def _update_memory(self, SF, SCr, df): @@ -67,72 +68,147 @@ def iterate(self, x=None, y=None, args=None): if x is None or y is None: raise ValueError("x and y must be provided for iteration.") NP = x.shape[0] + + # Sort population according to fitness for RSP and Crossover mapping + sort_idx = np.argsort(y) + x = x[sort_idx] + y = y[sort_idx] + Cr, F = self._choose_F_Cr(NP) - # Mutation: current-to-pbest/1 with 
archive - pb_upper = int(max(2, NP * self.pb)) - pbest_idx = np.argsort(y)[:pb_upper] - x_pbest = x[self.rng_optimization.choice(pbest_idx, NP)] + # Sort Cr so better individuals get smaller Cr (for exponential crossover) + Cr = np.sort(Cr) - r1 = self.rng_optimization.integers(0, NP, size=NP) - # Ensure distinct r1 - for i in range(NP): - while r1[i] == i: - r1[i] = self.rng_optimization.integers(0, NP) + # Adaptive greediness pb (from 0.4 to 0.2) + nfe_ratio = self.n_function_evaluations / self.max_function_evaluations + pb = 0.4 - 0.2 * nfe_ratio + pb_upper = max(2, int(np.round(NP * pb))) + + # Adaptive Cr_b for binomial crossover + Cr_b = 0.0 if nfe_ratio < 0.5 else 2.0 * (nfe_ratio - 0.5) + + # Rank-based probabilities for r2 (RSP) + ranks = np.exp(-np.arange(NP) / NP) + pr = ranks / np.sum(ranks) - # Archive vs Population selection for x2 x2 = np.zeros_like(x) use_arc = self.rng_optimization.random(NP) < self.pa - arc_idx = np.where(use_arc & (len(self.archive) > 0))[0] - pop_idx = np.where(~use_arc | (len(self.archive) == 0))[0] - - if len(pop_idx) > 0: - r2 = self.rng_optimization.integers(0, NP, size=len(pop_idx)) - x2[pop_idx] = x[r2] - if len(arc_idx) > 0: - r_arc = self.rng_optimization.integers( - 0, len(self.archive), size=len(arc_idx) + + r1 = np.zeros(NP, dtype=int) + r2 = np.zeros(NP, dtype=int) + pbest_idx = np.zeros(NP, dtype=int) + + for i in range(NP): + # pbest index + valid_pbest = [j for j in range(pb_upper) if j != i] + pb_i = int(self.rng_optimization.choice(valid_pbest)) if valid_pbest else i + pbest_idx[i] = pb_i + + # r1 index (uniform) + valid_r1 = [j for j in range(NP) if j not in (i, pb_i)] + r1_i = ( + int(self.rng_optimization.choice(valid_r1)) + if valid_r1 + else self.rng_optimization.integers(0, NP) ) - x2[arc_idx] = self.archive[r_arc] + r1[i] = r1_i - # Generate Trials + # r2 index (archive or RSP) + if use_arc[i] and len(self.archive) > 0: + r2[i] = self.rng_optimization.integers(0, len(self.archive)) + x2[i] = 
self.archive[r2[i]] + else: + use_arc[i] = False + valid_r2 = [j for j in range(NP) if j not in (i, pb_i, r1_i)] + + if valid_r2: + # Re-normalize RSP probabilities for the remaining valid choices + valid_pr = pr[valid_r2] / np.sum(pr[valid_r2]) + r2_i = int(self.rng_optimization.choice(valid_r2, p=valid_pr)) + else: + r2_i = self.rng_optimization.integers(0, NP) + + r2[i] = r2_i + x2[i] = x[r2_i] + + # Generate Trials: current-to-pbest/1 + x_pbest = x[pbest_idx] vs = x + F[:, np.newaxis] * (x_pbest - x) + F[:, np.newaxis] * (x[r1] - x2) vs = np.clip(vs, self.lower_boundary, self.upper_boundary) - # Binomial Crossover - jrand = self.rng_optimization.integers(self.ndim_problem, size=NP) - mask = self.rng_optimization.random((NP, self.ndim_problem)) < Cr[:, np.newaxis] - us = np.where(mask, vs, x) - us[np.arange(NP), jrand] = vs[np.arange(NP), jrand] + # Dual Crossover Handling + us = np.copy(x) + for i in range(NP): + if self.rng_optimization.random() < 0.5: + # Binomial crossover with Cr_b + jrand = self.rng_optimization.integers(self.ndim_problem) + for j in range(self.ndim_problem): + if self.rng_optimization.random() < Cr_b or j == jrand: + us[i, j] = vs[i, j] + else: + # Exponential crossover with Cr_i + n1 = self.rng_optimization.integers(self.ndim_problem) + n2 = 1 + while self.rng_optimization.random() < Cr[i] and n2 < self.ndim_problem: + n2 += 1 + for j in range(n2): + idx = (n1 + j) % self.ndim_problem + us[i, idx] = vs[i, idx] # Selection new_y = np.array([self._evaluate_fitness(ui, args) for ui in us]) - better = new_y < y + better_idx = np.where(new_y < y)[0] + + if len(better_idx) > 0: + # Update Archive Probability (pa) + df = y[better_idx] - new_y[better_idx] + arc_used_better = use_arc[better_idx] + + df_A = np.sum(df[arc_used_better]) + df_P = np.sum(df[~arc_used_better]) + n_A_total = np.sum(use_arc) + n_P_total = NP - n_A_total + + mean_A = df_A / n_A_total if n_A_total > 0 else 0 + mean_P = df_P / n_P_total if n_P_total > 0 else 0 + + if mean_A 
+ mean_P > 0: + self.pa = mean_A / (mean_A + mean_P) + self.pa = np.clip(self.pa, 0.1, 0.9) # Clipping rule applied - if np.any(better): # Update Archive - success_x = x[better] + success_x = x[better_idx] self.archive = np.vstack([self.archive, success_x]) if len(self.archive) > self.NA: - self.archive = self.archive[-self.NA :] + # Remove random individuals + remove_idx = self.rng_optimization.choice( + len(self.archive), len(self.archive) - self.NA, replace=False + ) + self.archive = np.delete(self.archive, remove_idx, axis=0) - # Record successes for memory - df = (y[better] - new_y[better]) / (y[better] + 1e-15) - self._update_memory(F[better], Cr[better], df) + # Record successes for memory update + self._update_memory(F[better_idx], Cr[better_idx], df) - x[better], y[better] = us[better], new_y[better] + x[better_idx] = us[better_idx] + y[better_idx] = new_y[better_idx] - # NLPSR - FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations + # NLPSR (Non-Linear Population Size Reduction) + FEs = self.n_function_evaluations + MaxFEs = self.max_function_evaluations + nfe_ratio_nlpsr = FEs / MaxFEs new_NP = int( np.round( - self.Nmax - + (self.Nmin - self.Nmax) * np.power(FEs / MaxFEs, 1 - FEs / MaxFEs) + (self.Nmin - self.Nmax) + * np.power(nfe_ratio_nlpsr, 1.0 - nfe_ratio_nlpsr) + + self.Nmax ) ) + new_NP = max(self.Nmin, new_NP) + if new_NP < NP: - idx = np.argsort(y)[:new_NP] - x, y = x[idx], y[idx] + sort_idx_final = np.argsort(y) + x = x[sort_idx_final][:new_NP] + y = y[sort_idx_final][:new_NP] self.n_individuals = new_NP self.NA = int(max(new_NP * 2.1, self.Nmin)) @@ -148,6 +224,7 @@ def optimize(self, fitness_function=None, args=None): x, y = self.initialize(args, x, y) while True: + old_evals = self.n_function_evaluations self._print_verbose_info(fitness, y) x, y = self.iterate(x, y, args) self.results.update( @@ -156,7 +233,7 @@ def optimize(self, fitness_function=None, args=None): "y": y, } ) - if self._check_terminations(): + if 
self._check_terminations() or self.n_function_evaluations == old_evals: break return self._collect(fitness, y) @@ -171,7 +248,6 @@ def set_data( if x is None or y is None: self.start_conditions = {"x": None, "y": None} elif not isinstance(y, np.ndarray): - loc = locals() self.start_conditions = {} else: indices = np.argsort(y)[: self.n_individuals] diff --git a/runner.slurm b/runner.slurm index 0066523..a771cad 100644 --- a/runner.slurm +++ b/runner.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-15 # 16 tasks total +#SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs CDB_VAL=${1:-1.5} @@ -31,62 +31,74 @@ mkdir -p logs # Array of Dimensions DIMS=(2 3 5 10) -# 1. Dimension-specific CV-LOIO (Indices 0-3) +# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then MODE="CV-LOIO" DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} - echo "Running Mode: $MODE | Dimension: $DIM" + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS +# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -# 2. Dimension-specific CV-LOPO (Indices 4-7) -elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then +# 3. 
Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) +elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} - echo "Running Mode: $MODE | Dimension: $DIM" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS +# 4. Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) +elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -# 3. Dimension-specific RL-DAS-random (Indices 8-11) -elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} +# 5. Dimension-specific RL-DAS-random (Indices 16-19) +elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_${DIM} \ - -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --cdb $CDB_VAL --agent RL-DAS-random --dimensionality $DIM + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent RL-DAS-random --dimensionality $DIM -# 4. Multidimensional CV-LOIO (Index 12) -elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then +# 6. 
Multidimensional CV-LOIO (Index 20) +elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient -# 5. Multidimensional CV-LOPO (Index 13) -elif [[ $SLURM_ARRAY_TASK_ID -eq 13 ]]; then +# 7. Multidimensional CV-LOPO (Index 21) +elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient -# 6. Global Random Agent (Index 14) -elif [[ $SLURM_ARRAY_TASK_ID -eq 14 ]]; then +# 8. Global Random Agent (Index 22) +elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then echo "Running Mode: Global Random Agent" python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -# 7. Global Baselines (Index 15) -elif [[ $SLURM_ARRAY_TASK_ID -eq 15 ]]; then +# 9. 
Global Baselines (Index 23) +elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then echo "Running Mode: Baselines" python3 dynamicalgorithmselection/main.py BASELINES \ -p "${PORTFOLIO[@]}" --agent random --mode baselines From 4fe1262000aed695d317984383604a1274a9db20 Mon Sep 17 00:00:00 2001 From: wniec Date: Sat, 21 Feb 2026 15:05:19 +0100 Subject: [PATCH 15/20] fix out of index in NL-SHADE-RSP, add only PG agent test --- .../optimizers/DE/NL_SHADE_RSP.py | 1 + only_PG.slurm | 73 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 only_PG.slurm diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index 0842d85..29d0fd6 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -32,6 +32,7 @@ def initialize(self, args=None, x=None, y=None): ) if y is None: y = np.array([self._evaluate_fitness(xi, args) for xi in x]) + self.memory_size = len(self.MF) return x, y def _sample_cauchy(self, loc, scale, size): diff --git a/only_PG.slurm b/only_PG.slurm new file mode 100644 index 0000000..16d3396 --- /dev/null +++ b/only_PG.slurm @@ -0,0 +1,73 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # Increased to 24 tasks total to split sequential runs + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +# Store the remaining arguments as an array called PORTFOLIO. +# If no additional arguments were provided, fall back to your default. 
+if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 4. Dimension-specific CV-LOPO | Policy Gradient (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 6. Multidimensional CV-LOIO (Index 20) +elif [[ $SLURM_ARRAY_TASK_ID -eq 7 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 7. Multidimensional CV-LOPO (Index 21) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 8. 
Global Random Agent (Index 22) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + echo "Running Mode: Global Random Agent" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random +fi \ No newline at end of file From 977f2d018c2d6ce47101645def5fa4b718829be6 Mon Sep 17 00:00:00 2001 From: wniec Date: Sat, 21 Feb 2026 17:17:21 +0100 Subject: [PATCH 16/20] update history dealing for policy-gradient agent --- .../optimizers/DE/JDE21.py | 32 +++++++---- .../optimizers/DE/MADDE.py | 55 ++++++++++++++++++- .../optimizers/DE/NL_SHADE_RSP.py | 37 ++++++++++++- 3 files changed, 108 insertions(+), 16 deletions(-) diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py index 8512eb1..1ce830b 100644 --- a/dynamicalgorithmselection/optimizers/DE/JDE21.py +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -43,7 +43,8 @@ def __init__(self, problem, options): # Shared Upper Bound for F self.Fu = 1.1 - self.F, self.Cr = None, None + self.F = np.full(self.n_individuals, self.Finit) + self.Cr = np.full(self.n_individuals, self.CRinit) def initialize(self, args=None, x=None, y=None): if x is None: @@ -57,11 +58,12 @@ def initialize(self, args=None, x=None, y=None): self.sNP = min(10, max(1, self.n_individuals // 4)) self.bNP = self.n_individuals - self.sNP if y is None: - y = np.array([self._evaluate_fitness(xi, args) for xi in x]) - self.F = np.full(self.n_individuals, self.Finit) if self.F is None else self.F - self.Cr = ( - np.full(self.n_individuals, self.CRinit) if self.Cr is None else self.Cr - ) + y = np.array( + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x) + ] + ) return x, y def _reflect_bounds(self, v): @@ -221,7 +223,7 @@ def safe_choice(preferred_pool, exclude): u[mask] = v[mask] # Evaluate - new_y = self._evaluate_fitness(u, args) + new_y = self._evaluate_fitness(u, args, 
F=self.F[i], Cr=self.Cr[i]) # Crowding & Selection if is_big: @@ -259,7 +261,10 @@ def iterate(self, x=None, y=None, args=None): (self.bNP, self.ndim_problem), ) y[: self.bNP] = np.array( - [self._evaluate_fitness(xi, args) for xi in x[: self.bNP]] + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x[: self.bNP]) + ] ) self.F[: self.bNP] = self.Finit self.Cr[: self.bNP] = self.CRinit @@ -281,7 +286,10 @@ def iterate(self, x=None, y=None, args=None): (self.sNP, self.ndim_problem), ) y[self.bNP :] = np.array( - [self._evaluate_fitness(xi, args) for xi in x[self.bNP :]] + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x[self.bNP :]) + ] ) self.F[self.bNP :] = self.Finit self.Cr[self.bNP :] = self.CRinit @@ -326,7 +334,7 @@ def optimize(self, fitness_function=None, args=None): while True: old_evals = self.n_function_evaluations x, y = self.iterate(x, y, args) - self.results.update({"x": x, "y": y}) + self.results.update({"x": x, "y": y, "Cr": self.Cr[:], "F": self.F[:]}) if self._check_terminations() or self.n_function_evaluations == old_evals: break @@ -340,10 +348,10 @@ def set_data(self, x=None, y=None, *args, **kwargs): else: indices = np.argsort(y)[: self.n_individuals] self.start_conditions = {"x": x[indices], "y": y[indices]} - self.Cr = kwargs.get("Cr") + self.Cr = kwargs.get("Cr", self.Cr) if self.Cr is not None: self.Cr = self.Cr[indices] - self.F = kwargs.get("F") + self.F = kwargs.get("F", self.F) if self.F is not None: self.F = self.F[indices] self.best_so_far_x = kwargs.get("best_x", None) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py index 2d55ac6..35a5945 100644 --- a/dynamicalgorithmselection/optimizers/DE/MADDE.py +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -37,7 +37,19 @@ def initialize(self, args=None, x=None, y=None): (self.n_individuals, self.ndim_problem), ) if y is None: - y = 
np.array([self._evaluate_fitness(xi, args) for xi in x]) + y = np.array( + [ + self._evaluate_fitness( + xi, + args, + MF=self.MF[:], + MCr=self.MCr[:], + k_idx=self.k_idx, + pm=self.pm, + ) + for xi in x + ] + ) return x, y def _choose_F_Cr(self, NP): @@ -141,7 +153,19 @@ def iterate(self, x=None, y=None, args=None): u[qu_idx] = self._binomial(cross_qbest, v[qu_idx], Cr[qu_idx]) # Evaluation and Selection - new_y = np.array([self._evaluate_fitness(ui, args) for ui in u]) + new_y = np.array( + [ + self._evaluate_fitness( + ui, + args, + MF=self.MF[:], + MCr=self.MCr[:], + k_idx=self.k_idx, + pm=self.pm, + ) + for ui in u + ] + ) optim = new_y < y if np.any(optim): @@ -276,8 +300,35 @@ def optimize(self, fitness_function=None, args=None): { "x": x, "y": y, + "archive": self.archive, + "MF": self.MF, + "MCr": self.MCr, + "k_idx": self.k_idx, + "pm": self.pm, } ) if self._check_terminations(): break return self._collect(fitness, y) + + def set_data( + self, + x=None, + y=None, + *args, + **kwargs, + ): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + start_conditions = {} + start_conditions.update({"x": x[indices], "y": y[indices]}) + self.start_conditions = start_conditions + for var in ["archive", "MF", "MCr", "k_idx", "pm"]: + if var in kwargs: + setattr(self, var, kwargs[var]) + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index 29d0fd6..9cf09c0 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -31,7 +31,19 @@ def initialize(self, args=None, x=None, y=None): (self.n_individuals, self.ndim_problem), ) if y is None: - y = 
np.array([self._evaluate_fitness(xi, args) for xi in x]) + y = np.array( + [ + self._evaluate_fitness( + xi, + args, + MF=self.MF[:], + MCr=self.MCr[:], + k_idx=self.k_idx, + pa=self.pa, + ) + for xi in x + ] + ) self.memory_size = len(self.MF) return x, y @@ -157,7 +169,20 @@ def iterate(self, x=None, y=None, args=None): us[i, idx] = vs[i, idx] # Selection - new_y = np.array([self._evaluate_fitness(ui, args) for ui in us]) + + new_y = np.array( + [ + self._evaluate_fitness( + ui, + args, + MF=self.MF[:], + MCr=self.MCr[:], + k_idx=self.k_idx, + pa=self.pa, + ) + for ui in us + ] + ) better_idx = np.where(new_y < y)[0] if len(better_idx) > 0: @@ -232,6 +257,11 @@ def optimize(self, fitness_function=None, args=None): { "x": x, "y": y, + "archive": self.archive[:], + "MF": self.MF[:], + "MCr": self.MCr[:], + "k_idx": self.k_idx, + "pa": self.pa, } ) if self._check_terminations() or self.n_function_evaluations == old_evals: @@ -255,5 +285,8 @@ def set_data( start_conditions = {} start_conditions.update({"x": x[indices], "y": y[indices]}) self.start_conditions = start_conditions + for var in ["archive", "MF", "MCr", "k_idx", "pa"]: + if var in kwargs: + setattr(self, var, kwargs[var]) self.best_so_far_x = kwargs.get("best_x", None) self.best_so_far_y = kwargs.get("best_y", float("inf")) From 4420d8d461b521c5ca7592f9979b71a2f5b676a8 Mon Sep 17 00:00:00 2001 From: wniec Date: Sun, 22 Feb 2026 15:38:23 +0100 Subject: [PATCH 17/20] update documentation --- README.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 94ddfd5..5469d65 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ uv run das [options] ``` -### **Arguments** +### **💡 Arguments** | Argument | Type | Default | Description | |------------------------------------|-------------|-----------------------------|------------------------------------------------------------------------------------------------------------------| @@ -68,7 +68,7 @@ uv 
run das [options] | `-c`, `--compare` / `--no-compare` | `bool` | `False` | Whether to compare results against standalone optimizers. | | `-e`, `--wandb_entity` | `str` | `None` | Weights and Biases (WandB) entity name. | | `-w`, `--wandb_project` | `str` | `None` | Weights and Biases (WandB) project name. | -| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`, `RL-DAS`. | +| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`, `RL-DAS`, `RL-DAS-random`. | | `-l`, `--mode` | `str` | `LOIO` | Train/Test split mode (see [Split Strategies](https://www.google.com/search?q=%23-train-test-split-strategies)). | | `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. | | `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. | @@ -79,6 +79,19 @@ uv run das [options] --- +### **🤖 Agent** + +There are following agent options available in this project. + +| Agent | Uses CDB? | Description | Implementation | +|------------------------------------|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------| +| `neuroevolution` | Yes | Neuroevolution-based agent. Its training is implemented using NEAT algorithm. | [here](dynamicalgorithmselection/agents/neuroevolution_agent.py) | +| `policy-gradient` | Yes | PPO-based agent. Main subject of experiments. | [here](dynamicalgorithmselection/agents/policy_gradient_agent.py) | +| `random` | Yes | Baseline for agents, that use Checkpoint division. Randomly selects actions using equal probabilities. 
| [here](dynamicalgorithmselection/agents/random_agent.py) | +| `RL-DAS` | No | Implementation of [Deep Reinforcement Learning for Dynamic Algorithm Selection: A Proof-of-Principle Study on Differential Evolution](https://doi.org/10.48550/arXiv.2403.02131). | [here](dynamicalgorithmselection/agents/RLDAS_agent.py) | +| `RL-DAS-random` | No | Implementation of the baseline proposed by the authors of `RL-DAS` algorithm. Randomly selects action using equal probabilities. | [here](dynamicalgorithmselection/agents/RLDAS_random_agent.py) | +--- + ## 📊 Train-Test Split Strategies The `-l` / `--mode` argument determines how the dataset is divided: @@ -149,7 +162,7 @@ There are three options for representing the optimization state (`-r` flag): this [paper](https://arxiv.org/pdf/2408.10672). 3. **`custom`:** A proposed feature extraction method - implemented [here](https://www.google.com/search?q=dynamicalgorithmselection/agents/agent_state.py%2349). This can be + implemented [here](dynamicalgorithmselection/agents/agent_state.py). This can be modified to include additional features. 
--- From 4cb1514e4b6a528802ad7dcf023e7c4358df058e Mon Sep 17 00:00:00 2001 From: wniec Date: Mon, 23 Feb 2026 00:50:21 +0100 Subject: [PATCH 18/20] fix RLDAS agent's evaluation --- .../agents/RLDAS_agent.py | 15 ++++++++++--- .../agents/RLDAS_random_agent.py | 15 ++++++++++--- .../agents/agent_utils.py | 21 ++----------------- dynamicalgorithmselection/experiments/core.py | 3 --- .../experiments/experiment.py | 19 +---------------- .../experiments/utils.py | 7 ------- tests/test_experiment.py | 7 ++----- 7 files changed, 29 insertions(+), 58 deletions(-) diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 14835ee..1fa4b86 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -217,10 +217,19 @@ def optimize(self, fitness_function=None, args=None): best_y=self.best_so_far_y, ) - res = sub_opt.optimize() + result = sub_opt.optimize() - population_x = res["x"] - population_y = res["y"] + self.fitness_history.extend(result["fitness_history"]) + + self._save_fitness( + result["best_so_far_x"], + result["best_so_far_y"], + result["worst_so_far_x"], + result["worst_so_far_y"], + ) + + population_x = result["x"] + population_y = result["y"] self.n_function_evaluations = sub_opt.n_function_evaluations diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py index 74b4d7c..71a55b5 100644 --- a/dynamicalgorithmselection/agents/RLDAS_random_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -153,10 +153,19 @@ def optimize(self, fitness_function=None, args=None): best_y=self.best_so_far_y, ) - res = sub_opt.optimize() + result = sub_opt.optimize() - population_x = res["x"] - population_y = res["y"] + self.fitness_history.extend(result["fitness_history"]) + + self._save_fitness( + result["best_so_far_x"], + result["best_so_far_y"], + result["worst_so_far_x"], + 
result["worst_so_far_y"], + ) + + population_x = result["x"] + population_y = result["y"] self.n_function_evaluations = sub_opt.n_function_evaluations diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 849c941..0bb2f23 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -8,7 +8,6 @@ def get_runtime_stats( fitness_history: list[tuple[int, float]], function_evaluations: int, - checkpoints: np.ndarray, ) -> dict[ str, float | list[float] ]: # Changed from list[Optional[float]] to list[float] @@ -20,42 +19,26 @@ def get_runtime_stats( """ area_under_optimization_curve = 0.0 last_i = 0 - checkpoint_idx = 0 - last_fitness = float("inf") - checkpoints_fitness: list[float] = [] for i, fitness in fitness_history: area_under_optimization_curve += fitness * (i - last_i) - while ( - checkpoint_idx < len(checkpoints) - and last_i <= checkpoints[checkpoint_idx] < i - ): - checkpoints_fitness.append(last_fitness) - checkpoint_idx += 1 last_i = i - last_fitness = fitness area_under_optimization_curve += fitness_history[-1][1] * ( function_evaluations - fitness_history[-1][0] ) final_fitness = fitness_history[-1][1] - if function_evaluations == checkpoints[-1]: - while len(checkpoints_fitness) < len(checkpoints): - checkpoints_fitness.append(final_fitness) - return { "area_under_optimization_curve": area_under_optimization_curve / function_evaluations, "final_fitness": final_fitness, - "checkpoints_fitness": checkpoints_fitness, } def get_extreme_stats( fitness_histories: dict[str, list[tuple[int, float]]], function_evaluations: int, - checkpoints: np.ndarray, ) -> tuple[dict[str, float | list[float]], dict[str, float | list[float]]]: """ :param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm @@ -101,8 +84,8 @@ def get_extreme_stats( # These now match the expected return 
type of tuple[dict[str, float | list[float]], ...] return ( - get_runtime_stats(best_history, function_evaluations, checkpoints), - get_runtime_stats(worst_history, function_evaluations, checkpoints), + get_runtime_stats(best_history, function_evaluations), + get_runtime_stats(worst_history, function_evaluations), ) diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 237f636..6ab5405 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -33,9 +33,6 @@ def run_testing( options.get("name"), problem_id, max_fe, - options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index a296832..fe7b578 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -2,8 +2,6 @@ import os from typing import Type, Optional -import numpy as np - from dynamicalgorithmselection.experiments.core import run_testing, run_training from dynamicalgorithmselection.experiments.cross_validation import run_cross_validation from dynamicalgorithmselection.experiments.neuroevolution import ( @@ -20,7 +18,6 @@ from dynamicalgorithmselection.agents.agent_utils import ( get_extreme_stats, - get_checkpoints, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer @@ -30,16 +27,8 @@ def dump_extreme_stats( stats, problem_instance, max_function_evaluations, - n_checkpoints, - n_individuals, - cdb, ): - checkpoints = get_checkpoints( - n_checkpoints, max_function_evaluations, n_individuals or 100, cdb - ) - best_case, worst_case = get_extreme_stats( - stats, max_function_evaluations, checkpoints - ) + best_case, worst_case = get_extreme_stats(stats, max_function_evaluations) with open( os.path.join( "results", @@ -220,9 +209,6 @@ def run_comparison( 
result_folder_name, problem_id, max_fe, - options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) dump_extreme_stats( @@ -230,7 +216,4 @@ def run_comparison( stats, problem_id, max_fe, - options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index 18e489f..890dd2b 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -98,13 +98,7 @@ def dump_stats( name, problem_instance, max_function_evaluations, - n_checkpoints, - n_individuals, - cdb, ): - checkpoints = get_checkpoints( - n_checkpoints, max_function_evaluations, n_individuals or 100, cdb - ) with open( os.path.join( "results", @@ -118,7 +112,6 @@ def dump_stats( problem_instance: get_runtime_stats( results["fitness_history"], max_function_evaluations, - checkpoints, ) }, f, diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 9d88b24..e493066 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -113,19 +113,16 @@ def test_run_comparison( self.assertEqual(mock_dump_stats.call_count, 2) mock_dump_extreme.assert_called_once() - @patch("dynamicalgorithmselection.experiments.experiment.get_checkpoints") @patch("dynamicalgorithmselection.experiments.experiment.get_extreme_stats") @patch("builtins.open", new_callable=mock_open) @patch("json.dump") - def test_dump_extreme_stats( - self, mock_json_dump, mock_file, mock_get_extreme, mock_get_checkpoints - ): + def test_dump_extreme_stats(self, mock_json_dump, mock_file, mock_get_extreme): stats: dict[str, list[Any]] = {"Opt1": [], "Opt2": []} mock_get_extreme.return_value = ({"best": 1}, {"worst": 0}) case_name = "OPT1_OPT2_OPT3" - dump_extreme_stats(case_name, stats, "p1", 100, 5, 10, 0.5) + dump_extreme_stats(case_name, stats, "p1", 100) self.assertEqual(mock_file.call_count, 2) 
self.assertEqual(mock_json_dump.call_count, 2) From b505d6470dc2cd487df4ec163eaa28da6dfb646a Mon Sep 17 00:00:00 2001 From: wniec Date: Mon, 23 Feb 2026 01:25:08 +0100 Subject: [PATCH 19/20] fix RLDAS state representation --- .../agents/agent_state.py | 147 ++++++++---------- 1 file changed, 61 insertions(+), 86 deletions(-) diff --git a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index 9df5349..856dc9e 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -375,111 +375,86 @@ def normalize(self, state, update=True): def get_la_features(agent, pop_x, pop_y): """ - Extracts 9 Landscape Analysis features described in Reinforcement Learning Dynamic Algorithm Selection. - Includes sampling-based features (f5-f8) which consume function evaluations. + Extracts 9 Landscape Analysis features based on the logic in Population.py. + Uses a single-step random walk for sampling-based features (f5-f8) to + save function evaluations. 
""" - sorted_idx = np.argsort(pop_y) - pop_x = pop_x[sorted_idx] - pop_y = pop_y[sorted_idx] - - best_y = pop_y[0] - best_x = pop_x[0] n = len(pop_x) + best_y = np.min(pop_y) + best_x = pop_x[np.argmin(pop_y)] norm_factor = ( agent.initial_cost - if agent.initial_cost and abs(agent.initial_cost) > 1e-9 + if hasattr(agent, "initial_cost") + and agent.initial_cost + and abs(agent.initial_cost) > 1e-9 else 1.0 ) - f1 = best_y / norm_factor + f1_gbc = best_y / norm_factor dists_to_best = np.linalg.norm(pop_x - best_x, axis=1) if np.std(pop_y) < 1e-9 or np.std(dists_to_best) < 1e-9: - f2 = 0.0 + f2_fdc = 0.0 else: fdc, _ = spearmanr(pop_y, dists_to_best) - f2 = fdc if not np.isnan(fdc) else 0.0 + f2_fdc = fdc if not np.isnan(fdc) else 0.0 n_top = max(2, int(0.1 * n)) - if n > 1: dist_matrix_all = pdist(pop_x) disp_all = np.mean(dist_matrix_all) if len(dist_matrix_all) > 0 else 0.0 - dist_matrix_top = pdist(pop_x[:n_top]) + # Get distances for the top 10% individuals + top_idx = np.argsort(pop_y)[:n_top] + dist_matrix_top = pdist(pop_x[top_idx]) disp_top = np.mean(dist_matrix_top) if len(dist_matrix_top) > 0 else 0.0 - f3 = disp_all - disp_top - f4 = np.max(dist_matrix_all) if len(dist_matrix_all) > 0 else 0.0 + f3_disp = disp_all - disp_top + f4_disp_ratio = disp_top / disp_all if disp_all > 1e-9 else 0.0 else: - f3, f4 = 0.0, 0.0 - - remaining_fes = agent.max_function_evaluations - agent.n_function_evaluations - cost_per_sample = n # 1 generation of size N - - sampled_pops_y = [] - - if remaining_fes >= (2 * cost_per_sample): - sample_indices = np.random.choice(len(agent.actions), 2, replace=False) - - for idx in sample_indices: - alg_class = agent.actions[idx] - - sub_opt = alg_class(agent.problem, agent.options) - - sub_opt.population = pop_x.copy() - sub_opt.fitness = pop_y.copy() - - sub_opt.n_function_evaluations = 0 - sub_opt.max_function_evaluations = cost_per_sample - - sub_opt.optimize() - - sampled_pops_y.append(sub_opt.fitness) - agent.n_function_evaluations 
+= sub_opt.n_function_evaluations - - f5, f6, f7, f8 = 0.0, 0.0, 0.0, 0.0 - - if len(sampled_pops_y) > 0: - sorted_current = np.sort(pop_y) - sorted_samples = [np.sort(sy) for sy in sampled_pops_y] - avg_sample_y = np.mean(sorted_samples, axis=0) - - # Slopes: (y_{i+1} - y_i) - diff_current = np.diff(sorted_current) - diff_sample = np.diff(avg_sample_y) + f3_disp, f4_disp_ratio = 0.0, 0.0 - with np.errstate(divide="ignore", invalid="ignore"): - ratios = diff_current / diff_sample - ratios[diff_sample == 0] = 0.0 - ratios[np.isnan(ratios)] = 0.0 - - f5 = min(np.sum(ratios), 0.0) - - S = len(sampled_pops_y) - eps = 1e-8 - - neutral_count = 0 - no_improve_counts = np.zeros(n) # For f7 - all_worse_counts = np.zeros(n) # For f8 - - for sy in sampled_pops_y: - neutral_count += np.sum(np.abs(pop_y - sy) < eps) - - improved = sy < pop_y - no_improve_counts += improved.astype(int) # Add 1 if improved - - worse = sy > pop_y - all_worse_counts += worse.astype(int) - - f6 = neutral_count / (n * S) - - alphas = (no_improve_counts == 0).astype(float) - f7 = np.mean(alphas) - - betas = (all_worse_counts == S).astype(float) - f8 = np.mean(betas) - - f9 = agent.n_function_evaluations / agent.max_function_evaluations - - return np.array([f1, f2, f3, f4, f5, f6, f7, f8, f9]) + # Adjust step size based on your search space bounds if available + step_scale = 0.01 + if hasattr(agent, "Xmax") and hasattr(agent, "Xmin"): + step_size = step_scale * (agent.Xmax - agent.Xmin) + else: + step_size = step_scale + + random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape) + + # Evaluate the random walk samples + sample_costs = [agent.fitness_function(i) for i in random_walk_samples] + agent.n_function_evaluations += n # Increment evaluations by population size + + # Calculate differences between the walk and the current population + diffs = np.array(sample_costs) - pop_y + + # --- Feature 5: Negative Slope Coefficient (nsc) --- + # Proportion of steps that resulted in an 
improvement + f5_nsc = np.sum(diffs < 0) / n + + # --- Feature 6: Average Neutral Ratio (anr) --- + # Proportion of steps that resulted in practically zero change + eps = 1e-8 + f6_anr = np.sum(np.abs(diffs) < eps) / n + + f7_ni = np.sum(diffs >= 0) / n # Ratio of individuals that failed to improve + f8_nw = np.sum(diffs <= 0) / n # Ratio of individuals that failed to worsen + + # --- Feature 9: Progress --- + f9_progress = agent.n_function_evaluations / agent.max_function_evaluations + + return np.array( + [ + f1_gbc, + f2_fdc, + f3_disp, + f4_disp_ratio, + f5_nsc, + f6_anr, + f7_ni, + f8_nw, + f9_progress, + ] + ) From 181e708c0e337827cbc40be55c3ed0189f5a8c82 Mon Sep 17 00:00:00 2001 From: wniec Date: Tue, 24 Feb 2026 13:59:38 +0100 Subject: [PATCH 20/20] fix memory issues for non-standard portfolio. Add Leave-One-Dimension-Out scenario. Add profiler to development dependencies --- compare2ELA.slurm => CDB_study.slurm | 22 +++--- .../agents/RLDAS_agent.py | 6 +- dynamicalgorithmselection/agents/agent.py | 12 ++- .../experiments/cross_validation.py | 39 +++++++--- .../experiments/experiment.py | 2 +- .../experiments/utils.py | 13 +--- dynamicalgorithmselection/main.py | 23 +++--- .../optimizers/DE/JDE21.py | 12 +-- .../optimizers/DE/MADDE.py | 8 -- .../optimizers/DE/NL_SHADE_RSP.py | 8 -- .../optimizers/DS/POWELL.py | 33 +++------ .../optimizers/ES/CMAES.py | 16 ++-- .../optimizers/ES/LMCMAES.py | 5 +- .../optimizers/ES/OPOA2015.py | 47 ++++++------ only_PG.slurm | 73 ------------------- portfolio_study.slurm | 67 +++++++++++++++++ pyproject.toml | 1 + runner.slurm | 9 +-- tests/test_cross_validation.py | 4 +- tests/test_experiment.py | 2 +- uv.lock | 17 +++++ 21 files changed, 209 insertions(+), 210 deletions(-) rename compare2ELA.slurm => CDB_study.slurm (66%) delete mode 100644 only_PG.slurm create mode 100644 portfolio_study.slurm diff --git a/compare2ELA.slurm b/CDB_study.slurm similarity index 66% rename from compare2ELA.slurm rename to CDB_study.slurm index 
18bc73e..ff7ac0f 100644 --- a/compare2ELA.slurm +++ b/CDB_study.slurm @@ -7,7 +7,7 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-13 # 14 tasks total +#SBATCH --array=0-9 # 10 tasks total CDB_VAL=${1:-1.5} @@ -15,13 +15,13 @@ if [ "$#" -gt 0 ]; then shift fi -# Store the remaining arguments as an array called PORTFOLIO. -# If no additional arguments were provided, fall back to your default. if [ "$#" -eq 0 ]; then PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') else PORTFOLIO=("$@") fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" @@ -37,7 +37,7 @@ if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM}_ELA \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient @@ -47,21 +47,21 @@ elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} echo "Running Mode: $MODE | Dimension: $DIM" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM}_ELA \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient -# 4. Multidimensional CV-LOIO (Index 12) -elif [[ $SLURM_ARRAY_TASK_ID -eq 12 ]]; then +# 3. 
Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then MODE="CV-LOIO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_ELA \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient -# 5. Multidimensional CV-LOPO (Index 13) -elif [[ $SLURM_ARRAY_TASK_ID -eq 13 ]]; then +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then MODE="CV-LOPO" echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL}_ELA \ + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient fi \ No newline at end of file diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py index 1fa4b86..f02bc47 100644 --- a/dynamicalgorithmselection/agents/RLDAS_agent.py +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -10,11 +10,11 @@ DEVICE, RolloutBuffer, RLDASNetwork, - GAMMA, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer INITIAL_POPSIZE = 170 +GAMMA = 0.99 class RLDASAgent(Agent): @@ -40,7 +40,7 @@ def __init__(self, problem, options): self.mean_rewards = options.get("mean_rewards", []) self.best_50_mean = float("inf") self.schedule_interval = options.get( - "schedule_interval", int(self.max_function_evaluations / 50) + "schedule_interval", int(self.max_function_evaluations / 10) ) expected_trajectory_length = int( @@ -272,7 +272,7 @@ def optimize(self, fitness_function=None, args=None): self._n_generations += 1 self._print_verbose_info(fitness, self.best_so_far_y) - + print(self._n_generations) fes_end = 
self.n_function_evaluations speed_factor = self.max_function_evaluations / fes_end diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index 3b515d9..abecf65 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -175,7 +175,17 @@ def iterate( raise ValueError("Inputs to iterate cannot be None") optimizer_input_data["best_x"] = self.best_so_far_x optimizer_input_data["best_y"] = self.best_so_far_y - optimizer.set_data(**optimizer_input_data) + + historic_data = { + k[:-8]: v + for k, v in optimizer_input_data.items() + if k.endswith("_history") and k != "fitness_history" + } + current_data = { + k: v for k, v in optimizer_input_data.items() if not k.endswith("_history") + } + combined_input = current_data | historic_data + optimizer.set_data(**combined_input) if self._check_terminations(): return optimizer.get_data() diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index 368cb1f..8a10b2f 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -1,6 +1,6 @@ import os from itertools import product -from typing import Type, Optional +from typing import Type, List import cocoex import numpy as np @@ -18,13 +18,15 @@ def run_cross_validation( optimizer: Type[Optimizer], options: dict, evaluations_multiplier: int = 1_000, - is_loio: bool = True, + leaving_mode: str = "LOIO", ): results_dir = os.path.join("results", f"{options.get('name')}") if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, cv_folds = _get_cv_folds(4, is_loio, options.get("dimensionality")) + problems_suite, cv_folds = _get_cv_folds( + 4 if leaving_mode != "LODO" else 3, leaving_mode, options.get("dimensionality") + ) options["n_problems"] = len(cv_folds[0]) observer = cocoex.Observer("bbob", 
"result_folder: " + options["name"])
     for i, (train_set, test_set) in enumerate(cv_folds):
@@ -54,7 +56,7 @@
     return observer.result_folder
 
 
-def _get_cv_folds(n: int, is_loio: bool, dim: Optional[int]):
+def _get_cv_folds(n: int, leaving_mode: str, dim: List[int]):
     """
     :param n: number of cross validation folds
     :param is_loio: boolean to indicate how train and test sets should be split (leave-instance-out/leave-problem-out).
@@ -65,22 +67,21 @@
     problems_suite = cocoex.Suite("bbob", "", "")
     all_problem_ids = [
         f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}"
-        for i_id, f_id, dim in product(
-            INSTANCE_IDS, ALL_FUNCTIONS, (DIMENSIONS if dim is None else [dim])
-        )
+        for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, dim)
     ]
     remaining_problem_ids = set(all_problem_ids)
+    remaining_dimensions = set(DIMENSIONS)
     remaining_function_ids = {i for i in ALL_FUNCTIONS}
     test_sets = []
     for i in range(n):
-        if is_loio:
+        if leaving_mode == "LOIO":
             selected = np.random.choice(
                 list(remaining_problem_ids),
                 size=len(all_problem_ids) // n,
                 replace=False,
             ).tolist()
             remaining_problem_ids = remaining_problem_ids.difference(selected)
-        else:
+        elif leaving_mode == "LOPO":
             selected_functions = np.random.choice(
                 list(remaining_function_ids),
                 size=len(ALL_FUNCTIONS) // n,
@@ -94,9 +95,25 @@
             remaining_function_ids = remaining_function_ids.difference(
                 selected_functions
            )
+        else:
+            selected_dimensionalities = np.random.choice(
+                list(remaining_dimensions),
+                size=len(DIMENSIONS) // n,
+                replace=False,
+            ).tolist()
+            selected = [
+                i
+                for i in all_problem_ids
+                if any(i.endswith(f"d{dim:02d}") for dim in selected_dimensionalities)
+            ]
+            remaining_dimensions = remaining_dimensions.difference(
+                selected_dimensionalities
+            )
         test_sets.append(selected)
-
-    return problems_suite, [
+    folds = [
         (list(set(all_problem_ids).difference(test_set)), 
test_set) for test_set in test_sets ] + for fold in folds: + np.random.shuffle(fold[0]) + return problems_suite, folds diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index fe7b578..48b4692 100644 --- a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -67,7 +67,7 @@ def coco_bbob_experiment( options["name"] = name if mode.startswith("CV"): return run_cross_validation( - optimizer, options, evaluations_multiplier, is_loio=mode.endswith("LOIO") + optimizer, options, evaluations_multiplier, leaving_mode=mode[-4:] ) elif agent in ["random", "RL-DAS-random"]: return _coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode) diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index 890dd2b..f59a136 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -1,13 +1,12 @@ import json import os from itertools import islice, product -from typing import Type, Optional +from typing import Type, List import cocoex import numpy as np from dynamicalgorithmselection.agents.agent_utils import ( - get_checkpoints, get_runtime_stats, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer @@ -48,7 +47,7 @@ def coco_bbob_single_function( return results -def get_suite(mode: str, train: bool, dim: Optional[int]): +def get_suite(mode: str, train: bool, dim: List[int]): """ :param mode: mode of the training (LOPO: easy and hard) or LOIO :param train: if suite should be for testing or training: @@ -59,9 +58,7 @@ def get_suite(mode: str, train: bool, dim: Optional[int]): problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product( - INSTANCE_IDS, ALL_FUNCTIONS, (DIMENSIONS if dim is None else [dim]) - ) + for i_id, f_id, dim in 
product(INSTANCE_IDS, ALL_FUNCTIONS, dim) ] if mode in ["easy", "hard"]: easy = mode == "easy" @@ -73,9 +70,7 @@ def get_suite(mode: str, train: bool, dim: Optional[int]): problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product( - INSTANCE_IDS, function_ids, (DIMENSIONS if dim is None else [dim]) - ) + for i_id, f_id, dim in product(INSTANCE_IDS, function_ids, dim) ] elif mode == "LOIO": diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index 924b87c..52e877d 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -75,14 +75,6 @@ def parse_arguments(): help="Run also in test mode (default: True)", ) - parser.add_argument( - "-c", - "--compare", - action=argparse.BooleanOptionalAction, - default=False, - help="Enable comparison with each algorithm alone (False by default)", - ) - parser.add_argument( "-e", "--wandb_entity", @@ -113,7 +105,7 @@ def parse_arguments(): "--mode", type=str, default="LOIO", - choices=["LOIO", "hard", "easy", "CV-LOIO", "CV-LOPO", "baselines"], + choices=["LOIO", "hard", "easy", "CV-LODO", "CV-LOIO", "CV-LOPO", "baselines"], help="specify which agent to use", ) @@ -145,9 +137,10 @@ def parse_arguments(): parser.add_argument( "-D", "--dimensionality", - type=int, choices=DIMENSIONS, - default=None, + nargs="+", + type=int, + default=DIMENSIONS, help="dimensionality of problems", ) @@ -188,7 +181,7 @@ def print_info(args): print("Population size: ", args.population_size) print("Function eval multiplier: ", args.fe_multiplier) print("Test mode: ", args.test) - print("Compare mode: ", args.compare) + print("Mode: ", args.mode) print("Weights and Biases entity: ", args.wandb_entity) print("Weights and Biases project: ", args.wandb_project) print("Agent type: ", args.agent if args.mode != "baselines" else None) @@ -285,6 +278,10 @@ def run_training(args, action_space): def run_CV(args, action_space): if os.path.exists(os.path.join("exdata", 
f"DAS_CV_{args.name}")): shutil.rmtree(os.path.join("exdata", f"DAS_CV_{args.name}")) + if args.mode == "CV-LODO" and args.dimensionality != DIMENSIONS: + raise ValueError( + "FOR Leave-One-Dimension-Out scenario all dimensionalities must be provided." + ) coco_bbob_experiment( AGENTS_DICT[args.agent], { @@ -359,7 +356,7 @@ def main(): run_training(args, action_space) if args.test and args.mode != "baselines": test(args, action_space) - if args.compare or args.mode == "baselines": + if args.mode == "baselines": run_baselines(args, action_space) diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py index 1ce830b..ce15507 100644 --- a/dynamicalgorithmselection/optimizers/DE/JDE21.py +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -348,11 +348,11 @@ def set_data(self, x=None, y=None, *args, **kwargs): else: indices = np.argsort(y)[: self.n_individuals] self.start_conditions = {"x": x[indices], "y": y[indices]} - self.Cr = kwargs.get("Cr", self.Cr) - if self.Cr is not None: - self.Cr = self.Cr[indices] - self.F = kwargs.get("F", self.F) - if self.F is not None: - self.F = self.F[indices] + Cr = kwargs.get("Cr") + if Cr is not None: + self.Cr = Cr[indices] + F = kwargs.get("F") + if F is not None: + self.F = F[indices] self.best_so_far_x = kwargs.get("best_x", None) self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py index 35a5945..27bf266 100644 --- a/dynamicalgorithmselection/optimizers/DE/MADDE.py +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -42,10 +42,6 @@ def initialize(self, args=None, x=None, y=None): self._evaluate_fitness( xi, args, - MF=self.MF[:], - MCr=self.MCr[:], - k_idx=self.k_idx, - pm=self.pm, ) for xi in x ] @@ -158,10 +154,6 @@ def iterate(self, x=None, y=None, args=None): self._evaluate_fitness( ui, args, - MF=self.MF[:], - MCr=self.MCr[:], - 
k_idx=self.k_idx, - pm=self.pm, ) for ui in u ] diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py index 9cf09c0..150ee6d 100644 --- a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -36,10 +36,6 @@ def initialize(self, args=None, x=None, y=None): self._evaluate_fitness( xi, args, - MF=self.MF[:], - MCr=self.MCr[:], - k_idx=self.k_idx, - pa=self.pa, ) for xi in x ] @@ -175,10 +171,6 @@ def iterate(self, x=None, y=None, args=None): self._evaluate_fitness( ui, args, - MF=self.MF[:], - MCr=self.MCr[:], - k_idx=self.k_idx, - pa=self.pa, ) for ui in us ] diff --git a/dynamicalgorithmselection/optimizers/DS/POWELL.py b/dynamicalgorithmselection/optimizers/DS/POWELL.py index 44fb383..9041bc7 100644 --- a/dynamicalgorithmselection/optimizers/DS/POWELL.py +++ b/dynamicalgorithmselection/optimizers/DS/POWELL.py @@ -113,20 +113,16 @@ class POWELL(DS): def __init__(self, problem, options): DS.__init__(self, problem, options) self._func = None # only for inner line searcher - self.y_history = [] - self.x_history = [] def initialize(self, x=None, y=None, u=None, args=None, is_restart=False): x = ( self._initialize_x(is_restart) if x is None else x ) # initial (starting) search point - y = self._evaluate_fitness(x, args, u=u) if y is None else y # fitness + y = self._evaluate_fitness(x, args) if y is None else y # fitness u = np.identity(self.ndim_problem) if u is None else u - self.y_history.append(y) - self.x_history.append(x) def _wrapper(xx): - return self._evaluate_fitness(xx, args, u=u) + return self._evaluate_fitness(xx, args) self._func = _wrapper return x, y, u, y @@ -156,8 +152,6 @@ def iterate(self, x=None, y=None, u=None, args=None): return x, y, u, ys d, diff = u[i], y y, x, d, fitness = self._line_search(x, d) - self.y_history.append(y) - self.x_history.append(x) ys.extend(fitness) diff -= y if diff > delta: @@ -165,9 
+159,7 @@ def iterate(self, x=None, y=None, u=None, args=None): d = x - xx # extrapolated point _, ratio_e = _line_for_search(x, d, self.lower_boundary, self.upper_boundary) xxx = x + min(ratio_e, 1.0) * d - yyy = self.fitness_function(xxx) - self.y_history.append(yyy) - self.x_history.append(xxx) + yyy = self._evaluate_fitness(xxx, args=args) if yy > yyy: t, temp = 2.0 * (yy + yyy - 2.0 * y), yy - y - delta t *= np.square(temp) @@ -175,8 +167,6 @@ def iterate(self, x=None, y=None, u=None, args=None): t -= delta * np.square(temp) if t < 0.0: y, x, d, fitness = self._line_search(x, d) - self.y_history.append(y) - self.x_history.append(x) ys.extend(fitness) if np.any(d): u[big_ind] = u[-1] @@ -217,26 +207,21 @@ def set_data( u = None self.start_conditions = { "x": x, - "Y": y, + "y": y, "u": u, } self.best_so_far_x = kwargs.get("best_x", None) self.best_so_far_y = kwargs.get("best_y", float("inf")) def get_data(self, n_individuals: Optional[int] = None): - best_indices = sorted( - [i for i in range(len(self.y_history))], - key=lambda x: self.y_history[x], - )[:n_individuals] - x = np.array(self.x_history)[best_indices] - y = np.array(self.y_history)[best_indices] + indices = np.argsort(self.y_history)[: min(len(self.y_history), 200)] + x = np.array(self.x_history)[indices] + y = np.array(self.y_history)[indices] return ( self.results | { - { - "x": x, - "Y": y, - } + "x": x, + "y": y, } or self.start_conditions ) diff --git a/dynamicalgorithmselection/optimizers/ES/CMAES.py b/dynamicalgorithmselection/optimizers/ES/CMAES.py index 74b5896..fa10171 100644 --- a/dynamicalgorithmselection/optimizers/ES/CMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/CMAES.py @@ -41,9 +41,6 @@ def __init__(self, problem, options): None, 2.0, ) # for CMA (c_w -> c_μ) - self._save_eig = options.get( - "_save_eig", False - ) # whether or not save eigenvalues and eigenvectors def _set_c_c(self): """Set decay rate of evolution path for the rank-one update of CMA.""" @@ -161,7 +158,11 @@ 
def iterate( ) # Gaussian noise for mutation d[k] = np.dot(e_ve @ np.diag(e_va), z) x[k] = mean + self.sigma * d[k] # offspring individual - y[k] = self._evaluate_fitness(x[k], args, d=d[k], e_ve=e_ve, e_va=e_va) + y[k] = self._evaluate_fitness( + x[k], + args, + d=d[k], + ) return x, y, d def update_distribution( @@ -315,9 +316,7 @@ def optimize( } ) results = self._collect(fitness, y, mean) - # by default do *NOT* save eigenvalues and eigenvectors (with *quadratic* space complexity) - if self._save_eig: - results["e_va"], results["e_ve"] = e_va, e_ve + results["e_va"], results["e_ve"] = e_va, e_ve return results def set_data( @@ -360,9 +359,10 @@ def set_data( "cm", "e_ve", "e_va", - "d", ] } + start_conditions["d"] = d[indices] if d is not None else None + mean = x[indices].mean(axis=0) stds = np.std(x[indices], axis=0) sigma: float = np.max(stds) diff --git a/dynamicalgorithmselection/optimizers/ES/LMCMAES.py b/dynamicalgorithmselection/optimizers/ES/LMCMAES.py index 7f3f7e5..d857312 100644 --- a/dynamicalgorithmselection/optimizers/ES/LMCMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/LMCMAES.py @@ -86,14 +86,13 @@ def iterate(self, mean=None, x=None, pm=None, vm=None, y=None, b=None, args=None z = self.rng_optimization.standard_normal((self.ndim_problem,)) a_z = self._a_z(z, pm, vm, b) - # FIX 2: Check for potential overflow before update mutation_step = sign * self.sigma * a_z if np.any(np.isnan(mutation_step)) or np.any(np.isinf(mutation_step)): # Fallback to prevent crash, effectively skipping this mutation mutation_step = np.zeros_like(mutation_step) x[k] = mean + mutation_step - y[k] = self._evaluate_fitness(x[k], args, pm=pm, vm=vm, b=b) + y[k] = self._evaluate_fitness(x[k], args) sign *= -1 return x, y @@ -119,7 +118,6 @@ def _update_distribution( ): mean_bak = np.dot(self._w, x[np.argsort(y)[: self.n_parents]]) - # FIX 3: Safety clamp for sigma to prevent division by zero or overflow safe_sigma = np.clip(self.sigma, 1e-20, 1e20) p_c = 
self._p_c_1 * p_c + self._p_c_2 * (mean_bak - mean) / safe_sigma @@ -147,7 +145,6 @@ def _update_distribution( vm[self._j[i]] = self._a_inv_z(pm[self._j[i]], vm, d, i) v_n = np.dot(vm[self._j[i]], vm[self._j[i]]) - # FIX 4: Safety clamp for v_n (denominator safety) # If v_n is 0 or NaN, b and d will explode. if v_n < 1e-20: v_n = 1e-20 diff --git a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py index c1b4d08..7dfa552 100644 --- a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py +++ b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py @@ -23,8 +23,6 @@ def cholesky_update(rm, z, downdate): class OPOA2015(ES): def __init__(self, problem, options): - self.mean_history = [] - self.y_history = [] options["n_individuals"] = 1 # mandatory setting options["n_parents"] = 1 # mandatory setting ES.__init__(self, problem, options | {"sigma": 0.9}) @@ -56,9 +54,6 @@ def initialize( self._evaluate_fitness( x=mean, args=args, - cf=cf, - p_s=p_s, - p_c=p_c, ) if y is None else y @@ -99,9 +94,6 @@ def iterate( y = self._evaluate_fitness( x=x, args=args, - cf=cf, - p_s=p_s, - p_c=p_c, ) if y <= best_so_far_y: self._ancestors.append(y) @@ -125,14 +117,14 @@ def iterate( self._n_generations += 1 self.results.update( { + "x": np.array([x]), + "mean": mean, "cf": cf, "best_so_far_y": best_so_far_y, "p_s": p_s, "p_c": p_c, } ) - self.mean_history.append(mean) - self.y_history.append(y) return mean, y, cf, best_so_far_y, p_s, p_c def restart_reinitialize( @@ -161,7 +153,9 @@ def restart_reinitialize( self._list_generations.append(self._n_generations) # for each restart self._n_generations = 0 self.sigma = np.copy(self._sigma_bak) - mean, y, cf, best_so_far_y, p_s, p_c = self.initialize(args, True) + mean, y, cf, best_so_far_y, p_s, p_c = self.initialize( + args, is_restart=True + ) self._list_fitness = [best_so_far_y] self._ancestors = [] return mean, y, cf, best_so_far_y, p_s, p_c @@ -177,7 +171,13 @@ def optimize(self, 
fitness_function=None, args=None): y = self.start_conditions.get("y", None) mean, y, cf, best_so_far_y, p_s, p_c = self.initialize( - mean, y, cf, best_so_far_y, p_s, p_c, args + mean=mean, + y=y, + cf=cf, + best_so_far_y=best_so_far_y, + p_s=p_s, + p_c=p_c, + args=args, ) while not self._check_terminations(): self._print_verbose_info(fitness, y) @@ -201,15 +201,24 @@ def set_data( *args, **kwargs, ): + if isinstance(y, np.ndarray) and x is not None: + best_idx = np.argmin(y) + y_val = float(y[best_idx]) + x_val = x[best_idx] + mean = x_val if mean is None else mean + else: + y_val = y if isinstance(y, float) else None + x_val = x if x is not None else mean + mean = ( mean if mean is not None else (np.mean(x, axis=0) if x is not None else None) ) - y = y if isinstance(y, float) else None self.start_conditions = { + "x": x_val, "mean": mean, - "y": y, + "y": y_val, "cf": cf, "best_so_far_y": best_so_far_y, "p_s": p_s, @@ -220,11 +229,7 @@ def set_data( self.best_so_far_y = kwargs.get("best_y", float("inf")) def get_data(self, n_individuals: Optional[int] = None): - pop_data = ["x", "y"] - best_indices = sorted( - [i for i in range(len(self.y_history))], - key=lambda x: self.y_history[x], - )[:n_individuals] - x = np.array(self.mean_history)[best_indices] - y = np.array(self.y_history)[best_indices] + indices = np.argsort(self.y_history)[: min(len(self.y_history), 200)] + x = np.array(self.x_history)[indices] + y = np.array(self.y_history)[indices] return self.results | {"x": x, "y": y} or self.start_conditions diff --git a/only_PG.slurm b/only_PG.slurm deleted file mode 100644 index 16d3396..0000000 --- a/only_PG.slurm +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=rl_das_experiment -#SBATCH --output=logs/experiment_%A_%a.out -#SBATCH --error=logs/experiment_%A_%a.err -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem=32G -#SBATCH --time=48:00:00 -#SBATCH --partition=plgrid-gpu-a100 -#SBATCH --array=0-9 # Increased to 24 tasks total to 
split sequential runs - -CDB_VAL=${1:-1.5} - -if [ "$#" -gt 0 ]; then - shift -fi - -# Store the remaining arguments as an array called PORTFOLIO. -# If no additional arguments were provided, fall back to your default. -if [ "$#" -eq 0 ]; then - PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') -else - PORTFOLIO=("$@") -fi - -# CONFIGURATION -ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" -source "$ENV_PATH" -mkdir -p logs - -# Array of Dimensions -DIMS=(2 3 5 10) - -# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 0-3) -if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then - MODE="CV-LOIO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient - -# 4. Dimension-specific CV-LOPO | Policy Gradient (Indices 4-7) -elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then - MODE="CV-LOPO" - DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} - echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" - - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ - --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient - -# 6. Multidimensional CV-LOIO (Index 20) -elif [[ $SLURM_ARRAY_TASK_ID -eq 7 ]]; then - MODE="CV-LOIO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient - -# 7. 
Multidimensional CV-LOPO (Index 21) -elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then - MODE="CV-LOPO" - echo "Running Mode: $MODE | Multidimensional PG" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient - -# 8. Global Random Agent (Index 22) -elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then - echo "Running Mode: Global Random Agent" - python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ - -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -fi \ No newline at end of file diff --git a/portfolio_study.slurm b/portfolio_study.slurm new file mode 100644 index 0000000..4282edd --- /dev/null +++ b/portfolio_study.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +if [ "$#" -eq 0 ]; then + PORTFOLIO=('MADDE' 'CMAES' 'SPSO') +else + PORTFOLIO=("$@") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. 
Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6010a65..dece663 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ "pre-commit>=4.5.1", + "py-spy>=0.4.1", "pytest>=9.0.2", "ruff>=0.14.5", ] diff --git a/runner.slurm b/runner.slurm index a771cad..d0bbca4 100644 --- a/runner.slurm +++ b/runner.slurm @@ -17,11 +17,8 @@ fi # Store the remaining arguments as an array called PORTFOLIO. # If no additional arguments were provided, fall back to your default. 
-if [ "$#" -eq 0 ]; then - PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') -else - PORTFOLIO=("$@") -fi + +PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" @@ -101,5 +98,5 @@ elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then echo "Running Mode: Baselines" python3 dynamicalgorithmselection/main.py BASELINES \ - -p "${PORTFOLIO[@]}" --agent random --mode baselines + -p "${PORTFOLIO[@]}" --mode baselines fi \ No newline at end of file diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py index 9ba9f4d..a8c1e58 100644 --- a/tests/test_cross_validation.py +++ b/tests/test_cross_validation.py @@ -24,7 +24,7 @@ def test_get_cv_folds_structure(self, mock_suite, mock_miniprint): # so we check if it returns lists of correct length/structure. n_folds = 4 - suite, folds = _get_cv_folds(n_folds, is_loio=True, dim=10) + suite, folds = _get_cv_folds(n_folds, leaving_mode="LOIO", dim=[10]) self.assertIsInstance(suite, MagicMock) # Should return the mocked suite self.assertEqual(len(folds), n_folds) @@ -67,7 +67,7 @@ def test_run_cross_validation_flow( # Execute res_folder = run_cross_validation( - self.optimizer_mock, self.options, self.eval_mult, is_loio=True + self.optimizer_mock, self.options, self.eval_mult, leaving_mode="LOIO" ) # Assertions diff --git a/tests/test_experiment.py b/tests/test_experiment.py index e493066..809dc31 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -32,7 +32,7 @@ def test_coco_bbob_experiment_dispatch_cv(self, mock_cv): self.optimizer_mock, self.options, "test_exp", mode="CV_LOIO" ) mock_cv.assert_called_once() - self.assertTrue(mock_cv.call_args[1]["is_loio"]) + self.assertTrue(mock_cv.call_args[1]["leaving_mode"]) @patch("dynamicalgorithmselection.experiments.experiment._coco_bbob_test_all") def test_coco_bbob_experiment_dispatch_random(self, mock_test_all): diff --git a/uv.lock b/uv.lock 
index 68aef71..294498f 100644 --- a/uv.lock +++ b/uv.lock @@ -319,6 +319,7 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "pre-commit" }, + { name = "py-spy" }, { name = "pytest" }, { name = "ruff" }, ] @@ -342,6 +343,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "pre-commit", specifier = ">=4.5.1" }, + { name = "py-spy", specifier = ">=0.4.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "ruff", specifier = ">=0.14.5" }, ] @@ -1222,6 +1224,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" }, ] +[[package]] +name = "py-spy" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/e2/ff811a367028b87e86714945bb9ecb5c1cc69114a8039a67b3a862cef921/py_spy-0.4.1.tar.gz", hash = "sha256:e53aa53daa2e47c2eef97dd2455b47bb3a7e7f962796a86cc3e7dbde8e6f4db4", size = 244726, upload-time = "2025-07-31T19:33:25.172Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/e3/3a32500d845bdd94f6a2b4ed6244982f42ec2bc64602ea8fcfe900678ae7/py_spy-0.4.1-py2.py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:809094208c6256c8f4ccadd31e9a513fe2429253f48e20066879239ba12cd8cc", size = 3682508, upload-time = "2025-07-31T19:33:13.753Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/e4d280e9e0bec71d39fc646654097027d4bbe8e04af18fb68e49afcff404/py_spy-0.4.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:1fb8bf71ab8df95a95cc387deed6552934c50feef2cf6456bc06692a5508fd0c", size = 1796395, upload-time = "2025-07-31T19:33:15.325Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/79/9ed50bb0a9de63ed023aa2db8b6265b04a7760d98c61eb54def6a5fddb68/py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee776b9d512a011d1ad3907ed53ae32ce2f3d9ff3e1782236554e22103b5c084", size = 2034938, upload-time = "2025-07-31T19:33:17.194Z" }, + { url = "https://files.pythonhosted.org/packages/53/a5/36862e3eea59f729dfb70ee6f9e14b051d8ddce1aa7e70e0b81d9fe18536/py_spy-0.4.1-py2.py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:532d3525538254d1859b49de1fbe9744df6b8865657c9f0e444bf36ce3f19226", size = 2658968, upload-time = "2025-07-31T19:33:18.916Z" }, + { url = "https://files.pythonhosted.org/packages/08/f8/9ea0b586b065a623f591e5e7961282ec944b5fbbdca33186c7c0296645b3/py_spy-0.4.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4972c21890b6814017e39ac233c22572c4a61fd874524ebc5ccab0f2237aee0a", size = 2147541, upload-time = "2025-07-31T19:33:20.565Z" }, + { url = "https://files.pythonhosted.org/packages/68/fb/bc7f639aed026bca6e7beb1e33f6951e16b7d315594e7635a4f7d21d63f4/py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6a80ec05eb8a6883863a367c6a4d4f2d57de68466f7956b6367d4edd5c61bb29", size = 2763338, upload-time = "2025-07-31T19:33:22.202Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/fcc9a9fcd4ca946ff402cff20348e838b051d69f50f5d1f5dca4cd3c5eb8/py_spy-0.4.1-py2.py3-none-win_amd64.whl", hash = "sha256:d92e522bd40e9bf7d87c204033ce5bb5c828fca45fa28d970f58d71128069fdc", size = 1818784, upload-time = "2025-07-31T19:33:23.802Z" }, +] + [[package]] name = "pydantic" version = "2.12.2"