diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d286e02 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.0 + hooks: + - id: ruff-format \ No newline at end of file diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c07333 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/CDB_study.slurm b/CDB_study.slurm new file mode 100644 index 0000000..ff7ac0f --- /dev/null +++ b/CDB_study.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +if [ "$#" -eq 0 ]; then + PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') +else + PORTFOLIO=("$@") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. 
Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/LOIO_train_set.json b/LOIO_train_set.json deleted file mode 100644 index 84afe0c..0000000 --- a/LOIO_train_set.json +++ /dev/null @@ -1,1444 +0,0 @@ -{ - "data": [ - "bbob_f021_i78_d02", - "bbob_f001_i03_d05", - "bbob_f021_i05_d02", - "bbob_f021_i05_d03", - "bbob_f002_i71_d03", - "bbob_f010_i04_d03", - "bbob_f012_i74_d10", - "bbob_f003_i73_d03", - "bbob_f002_i04_d03", - "bbob_f013_i75_d20", - "bbob_f005_i05_d40", - "bbob_f004_i80_d10", - "bbob_f021_i78_d10", - "bbob_f008_i01_d10", - "bbob_f009_i04_d03", - "bbob_f009_i01_d05", - "bbob_f020_i79_d10", - "bbob_f015_i73_d02", - "bbob_f013_i74_d40", - "bbob_f017_i78_d20", - "bbob_f023_i03_d03", - "bbob_f005_i02_d40", - "bbob_f017_i75_d05", - "bbob_f003_i73_d02", - "bbob_f022_i05_d02", - "bbob_f011_i05_d03", - "bbob_f012_i02_d10", - "bbob_f024_i04_d05", - "bbob_f003_i72_d02", - 
"bbob_f013_i02_d02", - "bbob_f011_i72_d05", - "bbob_f021_i80_d40", - "bbob_f014_i78_d40", - "bbob_f024_i01_d02", - "bbob_f018_i02_d10", - "bbob_f012_i03_d20", - "bbob_f002_i71_d02", - "bbob_f002_i75_d20", - "bbob_f007_i04_d40", - "bbob_f005_i73_d40", - "bbob_f022_i75_d20", - "bbob_f024_i77_d02", - "bbob_f017_i74_d20", - "bbob_f011_i02_d03", - "bbob_f004_i04_d40", - "bbob_f009_i02_d40", - "bbob_f020_i03_d20", - "bbob_f023_i78_d10", - "bbob_f004_i71_d05", - "bbob_f020_i72_d20", - "bbob_f018_i77_d10", - "bbob_f010_i02_d20", - "bbob_f009_i04_d05", - "bbob_f023_i73_d02", - "bbob_f019_i03_d10", - "bbob_f011_i04_d40", - "bbob_f014_i71_d05", - "bbob_f010_i02_d05", - "bbob_f022_i05_d20", - "bbob_f005_i03_d05", - "bbob_f002_i79_d03", - "bbob_f024_i80_d03", - "bbob_f001_i73_d02", - "bbob_f024_i74_d20", - "bbob_f013_i74_d10", - "bbob_f005_i74_d03", - "bbob_f003_i71_d02", - "bbob_f018_i80_d02", - "bbob_f008_i75_d02", - "bbob_f015_i74_d02", - "bbob_f020_i73_d05", - "bbob_f007_i73_d05", - "bbob_f014_i04_d40", - "bbob_f009_i77_d02", - "bbob_f004_i76_d40", - "bbob_f019_i80_d02", - "bbob_f013_i76_d40", - "bbob_f018_i71_d20", - "bbob_f009_i71_d02", - "bbob_f011_i75_d40", - "bbob_f012_i05_d10", - "bbob_f012_i01_d10", - "bbob_f019_i73_d40", - "bbob_f005_i71_d40", - "bbob_f021_i77_d02", - "bbob_f014_i02_d05", - "bbob_f001_i04_d10", - "bbob_f023_i80_d02", - "bbob_f009_i73_d10", - "bbob_f015_i80_d40", - "bbob_f024_i72_d20", - "bbob_f012_i75_d20", - "bbob_f009_i77_d10", - "bbob_f008_i03_d02", - "bbob_f011_i75_d20", - "bbob_f016_i01_d10", - "bbob_f012_i72_d05", - "bbob_f004_i02_d03", - "bbob_f017_i77_d40", - "bbob_f012_i79_d02", - "bbob_f022_i75_d03", - "bbob_f007_i03_d05", - "bbob_f017_i76_d10", - "bbob_f010_i75_d40", - "bbob_f010_i72_d20", - "bbob_f024_i77_d03", - "bbob_f011_i73_d20", - "bbob_f007_i80_d40", - "bbob_f004_i78_d40", - "bbob_f019_i79_d05", - "bbob_f014_i05_d05", - "bbob_f020_i76_d20", - "bbob_f024_i79_d03", - "bbob_f019_i77_d05", - "bbob_f016_i05_d02", - "bbob_f024_i79_d05", 
- "bbob_f001_i71_d02", - "bbob_f015_i05_d20", - "bbob_f002_i77_d05", - "bbob_f001_i75_d03", - "bbob_f022_i78_d03", - "bbob_f011_i80_d20", - "bbob_f013_i72_d05", - "bbob_f002_i01_d10", - "bbob_f013_i02_d03", - "bbob_f008_i78_d05", - "bbob_f019_i04_d20", - "bbob_f011_i03_d03", - "bbob_f014_i05_d40", - "bbob_f019_i79_d40", - "bbob_f022_i76_d10", - "bbob_f022_i74_d03", - "bbob_f023_i78_d40", - "bbob_f023_i76_d03", - "bbob_f007_i80_d05", - "bbob_f002_i02_d20", - "bbob_f013_i74_d20", - "bbob_f021_i03_d10", - "bbob_f013_i80_d05", - "bbob_f007_i73_d02", - "bbob_f010_i77_d05", - "bbob_f022_i79_d40", - "bbob_f004_i80_d02", - "bbob_f014_i03_d20", - "bbob_f015_i02_d02", - "bbob_f011_i01_d03", - "bbob_f006_i75_d40", - "bbob_f024_i71_d03", - "bbob_f021_i73_d02", - "bbob_f012_i05_d20", - "bbob_f008_i73_d20", - "bbob_f024_i04_d40", - "bbob_f017_i76_d02", - "bbob_f021_i80_d03", - "bbob_f002_i78_d02", - "bbob_f023_i72_d10", - "bbob_f019_i75_d03", - "bbob_f005_i71_d02", - "bbob_f001_i80_d05", - "bbob_f006_i04_d05", - "bbob_f001_i03_d10", - "bbob_f023_i71_d02", - "bbob_f005_i75_d03", - "bbob_f004_i78_d03", - "bbob_f010_i71_d05", - "bbob_f021_i71_d20", - "bbob_f011_i78_d20", - "bbob_f008_i77_d10", - "bbob_f004_i72_d20", - "bbob_f018_i78_d05", - "bbob_f003_i73_d05", - "bbob_f010_i73_d40", - "bbob_f003_i75_d03", - "bbob_f020_i04_d05", - "bbob_f011_i76_d20", - "bbob_f004_i80_d05", - "bbob_f015_i71_d02", - "bbob_f012_i74_d40", - "bbob_f010_i04_d40", - "bbob_f016_i04_d05", - "bbob_f020_i79_d05", - "bbob_f009_i72_d10", - "bbob_f015_i74_d40", - "bbob_f019_i80_d10", - "bbob_f007_i74_d10", - "bbob_f009_i02_d03", - "bbob_f007_i04_d05", - "bbob_f017_i72_d10", - "bbob_f024_i78_d10", - "bbob_f015_i71_d20", - "bbob_f001_i01_d40", - "bbob_f008_i02_d03", - "bbob_f005_i76_d05", - "bbob_f015_i03_d20", - "bbob_f008_i80_d05", - "bbob_f017_i02_d03", - "bbob_f020_i72_d40", - "bbob_f021_i02_d02", - "bbob_f008_i05_d05", - "bbob_f001_i74_d02", - "bbob_f022_i76_d40", - "bbob_f012_i02_d02", - 
"bbob_f006_i74_d05", - "bbob_f021_i73_d20", - "bbob_f017_i78_d02", - "bbob_f016_i02_d03", - "bbob_f005_i74_d10", - "bbob_f010_i03_d20", - "bbob_f008_i80_d20", - "bbob_f005_i04_d05", - "bbob_f002_i78_d20", - "bbob_f018_i73_d02", - "bbob_f013_i05_d02", - "bbob_f010_i74_d02", - "bbob_f009_i03_d20", - "bbob_f021_i04_d10", - "bbob_f015_i05_d03", - "bbob_f021_i04_d40", - "bbob_f003_i04_d20", - "bbob_f021_i78_d20", - "bbob_f008_i02_d05", - "bbob_f015_i80_d10", - "bbob_f021_i76_d03", - "bbob_f013_i02_d10", - "bbob_f008_i03_d03", - "bbob_f017_i05_d20", - "bbob_f020_i03_d02", - "bbob_f004_i02_d02", - "bbob_f017_i80_d05", - "bbob_f013_i73_d05", - "bbob_f022_i04_d20", - "bbob_f020_i78_d10", - "bbob_f008_i74_d10", - "bbob_f020_i05_d10", - "bbob_f003_i71_d20", - "bbob_f007_i05_d02", - "bbob_f019_i72_d20", - "bbob_f013_i79_d03", - "bbob_f014_i72_d03", - "bbob_f015_i03_d05", - "bbob_f005_i71_d03", - "bbob_f001_i04_d02", - "bbob_f010_i80_d03", - "bbob_f023_i78_d05", - "bbob_f005_i72_d20", - "bbob_f020_i76_d10", - "bbob_f009_i80_d20", - "bbob_f016_i04_d10", - "bbob_f006_i74_d10", - "bbob_f017_i75_d40", - "bbob_f021_i71_d02", - "bbob_f002_i72_d40", - "bbob_f014_i05_d03", - "bbob_f008_i78_d03", - "bbob_f007_i02_d10", - "bbob_f022_i75_d05", - "bbob_f016_i77_d40", - "bbob_f002_i75_d10", - "bbob_f020_i02_d05", - "bbob_f019_i01_d20", - "bbob_f017_i73_d03", - "bbob_f018_i05_d40", - "bbob_f015_i77_d05", - "bbob_f017_i72_d20", - "bbob_f010_i01_d40", - "bbob_f001_i80_d02", - "bbob_f006_i77_d03", - "bbob_f009_i74_d10", - "bbob_f012_i79_d20", - "bbob_f017_i73_d20", - "bbob_f006_i75_d05", - "bbob_f013_i03_d20", - "bbob_f008_i79_d02", - "bbob_f010_i73_d20", - "bbob_f003_i02_d05", - "bbob_f023_i05_d02", - "bbob_f011_i04_d03", - "bbob_f006_i74_d40", - "bbob_f011_i79_d02", - "bbob_f011_i05_d10", - "bbob_f011_i03_d20", - "bbob_f012_i02_d05", - "bbob_f018_i71_d05", - "bbob_f020_i78_d40", - "bbob_f012_i75_d05", - "bbob_f009_i80_d40", - "bbob_f004_i71_d40", - "bbob_f004_i04_d10", - "bbob_f004_i74_d03", 
- "bbob_f016_i72_d40", - "bbob_f024_i78_d03", - "bbob_f003_i79_d10", - "bbob_f023_i75_d05", - "bbob_f015_i77_d40", - "bbob_f010_i75_d02", - "bbob_f024_i02_d10", - "bbob_f008_i01_d05", - "bbob_f017_i79_d40", - "bbob_f003_i74_d02", - "bbob_f005_i72_d05", - "bbob_f008_i05_d10", - "bbob_f019_i78_d03", - "bbob_f019_i01_d03", - "bbob_f011_i75_d10", - "bbob_f004_i79_d03", - "bbob_f017_i80_d10", - "bbob_f008_i77_d03", - "bbob_f017_i71_d20", - "bbob_f019_i05_d40", - "bbob_f004_i73_d10", - "bbob_f006_i73_d40", - "bbob_f016_i05_d10", - "bbob_f002_i05_d20", - "bbob_f009_i05_d20", - "bbob_f019_i80_d05", - "bbob_f020_i79_d40", - "bbob_f019_i05_d03", - "bbob_f011_i01_d20", - "bbob_f009_i71_d03", - "bbob_f021_i72_d10", - "bbob_f006_i80_d10", - "bbob_f001_i04_d05", - "bbob_f022_i71_d05", - "bbob_f008_i73_d40", - "bbob_f006_i72_d05", - "bbob_f019_i02_d20", - "bbob_f011_i02_d40", - "bbob_f001_i73_d03", - "bbob_f015_i76_d05", - "bbob_f014_i04_d05", - "bbob_f009_i73_d40", - "bbob_f019_i76_d02", - "bbob_f024_i80_d10", - "bbob_f003_i75_d02", - "bbob_f016_i79_d20", - "bbob_f010_i78_d20", - "bbob_f018_i05_d02", - "bbob_f015_i80_d03", - "bbob_f016_i05_d40", - "bbob_f021_i04_d05", - "bbob_f019_i02_d40", - "bbob_f022_i72_d40", - "bbob_f015_i73_d10", - "bbob_f011_i77_d40", - "bbob_f023_i80_d40", - "bbob_f012_i75_d40", - "bbob_f012_i78_d03", - "bbob_f010_i76_d02", - "bbob_f021_i75_d40", - "bbob_f007_i74_d40", - "bbob_f012_i75_d03", - "bbob_f018_i02_d03", - "bbob_f009_i03_d02", - "bbob_f002_i79_d10", - "bbob_f022_i03_d20", - "bbob_f018_i73_d05", - "bbob_f004_i76_d03", - "bbob_f016_i04_d20", - "bbob_f020_i76_d02", - "bbob_f002_i02_d03", - "bbob_f006_i72_d10", - "bbob_f015_i71_d40", - "bbob_f019_i71_d02", - "bbob_f019_i04_d40", - "bbob_f021_i01_d10", - "bbob_f019_i76_d20", - "bbob_f023_i75_d10", - "bbob_f016_i72_d03", - "bbob_f016_i78_d03", - "bbob_f022_i01_d40", - "bbob_f017_i75_d10", - "bbob_f016_i01_d05", - "bbob_f023_i04_d10", - "bbob_f004_i79_d20", - "bbob_f002_i72_d02", - 
"bbob_f019_i05_d02", - "bbob_f023_i80_d10", - "bbob_f017_i03_d05", - "bbob_f019_i80_d40", - "bbob_f002_i05_d02", - "bbob_f002_i73_d03", - "bbob_f015_i04_d02", - "bbob_f013_i05_d20", - "bbob_f022_i02_d40", - "bbob_f017_i02_d05", - "bbob_f018_i71_d03", - "bbob_f022_i01_d03", - "bbob_f014_i03_d05", - "bbob_f001_i03_d20", - "bbob_f023_i73_d10", - "bbob_f013_i76_d05", - "bbob_f006_i03_d05", - "bbob_f006_i76_d20", - "bbob_f003_i01_d20", - "bbob_f022_i74_d05", - "bbob_f003_i01_d10", - "bbob_f015_i72_d40", - "bbob_f015_i74_d10", - "bbob_f008_i02_d02", - "bbob_f004_i74_d20", - "bbob_f007_i77_d40", - "bbob_f017_i75_d20", - "bbob_f015_i78_d40", - "bbob_f021_i74_d03", - "bbob_f023_i72_d03", - "bbob_f016_i03_d10", - "bbob_f002_i71_d05", - "bbob_f023_i76_d40", - "bbob_f012_i73_d10", - "bbob_f012_i80_d40", - "bbob_f008_i04_d40", - "bbob_f006_i75_d20", - "bbob_f001_i79_d05", - "bbob_f004_i04_d20", - "bbob_f002_i05_d03", - "bbob_f019_i75_d02", - "bbob_f019_i72_d02", - "bbob_f002_i03_d02", - "bbob_f024_i01_d20", - "bbob_f020_i75_d40", - "bbob_f019_i74_d10", - "bbob_f001_i72_d05", - "bbob_f022_i77_d20", - "bbob_f011_i72_d02", - "bbob_f003_i03_d10", - "bbob_f004_i75_d40", - "bbob_f010_i77_d10", - "bbob_f014_i80_d40", - "bbob_f023_i01_d10", - "bbob_f009_i71_d40", - "bbob_f011_i79_d10", - "bbob_f011_i77_d03", - "bbob_f018_i77_d05", - "bbob_f004_i72_d05", - "bbob_f020_i79_d02", - "bbob_f001_i77_d03", - "bbob_f015_i73_d20", - "bbob_f020_i04_d10", - "bbob_f001_i02_d02", - "bbob_f020_i80_d03", - "bbob_f011_i79_d40", - "bbob_f001_i05_d02", - "bbob_f016_i72_d10", - "bbob_f008_i77_d40", - "bbob_f020_i73_d02", - "bbob_f024_i74_d03", - "bbob_f007_i76_d20", - "bbob_f020_i71_d03", - "bbob_f008_i79_d05", - "bbob_f002_i05_d05", - "bbob_f002_i74_d03", - "bbob_f013_i76_d03", - "bbob_f019_i71_d03", - "bbob_f010_i04_d05", - "bbob_f011_i79_d03", - "bbob_f023_i05_d03", - "bbob_f001_i72_d03", - "bbob_f010_i76_d03", - "bbob_f009_i80_d02", - "bbob_f013_i71_d40", - "bbob_f009_i77_d05", - "bbob_f007_i71_d03", 
- "bbob_f024_i77_d05", - "bbob_f007_i05_d40", - "bbob_f003_i77_d05", - "bbob_f018_i76_d03", - "bbob_f010_i04_d20", - "bbob_f007_i05_d03", - "bbob_f017_i01_d40", - "bbob_f015_i73_d05", - "bbob_f016_i01_d03", - "bbob_f022_i03_d05", - "bbob_f014_i02_d03", - "bbob_f001_i02_d40", - "bbob_f004_i74_d40", - "bbob_f003_i74_d10", - "bbob_f002_i04_d10", - "bbob_f001_i73_d20", - "bbob_f017_i73_d10", - "bbob_f008_i74_d05", - "bbob_f004_i05_d10", - "bbob_f012_i77_d10", - "bbob_f010_i74_d03", - "bbob_f022_i01_d02", - "bbob_f009_i76_d02", - "bbob_f009_i80_d05", - "bbob_f009_i04_d20", - "bbob_f007_i05_d10", - "bbob_f007_i76_d40", - "bbob_f022_i73_d02", - "bbob_f020_i03_d05", - "bbob_f012_i72_d03", - "bbob_f007_i80_d02", - "bbob_f021_i02_d40", - "bbob_f023_i71_d03", - "bbob_f011_i77_d20", - "bbob_f011_i02_d02", - "bbob_f002_i73_d20", - "bbob_f011_i71_d10", - "bbob_f009_i02_d05", - "bbob_f014_i72_d05", - "bbob_f017_i72_d40", - "bbob_f021_i03_d20", - "bbob_f022_i77_d10", - "bbob_f011_i04_d10", - "bbob_f010_i79_d20", - "bbob_f007_i74_d20", - "bbob_f019_i01_d05", - "bbob_f003_i05_d03", - "bbob_f021_i05_d10", - "bbob_f017_i73_d05", - "bbob_f005_i04_d03", - "bbob_f004_i73_d20", - "bbob_f016_i05_d05", - "bbob_f005_i72_d03", - "bbob_f002_i74_d20", - "bbob_f014_i75_d02", - "bbob_f004_i76_d10", - "bbob_f019_i01_d10", - "bbob_f024_i05_d03", - "bbob_f014_i76_d03", - "bbob_f022_i75_d10", - "bbob_f009_i79_d05", - "bbob_f011_i71_d03", - "bbob_f014_i04_d02", - "bbob_f003_i03_d40", - "bbob_f008_i75_d03", - "bbob_f004_i05_d05", - "bbob_f022_i02_d10", - "bbob_f014_i04_d10", - "bbob_f003_i03_d03", - "bbob_f013_i71_d10", - "bbob_f004_i78_d02", - "bbob_f002_i74_d05", - "bbob_f012_i72_d02", - "bbob_f012_i76_d10", - "bbob_f010_i77_d40", - "bbob_f015_i05_d10", - "bbob_f007_i03_d20", - "bbob_f009_i72_d20", - "bbob_f019_i78_d20", - "bbob_f010_i72_d02", - "bbob_f005_i80_d05", - "bbob_f022_i04_d02", - "bbob_f008_i04_d03", - "bbob_f004_i75_d05", - "bbob_f013_i75_d05", - "bbob_f009_i78_d03", - 
"bbob_f022_i74_d10", - "bbob_f024_i05_d10", - "bbob_f009_i78_d40", - "bbob_f012_i77_d03", - "bbob_f002_i79_d20", - "bbob_f001_i01_d20", - "bbob_f016_i80_d02", - "bbob_f017_i74_d05", - "bbob_f024_i02_d20", - "bbob_f010_i80_d20", - "bbob_f016_i71_d05", - "bbob_f019_i05_d10", - "bbob_f022_i71_d40", - "bbob_f015_i75_d05", - "bbob_f002_i78_d40", - "bbob_f004_i71_d10", - "bbob_f012_i80_d10", - "bbob_f017_i75_d03", - "bbob_f020_i04_d02", - "bbob_f023_i02_d40", - "bbob_f018_i02_d02", - "bbob_f020_i79_d03", - "bbob_f002_i80_d10", - "bbob_f020_i73_d20", - "bbob_f013_i02_d20", - "bbob_f017_i74_d10", - "bbob_f018_i04_d03", - "bbob_f013_i80_d20", - "bbob_f006_i01_d20", - "bbob_f020_i75_d05", - "bbob_f007_i79_d10", - "bbob_f015_i76_d10", - "bbob_f017_i77_d10", - "bbob_f022_i77_d05", - "bbob_f003_i79_d02", - "bbob_f015_i04_d03", - "bbob_f018_i75_d05", - "bbob_f002_i78_d05", - "bbob_f024_i76_d20", - "bbob_f008_i78_d40", - "bbob_f008_i05_d40", - "bbob_f008_i71_d03", - "bbob_f022_i03_d40", - "bbob_f001_i05_d40", - "bbob_f005_i79_d10", - "bbob_f007_i04_d10", - "bbob_f009_i74_d05", - "bbob_f003_i77_d40", - "bbob_f020_i77_d20", - "bbob_f021_i78_d40", - "bbob_f023_i72_d02", - "bbob_f002_i03_d20", - "bbob_f006_i03_d10", - "bbob_f003_i72_d20", - "bbob_f014_i03_d03", - "bbob_f002_i77_d20", - "bbob_f012_i05_d03", - "bbob_f004_i76_d05", - "bbob_f001_i78_d10", - "bbob_f012_i04_d02", - "bbob_f010_i01_d20", - "bbob_f016_i02_d02", - "bbob_f018_i75_d02", - "bbob_f007_i75_d05", - "bbob_f017_i02_d20", - "bbob_f005_i05_d03", - "bbob_f021_i79_d10", - "bbob_f005_i71_d05", - "bbob_f007_i71_d20", - "bbob_f020_i72_d05", - "bbob_f014_i03_d40", - "bbob_f005_i03_d20", - "bbob_f022_i73_d20", - "bbob_f012_i79_d03", - "bbob_f020_i75_d03", - "bbob_f003_i72_d40", - "bbob_f016_i04_d02", - "bbob_f002_i72_d20", - "bbob_f007_i71_d02", - "bbob_f009_i74_d02", - "bbob_f011_i76_d40", - "bbob_f020_i01_d02", - "bbob_f006_i71_d05", - "bbob_f014_i76_d10", - "bbob_f014_i80_d10", - "bbob_f017_i74_d40", - "bbob_f012_i04_d05", 
- "bbob_f023_i75_d03", - "bbob_f002_i80_d05", - "bbob_f009_i05_d10", - "bbob_f005_i76_d02", - "bbob_f017_i72_d03", - "bbob_f014_i04_d03", - "bbob_f011_i80_d02", - "bbob_f022_i71_d03", - "bbob_f009_i75_d20", - "bbob_f020_i74_d40", - "bbob_f015_i79_d40", - "bbob_f014_i75_d20", - "bbob_f024_i75_d03", - "bbob_f021_i02_d03", - "bbob_f010_i75_d10", - "bbob_f013_i78_d03", - "bbob_f003_i01_d05", - "bbob_f009_i78_d10", - "bbob_f010_i79_d02", - "bbob_f013_i73_d20", - "bbob_f011_i78_d05", - "bbob_f015_i80_d02", - "bbob_f006_i03_d03", - "bbob_f012_i71_d02", - "bbob_f008_i76_d03", - "bbob_f014_i03_d10", - "bbob_f014_i79_d10", - "bbob_f022_i80_d05", - "bbob_f015_i05_d05", - "bbob_f013_i02_d40", - "bbob_f003_i72_d10", - "bbob_f021_i80_d05", - "bbob_f023_i74_d10", - "bbob_f009_i74_d20", - "bbob_f015_i72_d20", - "bbob_f018_i73_d20", - "bbob_f010_i72_d10", - "bbob_f004_i02_d05", - "bbob_f009_i03_d40", - "bbob_f016_i73_d03", - "bbob_f011_i72_d10", - "bbob_f012_i80_d05", - "bbob_f017_i80_d20", - "bbob_f005_i71_d20", - "bbob_f007_i80_d03", - "bbob_f002_i03_d40", - "bbob_f018_i79_d40", - "bbob_f013_i80_d02", - "bbob_f013_i74_d05", - "bbob_f021_i01_d20", - "bbob_f006_i73_d20", - "bbob_f020_i77_d02", - "bbob_f014_i72_d10", - "bbob_f022_i01_d05", - "bbob_f007_i79_d40", - "bbob_f006_i79_d40", - "bbob_f015_i77_d02", - "bbob_f010_i05_d40", - "bbob_f001_i72_d20", - "bbob_f002_i73_d10", - "bbob_f009_i76_d03", - "bbob_f019_i79_d02", - "bbob_f002_i76_d20", - "bbob_f008_i04_d05", - "bbob_f010_i78_d05", - "bbob_f013_i71_d20", - "bbob_f014_i76_d05", - "bbob_f009_i79_d40", - "bbob_f001_i79_d03", - "bbob_f021_i71_d05", - "bbob_f002_i74_d02", - "bbob_f024_i73_d05", - "bbob_f016_i71_d20", - "bbob_f002_i76_d02", - "bbob_f023_i05_d20", - "bbob_f020_i01_d10", - "bbob_f023_i03_d05", - "bbob_f016_i74_d10", - "bbob_f008_i79_d10", - "bbob_f018_i04_d05", - "bbob_f002_i72_d05", - "bbob_f018_i04_d10", - "bbob_f022_i76_d02", - "bbob_f011_i77_d02", - "bbob_f004_i75_d03", - "bbob_f021_i71_d10", - 
"bbob_f007_i72_d10", - "bbob_f016_i01_d20", - "bbob_f007_i01_d40", - "bbob_f018_i78_d10", - "bbob_f012_i04_d20", - "bbob_f024_i71_d40", - "bbob_f003_i78_d02", - "bbob_f023_i78_d20", - "bbob_f019_i03_d40", - "bbob_f009_i01_d40", - "bbob_f021_i72_d02", - "bbob_f020_i74_d05", - "bbob_f014_i01_d40", - "bbob_f007_i72_d03", - "bbob_f006_i72_d03", - "bbob_f018_i71_d02", - "bbob_f018_i74_d40", - "bbob_f019_i73_d20", - "bbob_f007_i03_d03", - "bbob_f006_i01_d03", - "bbob_f004_i75_d02", - "bbob_f009_i75_d05", - "bbob_f004_i75_d10", - "bbob_f016_i04_d40", - "bbob_f024_i02_d40", - "bbob_f004_i77_d03", - "bbob_f021_i76_d05", - "bbob_f004_i79_d02", - "bbob_f017_i02_d02", - "bbob_f009_i05_d05", - "bbob_f007_i73_d03", - "bbob_f009_i71_d20", - "bbob_f020_i74_d10", - "bbob_f010_i71_d20", - "bbob_f008_i04_d20", - "bbob_f007_i75_d20", - "bbob_f017_i79_d03", - "bbob_f003_i01_d40", - "bbob_f024_i03_d40", - "bbob_f023_i76_d20", - "bbob_f006_i04_d40", - "bbob_f009_i74_d40", - "bbob_f015_i72_d02", - "bbob_f003_i78_d05", - "bbob_f012_i79_d40", - "bbob_f007_i79_d05", - "bbob_f006_i74_d03", - "bbob_f006_i01_d10", - "bbob_f016_i74_d02", - "bbob_f011_i78_d03", - "bbob_f012_i77_d02", - "bbob_f015_i74_d20", - "bbob_f024_i76_d05", - "bbob_f022_i75_d02", - "bbob_f001_i01_d05", - "bbob_f011_i03_d10", - "bbob_f011_i05_d40", - "bbob_f006_i76_d05", - "bbob_f003_i74_d05", - "bbob_f017_i78_d10", - "bbob_f019_i03_d20", - "bbob_f019_i76_d05", - "bbob_f012_i77_d20", - "bbob_f019_i79_d03", - "bbob_f013_i71_d02", - "bbob_f002_i02_d02", - "bbob_f003_i71_d40", - "bbob_f017_i01_d03", - "bbob_f015_i79_d02", - "bbob_f010_i05_d20", - "bbob_f004_i04_d03", - "bbob_f024_i05_d40", - "bbob_f017_i03_d20", - "bbob_f010_i80_d02", - "bbob_f006_i05_d05", - "bbob_f007_i78_d40", - "bbob_f009_i75_d02", - "bbob_f018_i72_d20", - "bbob_f010_i76_d20", - "bbob_f004_i80_d40", - "bbob_f012_i04_d03", - "bbob_f011_i01_d05", - "bbob_f003_i71_d10", - "bbob_f006_i03_d02", - "bbob_f023_i73_d40", - "bbob_f016_i71_d03", - "bbob_f016_i77_d02", 
- "bbob_f011_i71_d02", - "bbob_f014_i04_d20", - "bbob_f016_i77_d03", - "bbob_f012_i76_d03", - "bbob_f021_i02_d20", - "bbob_f017_i05_d02", - "bbob_f004_i03_d40", - "bbob_f006_i03_d20", - "bbob_f023_i78_d03", - "bbob_f012_i03_d03", - "bbob_f012_i01_d40", - "bbob_f022_i03_d02", - "bbob_f022_i02_d05", - "bbob_f013_i79_d40", - "bbob_f005_i73_d05", - "bbob_f014_i05_d20", - "bbob_f010_i03_d02", - "bbob_f022_i02_d20", - "bbob_f013_i04_d20", - "bbob_f014_i73_d02", - "bbob_f008_i76_d02", - "bbob_f018_i73_d10", - "bbob_f021_i76_d40", - "bbob_f005_i03_d10", - "bbob_f016_i73_d10", - "bbob_f020_i76_d40", - "bbob_f008_i78_d02", - "bbob_f011_i73_d40", - "bbob_f007_i76_d10", - "bbob_f011_i73_d10", - "bbob_f019_i78_d10", - "bbob_f024_i05_d20", - "bbob_f001_i03_d03", - "bbob_f010_i04_d10", - "bbob_f002_i80_d03", - "bbob_f002_i73_d05", - "bbob_f007_i02_d05", - "bbob_f004_i74_d10", - "bbob_f003_i76_d03", - "bbob_f003_i79_d03", - "bbob_f024_i04_d02", - "bbob_f023_i75_d40", - "bbob_f008_i01_d20", - "bbob_f008_i79_d03", - "bbob_f005_i02_d03", - "bbob_f002_i79_d05", - "bbob_f016_i03_d03", - "bbob_f020_i73_d40", - "bbob_f004_i78_d20", - "bbob_f015_i75_d20", - "bbob_f023_i78_d02", - "bbob_f018_i77_d40", - "bbob_f017_i71_d10", - "bbob_f021_i80_d02", - "bbob_f003_i77_d20", - "bbob_f004_i76_d20", - "bbob_f019_i74_d05", - "bbob_f023_i71_d40", - "bbob_f021_i73_d10", - "bbob_f018_i72_d40", - "bbob_f003_i04_d10", - "bbob_f014_i76_d40", - "bbob_f015_i03_d10", - "bbob_f007_i75_d03", - "bbob_f024_i79_d10", - "bbob_f006_i76_d10", - "bbob_f016_i02_d40", - "bbob_f003_i01_d02", - "bbob_f002_i80_d40", - "bbob_f024_i78_d02", - "bbob_f002_i02_d40", - "bbob_f018_i01_d40", - "bbob_f008_i72_d40", - "bbob_f004_i05_d40", - "bbob_f012_i78_d20", - "bbob_f004_i77_d05", - "bbob_f015_i76_d02", - "bbob_f023_i79_d02", - "bbob_f008_i73_d10", - "bbob_f005_i02_d20", - "bbob_f020_i80_d05", - "bbob_f024_i01_d03", - "bbob_f015_i78_d05", - "bbob_f010_i74_d10", - "bbob_f010_i78_d10", - "bbob_f001_i72_d40", - 
"bbob_f020_i05_d40", - "bbob_f004_i77_d40", - "bbob_f018_i76_d40", - "bbob_f006_i73_d02", - "bbob_f009_i01_d10", - "bbob_f023_i04_d03", - "bbob_f008_i73_d05", - "bbob_f022_i80_d02", - "bbob_f018_i72_d03", - "bbob_f019_i01_d02", - "bbob_f020_i78_d05", - "bbob_f007_i80_d20", - "bbob_f016_i02_d10", - "bbob_f019_i79_d20", - "bbob_f018_i73_d40", - "bbob_f015_i79_d20", - "bbob_f017_i77_d03", - "bbob_f007_i77_d02", - "bbob_f003_i03_d05", - "bbob_f001_i01_d02", - "bbob_f016_i73_d05", - "bbob_f006_i79_d02", - "bbob_f015_i01_d03", - "bbob_f018_i77_d20", - "bbob_f007_i05_d05", - "bbob_f014_i05_d02", - "bbob_f023_i01_d40", - "bbob_f022_i04_d03", - "bbob_f018_i80_d20", - "bbob_f013_i01_d40", - "bbob_f020_i71_d20", - "bbob_f011_i05_d20", - "bbob_f020_i79_d20", - "bbob_f005_i05_d10", - "bbob_f006_i05_d40", - "bbob_f014_i75_d03", - "bbob_f012_i75_d02", - "bbob_f003_i77_d03", - "bbob_f001_i02_d10", - "bbob_f008_i77_d05", - "bbob_f020_i74_d02", - "bbob_f002_i78_d10", - "bbob_f014_i71_d20", - "bbob_f020_i77_d05", - "bbob_f013_i72_d20", - "bbob_f001_i74_d10", - "bbob_f001_i76_d03", - "bbob_f008_i05_d20", - "bbob_f011_i02_d05", - "bbob_f023_i74_d40", - "bbob_f024_i73_d03", - "bbob_f012_i73_d40", - "bbob_f014_i75_d05", - "bbob_f008_i80_d02", - "bbob_f004_i72_d03", - "bbob_f003_i76_d02", - "bbob_f023_i77_d10", - "bbob_f023_i03_d40", - "bbob_f013_i05_d03", - "bbob_f015_i77_d10", - "bbob_f022_i78_d20", - "bbob_f005_i74_d20", - "bbob_f024_i02_d05", - "bbob_f015_i75_d02", - "bbob_f009_i74_d03", - "bbob_f010_i78_d02", - "bbob_f008_i76_d40", - "bbob_f012_i01_d05", - "bbob_f005_i79_d03", - "bbob_f008_i76_d05", - "bbob_f008_i77_d02", - "bbob_f007_i78_d20", - "bbob_f003_i04_d02", - "bbob_f015_i72_d10", - "bbob_f004_i75_d20", - "bbob_f003_i77_d10", - "bbob_f016_i80_d05", - "bbob_f024_i78_d20", - "bbob_f011_i79_d20", - "bbob_f013_i72_d02", - "bbob_f023_i72_d20", - "bbob_f021_i74_d20", - "bbob_f003_i74_d40", - "bbob_f019_i02_d10", - "bbob_f007_i77_d20", - "bbob_f009_i01_d02", - "bbob_f024_i04_d03", 
- "bbob_f008_i03_d05", - "bbob_f020_i02_d20", - "bbob_f013_i05_d40", - "bbob_f010_i79_d10", - "bbob_f022_i01_d20", - "bbob_f014_i75_d10", - "bbob_f006_i05_d20", - "bbob_f005_i78_d03", - "bbob_f020_i05_d20", - "bbob_f018_i80_d03", - "bbob_f019_i72_d40", - "bbob_f017_i04_d20", - "bbob_f009_i76_d40", - "bbob_f006_i80_d05", - "bbob_f008_i74_d02", - "bbob_f022_i78_d02", - "bbob_f015_i77_d03", - "bbob_f003_i72_d03", - "bbob_f016_i05_d20", - "bbob_f005_i77_d05", - "bbob_f024_i74_d05", - "bbob_f022_i02_d02", - "bbob_f011_i73_d03", - "bbob_f006_i80_d20", - "bbob_f016_i78_d10", - "bbob_f005_i77_d02", - "bbob_f024_i77_d40", - "bbob_f013_i78_d10", - "bbob_f005_i01_d40", - "bbob_f011_i80_d03", - "bbob_f020_i78_d20", - "bbob_f008_i76_d10", - "bbob_f009_i03_d05", - "bbob_f021_i79_d20", - "bbob_f016_i74_d40", - "bbob_f003_i80_d40", - "bbob_f011_i76_d05", - "bbob_f021_i01_d05", - "bbob_f018_i05_d03", - "bbob_f008_i80_d10", - "bbob_f004_i01_d20", - "bbob_f023_i73_d03", - "bbob_f007_i72_d40", - "bbob_f022_i72_d10", - "bbob_f024_i80_d05", - "bbob_f015_i79_d10", - "bbob_f017_i05_d10", - "bbob_f017_i76_d05", - "bbob_f023_i79_d03", - "bbob_f003_i75_d05", - "bbob_f009_i77_d03", - "bbob_f023_i76_d05", - "bbob_f014_i72_d02", - "bbob_f006_i01_d05", - "bbob_f003_i73_d40", - "bbob_f001_i79_d10", - "bbob_f007_i71_d40", - "bbob_f008_i76_d20", - "bbob_f021_i78_d05", - "bbob_f008_i05_d03", - "bbob_f004_i02_d20", - "bbob_f017_i78_d40", - "bbob_f002_i74_d10", - "bbob_f013_i77_d05", - "bbob_f016_i80_d03", - "bbob_f012_i03_d02", - "bbob_f024_i72_d03", - "bbob_f007_i79_d02", - "bbob_f018_i71_d10", - "bbob_f016_i75_d03", - "bbob_f009_i77_d20", - "bbob_f020_i77_d10", - "bbob_f017_i77_d20", - "bbob_f002_i01_d20", - "bbob_f017_i76_d40", - "bbob_f010_i75_d20", - "bbob_f018_i03_d20", - "bbob_f006_i76_d03", - "bbob_f005_i73_d20", - "bbob_f012_i04_d40", - "bbob_f014_i01_d03", - "bbob_f012_i74_d20", - "bbob_f012_i78_d05", - "bbob_f003_i03_d02", - "bbob_f002_i77_d02", - "bbob_f002_i01_d03", - 
"bbob_f014_i02_d02", - "bbob_f001_i71_d10", - "bbob_f010_i73_d05", - "bbob_f023_i05_d05", - "bbob_f010_i73_d03", - "bbob_f011_i78_d10", - "bbob_f015_i02_d40", - "bbob_f014_i77_d10", - "bbob_f011_i80_d10", - "bbob_f020_i03_d40", - "bbob_f003_i76_d10", - "bbob_f011_i02_d10", - "bbob_f003_i73_d10", - "bbob_f012_i76_d05", - "bbob_f008_i73_d02", - "bbob_f002_i80_d02", - "bbob_f011_i01_d02", - "bbob_f014_i72_d40", - "bbob_f018_i73_d03", - "bbob_f004_i04_d02", - "bbob_f022_i79_d03", - "bbob_f007_i03_d10", - "bbob_f013_i80_d10", - "bbob_f010_i01_d02", - "bbob_f008_i71_d02", - "bbob_f003_i75_d20", - "bbob_f024_i78_d05", - "bbob_f006_i02_d40", - "bbob_f004_i74_d02", - "bbob_f011_i79_d05", - "bbob_f019_i01_d40", - "bbob_f017_i76_d03", - "bbob_f003_i73_d20", - "bbob_f017_i02_d40", - "bbob_f010_i80_d05", - "bbob_f014_i79_d40", - "bbob_f017_i73_d40", - "bbob_f020_i72_d10", - "bbob_f008_i72_d20", - "bbob_f009_i73_d20", - "bbob_f005_i72_d10", - "bbob_f018_i03_d02", - "bbob_f001_i73_d05", - "bbob_f011_i80_d40", - "bbob_f001_i76_d02", - "bbob_f006_i03_d40", - "bbob_f007_i75_d02", - "bbob_f005_i76_d10", - "bbob_f002_i72_d10", - "bbob_f001_i75_d10", - "bbob_f009_i78_d02", - "bbob_f013_i71_d03", - "bbob_f023_i73_d20", - "bbob_f012_i71_d40", - "bbob_f004_i79_d10", - "bbob_f022_i02_d03", - "bbob_f004_i71_d03", - "bbob_f010_i02_d03", - "bbob_f018_i72_d10", - "bbob_f018_i02_d40", - "bbob_f016_i72_d02", - "bbob_f010_i76_d10", - "bbob_f001_i01_d03", - "bbob_f023_i02_d10", - "bbob_f012_i74_d02", - "bbob_f019_i74_d02", - "bbob_f005_i74_d40", - "bbob_f007_i02_d03", - "bbob_f007_i76_d05", - "bbob_f001_i79_d40", - "bbob_f018_i72_d05", - "bbob_f022_i79_d02", - "bbob_f014_i76_d02", - "bbob_f017_i74_d02", - "bbob_f010_i79_d05", - "bbob_f022_i80_d10", - "bbob_f005_i77_d03", - "bbob_f006_i77_d10", - "bbob_f023_i01_d03", - "bbob_f009_i79_d02", - "bbob_f014_i77_d20", - "bbob_f006_i75_d10", - "bbob_f003_i75_d10", - "bbob_f021_i05_d20", - "bbob_f018_i78_d02", - "bbob_f019_i72_d10", - "bbob_f009_i72_d02", 
- "bbob_f015_i01_d02", - "bbob_f015_i75_d40", - "bbob_f024_i02_d03", - "bbob_f020_i02_d03", - "bbob_f002_i03_d10", - "bbob_f021_i74_d02", - "bbob_f015_i80_d05", - "bbob_f014_i77_d02", - "bbob_f020_i04_d20", - "bbob_f024_i03_d10", - "bbob_f002_i04_d02", - "bbob_f018_i76_d10", - "bbob_f005_i78_d40", - "bbob_f011_i73_d05", - "bbob_f023_i79_d05", - "bbob_f002_i05_d40", - "bbob_f023_i77_d05", - "bbob_f004_i03_d03", - "bbob_f004_i71_d02", - "bbob_f001_i71_d03", - "bbob_f021_i77_d10", - "bbob_f016_i80_d20", - "bbob_f010_i73_d02", - "bbob_f011_i01_d10", - "bbob_f009_i04_d02", - "bbob_f024_i78_d40", - "bbob_f011_i77_d05", - "bbob_f011_i01_d40", - "bbob_f009_i02_d20", - "bbob_f007_i75_d40", - "bbob_f006_i04_d02", - "bbob_f019_i05_d20", - "bbob_f004_i72_d40", - "bbob_f021_i05_d40", - "bbob_f024_i01_d40", - "bbob_f005_i75_d05", - "bbob_f003_i76_d05", - "bbob_f002_i76_d05", - "bbob_f008_i01_d02", - "bbob_f006_i71_d10", - "bbob_f018_i77_d02", - "bbob_f016_i01_d02", - "bbob_f010_i74_d40", - "bbob_f004_i73_d03", - "bbob_f014_i73_d05", - "bbob_f006_i79_d20", - "bbob_f006_i05_d02", - "bbob_f006_i01_d02", - "bbob_f016_i79_d05", - "bbob_f012_i72_d10", - "bbob_f015_i76_d20", - "bbob_f019_i04_d05", - "bbob_f001_i04_d03", - "bbob_f005_i77_d40", - "bbob_f022_i75_d40", - "bbob_f002_i01_d02", - "bbob_f019_i78_d05", - "bbob_f024_i80_d40", - "bbob_f008_i74_d03", - "bbob_f020_i01_d05", - "bbob_f013_i04_d05", - "bbob_f007_i02_d20", - "bbob_f010_i71_d03", - "bbob_f006_i78_d03", - "bbob_f021_i05_d05", - "bbob_f020_i72_d02", - "bbob_f003_i02_d40", - "bbob_f014_i79_d05", - "bbob_f017_i71_d03", - "bbob_f012_i76_d02", - "bbob_f011_i04_d05", - "bbob_f005_i79_d40", - "bbob_f023_i01_d02", - "bbob_f018_i71_d40", - "bbob_f018_i05_d05", - "bbob_f023_i02_d02", - "bbob_f021_i74_d05", - "bbob_f011_i74_d20", - "bbob_f006_i72_d02", - "bbob_f021_i03_d02", - "bbob_f014_i02_d40", - "bbob_f024_i77_d10", - "bbob_f023_i76_d02", - "bbob_f019_i78_d02", - "bbob_f016_i76_d40", - "bbob_f020_i77_d03", - 
"bbob_f008_i05_d02", - "bbob_f022_i80_d03", - "bbob_f004_i72_d10", - "bbob_f023_i03_d20", - "bbob_f005_i72_d02", - "bbob_f004_i03_d20", - "bbob_f004_i03_d02", - "bbob_f004_i78_d10", - "bbob_f020_i02_d10", - "bbob_f005_i73_d03", - "bbob_f007_i74_d02", - "bbob_f019_i04_d02", - "bbob_f002_i03_d03", - "bbob_f017_i03_d03", - "bbob_f016_i77_d05", - "bbob_f011_i76_d10", - "bbob_f008_i71_d05", - "bbob_f004_i74_d05", - "bbob_f010_i71_d40", - "bbob_f020_i05_d03", - "bbob_f001_i73_d10", - "bbob_f022_i80_d40", - "bbob_f020_i01_d03", - "bbob_f014_i71_d02", - "bbob_f006_i04_d10", - "bbob_f015_i05_d40", - "bbob_f022_i77_d02", - "bbob_f020_i03_d10", - "bbob_f010_i78_d40", - "bbob_f013_i72_d10", - "bbob_f015_i04_d05", - "bbob_f002_i71_d10", - "bbob_f007_i01_d20", - "bbob_f021_i01_d40", - "bbob_f023_i71_d10", - "bbob_f007_i72_d02", - "bbob_f016_i78_d20", - "bbob_f013_i79_d02", - "bbob_f024_i02_d02", - "bbob_f003_i02_d02", - "bbob_f022_i73_d03", - "bbob_f023_i73_d05", - "bbob_f010_i01_d10", - "bbob_f021_i78_d03", - "bbob_f002_i79_d02", - "bbob_f006_i79_d10", - "bbob_f014_i74_d20", - "bbob_f022_i73_d05", - "bbob_f022_i72_d02", - "bbob_f020_i01_d20", - "bbob_f010_i72_d40", - "bbob_f023_i71_d05", - "bbob_f001_i75_d20", - "bbob_f014_i77_d03", - "bbob_f013_i77_d40", - "bbob_f016_i71_d10", - "bbob_f018_i75_d40", - "bbob_f010_i74_d05", - "bbob_f004_i80_d03", - "bbob_f002_i73_d40", - "bbob_f007_i77_d03", - "bbob_f015_i72_d03", - "bbob_f013_i73_d03", - "bbob_f019_i72_d03", - "bbob_f001_i75_d05", - "bbob_f001_i78_d05", - "bbob_f024_i75_d05", - "bbob_f016_i78_d05", - "bbob_f023_i74_d03", - "bbob_f024_i80_d02", - "bbob_f011_i74_d40", - "bbob_f005_i76_d40", - "bbob_f012_i80_d03", - "bbob_f021_i02_d10", - "bbob_f013_i72_d03", - "bbob_f014_i76_d20", - "bbob_f015_i74_d03", - "bbob_f001_i05_d03", - "bbob_f010_i05_d02", - "bbob_f018_i02_d20", - "bbob_f009_i76_d10", - "bbob_f014_i78_d20", - "bbob_f005_i78_d05", - "bbob_f003_i78_d20", - "bbob_f019_i71_d10", - "bbob_f021_i76_d20", - "bbob_f005_i01_d05", 
- "bbob_f006_i72_d40", - "bbob_f022_i71_d10", - "bbob_f016_i76_d20", - "bbob_f002_i02_d05", - "bbob_f010_i02_d10", - "bbob_f006_i77_d05", - "bbob_f018_i79_d05", - "bbob_f015_i04_d10", - "bbob_f017_i01_d20", - "bbob_f014_i05_d10", - "bbob_f021_i77_d05", - "bbob_f002_i76_d03", - "bbob_f015_i01_d40", - "bbob_f001_i71_d20", - "bbob_f003_i71_d03", - "bbob_f018_i76_d02", - "bbob_f009_i71_d05", - "bbob_f001_i79_d02", - "bbob_f017_i79_d05", - "bbob_f011_i05_d02", - "bbob_f007_i74_d03", - "bbob_f002_i01_d05", - "bbob_f010_i03_d03", - "bbob_f010_i80_d10", - "bbob_f013_i77_d10", - "bbob_f002_i76_d10", - "bbob_f012_i02_d03", - "bbob_f006_i76_d02", - "bbob_f017_i78_d03", - "bbob_f014_i03_d02", - "bbob_f005_i75_d20", - "bbob_f023_i71_d20", - "bbob_f011_i76_d03", - "bbob_f005_i03_d02", - "bbob_f007_i74_d05", - "bbob_f005_i75_d10", - "bbob_f009_i79_d10", - "bbob_f011_i74_d03", - "bbob_f019_i77_d20", - "bbob_f012_i03_d05", - "bbob_f011_i72_d20", - "bbob_f008_i02_d40", - "bbob_f010_i72_d05", - "bbob_f016_i77_d20", - "bbob_f023_i74_d05", - "bbob_f022_i03_d10", - "bbob_f018_i74_d02", - "bbob_f001_i78_d40", - "bbob_f010_i74_d20", - "bbob_f012_i74_d05", - "bbob_f005_i79_d05", - "bbob_f016_i76_d05", - "bbob_f014_i74_d05", - "bbob_f005_i74_d05", - "bbob_f020_i75_d20", - "bbob_f018_i03_d10", - "bbob_f011_i03_d05", - "bbob_f002_i71_d20", - "bbob_f009_i04_d10", - "bbob_f017_i04_d03", - "bbob_f010_i05_d03", - "bbob_f024_i75_d10", - "bbob_f005_i78_d20", - "bbob_f020_i75_d02", - "bbob_f024_i75_d40", - "bbob_f011_i78_d02", - "bbob_f008_i78_d10", - "bbob_f001_i80_d40", - "bbob_f013_i74_d03", - "bbob_f009_i76_d05", - "bbob_f023_i79_d10", - "bbob_f001_i02_d05", - "bbob_f003_i01_d03", - "bbob_f001_i73_d40", - "bbob_f010_i04_d02", - "bbob_f015_i04_d20", - "bbob_f001_i74_d40", - "bbob_f017_i05_d40", - "bbob_f010_i76_d40", - "bbob_f018_i04_d02", - "bbob_f011_i75_d05", - "bbob_f018_i74_d20", - "bbob_f024_i76_d10", - "bbob_f024_i73_d10", - "bbob_f015_i77_d20", - "bbob_f009_i72_d05", - 
"bbob_f024_i73_d40", - "bbob_f019_i73_d02", - "bbob_f016_i03_d05", - "bbob_f017_i01_d05", - "bbob_f014_i02_d20", - "bbob_f008_i72_d10", - "bbob_f004_i02_d10", - "bbob_f018_i75_d20", - "bbob_f001_i02_d20", - "bbob_f017_i04_d40", - "bbob_f022_i79_d20", - "bbob_f007_i04_d20", - "bbob_f004_i80_d20", - "bbob_f016_i75_d05", - "bbob_f012_i78_d10", - "bbob_f006_i78_d02", - "bbob_f022_i74_d02", - "bbob_f005_i77_d10", - "bbob_f017_i80_d02", - "bbob_f017_i01_d10", - "bbob_f018_i76_d20", - "bbob_f011_i72_d03", - "bbob_f023_i01_d05", - "bbob_f018_i02_d05", - "bbob_f005_i79_d20", - "bbob_f001_i76_d10", - "bbob_f015_i78_d20" - ] -} \ No newline at end of file diff --git a/README.md b/README.md index e0833dd..5469d65 100644 --- a/README.md +++ b/README.md @@ -55,32 +55,48 @@ uv run das [options] ``` -### **Arguments** +### **💡 Arguments** | Argument | Type | Default | Description | |------------------------------------|-------------|-----------------------------|------------------------------------------------------------------------------------------------------------------| | `name` | `str` | — | **Required.** Name tag for the run or experiment. | | `-p`, `--portfolio` | `list[str]` | `['SPSO', 'IPSO', 'SPSOL']` | Portfolio of sub-optimizers to include. | -| `-m`, `--population_size` | `int` | `20` | Population size for all fixed-population optimizers. | +| `-m`, `--population_size` | `int` | `None` | Population size for all fixed-population optimizers. None means no fixed population size. | | `-f`, `--fe_multiplier` | `int` | `10_000` | Function evaluation multiplier. | | `-s`, `--n_checkpoints` | `int` | `10` | Number of checkpoints for sub-optimizer selection. | | `-t`, `--test` / `--no-test` | `bool` | `True` | Whether to execute in test mode. | | `-c`, `--compare` / `--no-compare` | `bool` | `False` | Whether to compare results against standalone optimizers. | | `-e`, `--wandb_entity` | `str` | `None` | Weights and Biases (WandB) entity name. 
| | `-w`, `--wandb_project` | `str` | `None` | Weights and Biases (WandB) project name. | -| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`. | +| `-a`, `--agent` | `str` | `policy-gradient` | Agent type. Options: `neuroevolution`, `policy-gradient`, `random`, `RL-DAS`, `RL-DAS-random`. | | `-l`, `--mode` | `str` | `LOIO` | Train/Test split mode (see [Split Strategies](https://www.google.com/search?q=%23-train-test-split-strategies)). | | `-x`, `--cdb` | `float` | `1.0` | **Checkpoint Division Exponent**; determines how quickly checkpoint length increases. | | `-r`, `--state-representation` | `str` | `ELA` | Method used to extract features from the algorithm population. | +| `-d`, `--force-restarts` | `bool` | `False` | Enable selection of forcibly restarting optimizers. | +| `-D`, `--dimensionality` | `int` | `None` | Dimensionality of problems. | +| `-E`, `--n_epochs` | `int` | `1` | Number of training epochs. | +| `-O`, `--reward-option` | `int` | `1` | ID of method used to compute reward. | --- +### **🤖 Agent** + +There are following agent options available in this project. + +| Agent | Uses CDB? | Description | Implementation | +|------------------------------------|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------| +| `neuroevolution` | Yes | Neuroevolution-based agent. Its training is implemented using NEAT algorithm. | [here](dynamicalgorithmselection/agents/neuroevolution_agent.py) | +| `policy-gradient` | Yes | PPO-based agent. Main subject of experiments. | [here](dynamicalgorithmselection/agents/policy_gradient_agent.py) | +| `random` | Yes | Baseline for agents, that use Checkpoint division. Randomly selects actions using equal probabilities. 
| [here](dynamicalgorithmselection/agents/random_agent.py) | +| `RL-DAS` | No | Implementation of [Deep Reinforcement Learning for Dynamic Algorithm Selection: A Proof-of-Principle Study on Differential Evolution](https://doi.org/10.48550/arXiv.2403.02131). | [here](dynamicalgorithmselection/agents/RLDAS_agent.py) | +| `RL-DAS-random` | No | Implementation of the baseline proposed by the authors of `RL-DAS` algorithm. Randomly selects action using equal probabilities. | [here](dynamicalgorithmselection/agents/RLDAS_random_agent.py) | +--- + ## 📊 Train-Test Split Strategies The `-l` / `--mode` argument determines how the dataset is divided: -* **`LOIO` (Leave One Instance Out):** Uses `LOLO_train_set.json`, a randomly generated subset containing mixed problem - types. +* **`LOIO` (Leave One Instance Out):** Uses a randomly generated subset containing mixed problem types. * **`hard` (Leave One Problem Out):** Splits the dataset by grouping identical problem instances. Contains **twice as many** training functions as test functions. * **`easy` (Leave One Problem Out):** Similar to `hard`, but with **inverted** train-test proportions (more test @@ -117,6 +133,22 @@ Below is a comparison of how the checkpoint lengths correspond to different `cdb * **Early Stages (Short):** Allows the agent to make rapid decisions and switch algorithms frequently during the initial exploration phase. * **Later Stages (Long):** Provides longer uninterrupted periods for algorithms to converge (exploitation) without being disrupted by frequent agent switching. +## 🏆 Reward Options + +The `-O` or `--reward-option` argument determines how the agent calculates the reward after each checkpoint. All options compute an `improvement` metric based on the change in the best objective value (`y`), scaled against the initial value range (`initial_value_range[1] - initial_value_range[0]`). 
+ +Here are the available reward strategies: + +* **Option 1 (`1`): Logarithmic Scaled Improvement** +Calculates the improvement between the current checkpoint and the previous one (`old_best_y - new_best_y`), scales it, clips the value between 0.0 and 1.0, and applies a logarithmic transformation (`np.log(reward + 1e-5)`). Useful for smoothing out large variance in improvements. +* **Option 2 (`2`): Linear Clipped Improvement** +Calculates the scaled improvement between the current checkpoint and the previous one (`old_best_y - new_best_y`), and simply clips the result between 0.0 and 1.0 without any logarithmic scaling (`np.clip(reward, 0.0, 1.0)`). +* **Option 3 (`3`): Sparse Total Improvement (Final Checkpoint Only)** +Provides a sparse reward. It returns `0.0` for all intermediate checkpoints. At the final checkpoint, it calculates the *total* improvement from the very start of the optimization run (`initial_value_range[0] - new_best_y`), scales it, and applies a logarithmic transformation. +* **Option 4 (`4`): Binary Threshold Reward** +Calculates the scaled improvement between checkpoints and provides a binary outcome: it returns `1.0` if the scaled improvement is greater than or equal to a minimum threshold (`1e-3`), and `0.0` otherwise. + + ## 🧠 State Representation There are three options for representing the optimization state (`-r` flag): @@ -130,7 +162,7 @@ There are three options for representing the optimization state (`-r` flag): this [paper](https://arxiv.org/pdf/2408.10672). 3. **`custom`:** A proposed feature extraction method - implemented [here](https://www.google.com/search?q=dynamicalgorithmselection/agents/agent_state.py%2349). This can be + implemented [here](dynamicalgorithmselection/agents/agent_state.py). This can be modified to include additional features. 
--- diff --git a/dynamicalgorithmselection/NeurELA/NeurELA.py b/dynamicalgorithmselection/NeurELA/NeurELA.py index f141c54..ebc8f37 100644 --- a/dynamicalgorithmselection/NeurELA/NeurELA.py +++ b/dynamicalgorithmselection/NeurELA/NeurELA.py @@ -27,8 +27,6 @@ def load_data(path): return data -seed = 0 - BASE_DIR = os.path.dirname(os.path.abspath(__file__)) load_path = os.path.join(BASE_DIR, "NeurELA.pkl") diff --git a/dynamicalgorithmselection/agents/RLDAS_agent.py b/dynamicalgorithmselection/agents/RLDAS_agent.py new file mode 100644 index 0000000..f02bc47 --- /dev/null +++ b/dynamicalgorithmselection/agents/RLDAS_agent.py @@ -0,0 +1,410 @@ +import numpy as np +import torch +import copy +import os +from typing import Any, Dict, List, Optional, Tuple + +from dynamicalgorithmselection.agents.agent import Agent +from dynamicalgorithmselection.agents.agent_state import get_la_features +from dynamicalgorithmselection.agents.ppo_utils import ( + DEVICE, + RolloutBuffer, + RLDASNetwork, +) +from dynamicalgorithmselection.optimizers.Optimizer import Optimizer + +INITIAL_POPSIZE = 170 +GAMMA = 0.99 + + +class RLDASAgent(Agent): + def __init__(self, problem, options): + super().__init__(problem, options) + + self.alg_names = [alg.__name__ for alg in self.actions] + self.n_algorithms = len(self.actions) + + self.network = RLDASNetwork( + num_algorithms=self.n_algorithms, d_dim=self.ndim_problem + ).to(DEVICE) + + self.optimizer = torch.optim.Adam(self.network.parameters(), lr=3e-5) + + self._load_parameters(options) + self.ah_vectors = np.zeros((self.n_algorithms, 2, self.ndim_problem)) + self.alg_usage_counts = np.zeros(self.n_algorithms) + self.context_memory: Dict[str, Dict[str, Any]] = { + name: {} for name in self.alg_names + } + self.context_memory["Common"] = {} + self.mean_rewards = options.get("mean_rewards", []) + self.best_50_mean = float("inf") + self.schedule_interval = options.get( + "schedule_interval", int(self.max_function_evaluations / 10) + ) + + 
expected_trajectory_length = int( + np.ceil(self.max_function_evaluations / self.schedule_interval) + ) + buffer_capacity = expected_trajectory_length * 10 # Safety margin + self.buffer = RolloutBuffer(capacity=buffer_capacity, device=DEVICE) + + def _load_parameters(self, options): + if p := options.get("network_parameters", None): + self.network.load_state_dict(p) + if p := options.get("optimizer", None): + self.optimizer.load_state_dict(p) + + def get_state( + self, + x: Optional[np.ndarray] = None, + y: Optional[np.ndarray] = None, + x_history: Optional[np.ndarray] = None, + y_history: Optional[np.ndarray] = None, + update: bool = True, + ) -> Tuple[np.ndarray, np.ndarray]: + la = get_la_features(self, x, y) + ah = self.ah_vectors.copy() + + return la, ah + + def _update_ah_history( + self, alg_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ): + """ + Updates Shift Vectors (SV) for the selected algorithm. + Eq (8), (9). + """ + sv_best_current = x_best_new - x_best_old + sv_worst_current = x_worst_new - x_worst_old + + H = self.alg_usage_counts[alg_idx] + + self.ah_vectors[alg_idx, 0] = ( + self.ah_vectors[alg_idx, 0] * H + sv_best_current + ) / (H + 1) + self.ah_vectors[alg_idx, 1] = ( + self.ah_vectors[alg_idx, 1] * H + sv_worst_current + ) / (H + 1) + + self.alg_usage_counts[alg_idx] += 1 + + def _save_context(self, optimizer, alg_name): + common_attrs = ["MF", "MCr", "archive"] + for attr in common_attrs: + if hasattr(optimizer, attr): + self.context_memory["Common"][attr] = getattr(optimizer, attr) + + specific_attrs = [] + if "JDE21" in alg_name: + specific_attrs = [ + "tau1", + "tau2", + "ageLmt", + "eps", + "myEqs", + ] + elif "MadDE" in alg_name: + specific_attrs = ["pm", "pbest", "PqBX"] + elif "NL_SHADE" in alg_name: + specific_attrs = ["NA", "pa"] + + for attr in specific_attrs: + if hasattr(optimizer, attr): + self.context_memory[alg_name][attr] = getattr(optimizer, attr) + + def _restore_context(self, optimizer, alg_name): + """ + 
Restores parameters to the optimizer from self.context_memory. + """ + for attr, val in self.context_memory["Common"].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + if alg_name in self.context_memory: + for attr, val in self.context_memory[alg_name].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + def _select_action(self, state): + """ + Selects action using the shared network with split inputs. + """ + la_state, ah_state = state + la_tensor = torch.FloatTensor(la_state).unsqueeze(0).to(DEVICE) + ah_tensor = torch.FloatTensor(ah_state).unsqueeze(0).to(DEVICE) + + with torch.no_grad(): + probs, value = self.network(la_tensor, ah_tensor) + dist = torch.distributions.Categorical(probs) + action = dist.sample() + log_prob = dist.log_prob(action) + probs = probs.detach().cpu().numpy()[0] + + if self.run is not None: + entropy = -np.sum(probs * np.log(probs + 1e-12)) / np.log(len(probs)) + self.run.log({"normalized entropy": entropy}) + + return action.item(), log_prob, value + + def initialize(self): + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + size=(INITIAL_POPSIZE, self.ndim_problem), + ) + y = np.zeros((INITIAL_POPSIZE,)) + for i in range(INITIAL_POPSIZE): + y[i] = self._evaluate_fitness(x[i]) + return x, y + + def optimize(self, fitness_function=None, args=None): + """ + Main Optimization Loop implementing RL-DAS workflow (Algorithm 1). + Does NOT use checkpoints. Uses interval-based scheduling. 
+ """ + fitness = Optimizer.optimize(self, fitness_function) + population_x, population_y = self.initialize() + self.n_function_evaluations = INITIAL_POPSIZE + + best_idx = np.argmin(population_y) + best_y_global = population_y[best_idx] + best_x_global = population_x[best_idx].copy() + + self.best_so_far_y = best_y_global + self.best_so_far_x = best_x_global + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self.initial_cost = best_y_global if abs(best_y_global) > 1e-8 else 1.0 + + self.ah_vectors.fill(0.0) + self.alg_usage_counts.fill(0.0) + self.context_memory = {name: {} for name in self.alg_names} + self.context_memory["Common"] = {} + + trajectory = [] + + while self.n_function_evaluations < self.max_function_evaluations: + state = self.get_state(population_x, population_y) + + action_idx, log_prob, value = self._select_action(state) + self.choices_history.append(action_idx) + + selected_alg_class = self.actions[action_idx] + alg_name = self.alg_names[action_idx] + + sub_opt = selected_alg_class(self.problem, self.options) + sub_opt.n_function_evaluations = self.n_function_evaluations + sub_opt.max_function_evaluations = self.max_function_evaluations + + self._restore_context(sub_opt, alg_name) + + x_best_old = population_x[np.argmin(population_y)].copy() + x_worst_old = population_x[np.argmax(population_y)].copy() + cost_old = np.copy(np.min(population_y)) + + target_fes = min( + self.n_function_evaluations + self.schedule_interval, + self.max_function_evaluations, + ) + sub_opt.target_FE = target_fes + sub_opt.set_data( + x=population_x, + y=population_y, + best_x=self.best_so_far_x, + best_y=self.best_so_far_y, + ) + + result = sub_opt.optimize() + + self.fitness_history.extend(result["fitness_history"]) + + self._save_fitness( + result["best_so_far_x"], + result["best_so_far_y"], + result["worst_so_far_x"], + result["worst_so_far_y"], + ) + + population_x = result["x"] + population_y = result["y"] + + 
self.n_function_evaluations = sub_opt.n_function_evaluations + + self._save_context(sub_opt, alg_name) + + x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() + x_worst_new: np.ndarray = population_x[np.argmax(population_y)].copy() + cost_new: float = np.min(population_y) + + self._update_ah_history( + action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ) + + adc = (cost_old - cost_new) / self.initial_cost + if self.run: + self.run.log({"adc": adc}) + + done = self.n_function_evaluations >= self.max_function_evaluations + + trajectory.append( + { + "state": state, + "action": action_idx, + "adc": adc, + "log_prob": log_prob, + "value": value, + "done": done, + } + ) + + best_y_global = min(best_y_global, cost_new) + + # Update Agent Best State and History + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self._n_generations += 1 + self._print_verbose_info(fitness, self.best_so_far_y) + print(self._n_generations) + fes_end = self.n_function_evaluations + speed_factor = self.max_function_evaluations / fes_end + + for step in trajectory: + final_reward = step["adc"] * speed_factor + self.rewards.append(final_reward) + la_state, ah_state = step["state"] + + la_tensor = torch.FloatTensor(la_state).to(DEVICE) + ah_tensor = torch.FloatTensor(ah_state).to(DEVICE) + + self.buffer.add( + (la_tensor, ah_tensor), + step["action"], + final_reward, + step["done"], + step["log_prob"], + step["value"], + ) + + if self.train_mode: + T = len(trajectory) + K = max(1, int(0.3 * T)) + + self.ppo_update( + self.buffer, + epochs=K, + minibatch_size=32, + clip_eps=0.2, + value_coef=0.5, + entropy_coef=0.01, + ) + + self.buffer.clear() + + return self._collect(fitness, self.best_so_far_y) + + def _collect(self, fitness, y=None): + results, _ = super()._collect(fitness, y) + self.mean_rewards.append(sum(self.rewards) / 
len(self.rewards)) + agent_state = { + "network_parameters": self.network.state_dict(), + "optimizer": self.optimizer.state_dict(), + "buffer": self.buffer, + "mean_rewards": self.mean_rewards, + "reward_normalizer": self.reward_normalizer, + "state_normalizer": self.state_normalizer, + } + + last_50_mean = sum(self.mean_rewards[-50:]) / len(self.mean_rewards[-50:]) + if self.best_50_mean > last_50_mean: + self.best_50_mean = last_50_mean + torch.save(agent_state, os.path.join("models", f"{self.name}_best.pth")) + + if self.n_function_evaluations == self.max_function_evaluations: + torch.save(agent_state, os.path.join("models", f"{self.name}_final.pth")) + + return results, agent_state + + def ppo_update( + self, + buffer, + epochs=4, + minibatch_size=None, + clip_eps=0.2, + value_coef=0.5, + entropy_coef=0.01, + ): + la_list, ah_list = zip(*buffer.states) + + la_states = torch.stack(la_list).to(DEVICE) + ah_states = torch.stack(ah_list).to(DEVICE) + + actions = torch.tensor(buffer.actions).to(DEVICE) + rewards = buffer.rewards + dones = buffer.dones + + old_logprobs = torch.stack(buffer.log_probs).detach().to(DEVICE).view(-1) + + old_values = torch.stack(buffer.values).detach().to(DEVICE).view(-1) + + for _ in range(epochs): + policy_probs, current_values = self.network(la_states, ah_states) + + dist = torch.distributions.Categorical(policy_probs) + logprobs = dist.log_prob(actions) + + current_values = current_values.squeeze() + + with torch.no_grad(): + if dones[-1]: + return_value = 0.0 + else: + return_value = current_values[-1].item() if not dones[-1] else 0.0 + + returns_list: List[float] = [] + for r in reversed(rewards): + return_value = return_value * GAMMA + r + returns_list.insert(0, return_value) + + returns_tensor: torch.Tensor = torch.tensor(returns_list).to(DEVICE).float() + advantages = returns_tensor - current_values.detach() + + ratios = torch.exp(logprobs - old_logprobs) + + # Actor Loss (Reinforce Loss with Clipping) + surr1 = ratios * 
advantages + surr2 = torch.clamp(ratios, 1 - clip_eps, 1 + clip_eps) * advantages + actor_loss = -torch.min(surr1, surr2).mean() + + vpredclipped = old_values + torch.clamp( + current_values - old_values, -clip_eps, clip_eps + ) + + v_max = torch.max( + ((current_values - returns_tensor) ** 2), + ((vpredclipped - returns_tensor) ** 2), + ) + critic_loss = v_max.mean() + + loss = critic_loss + actor_loss + + if self.run is not None: + self.run.log( + { + "Returns": returns_tensor.mean().item(), + "actor_loss": actor_loss.item(), + "critic_loss": critic_loss.item(), + } + ) + + self.optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_(self.network.parameters(), 0.5) + self.optimizer.step() diff --git a/dynamicalgorithmselection/agents/RLDAS_random_agent.py b/dynamicalgorithmselection/agents/RLDAS_random_agent.py new file mode 100644 index 0000000..71a55b5 --- /dev/null +++ b/dynamicalgorithmselection/agents/RLDAS_random_agent.py @@ -0,0 +1,199 @@ +import numpy as np +import torch +import copy +from typing import Any, Dict + +from dynamicalgorithmselection.agents.agent import Agent +from dynamicalgorithmselection.optimizers.Optimizer import Optimizer + +INITIAL_POPSIZE = 170 + + +class RLDASRandomAgent(Agent): + def __init__(self, problem, options): + super().__init__(problem, options) + + self.alg_names = [alg.__name__ for alg in self.actions] + self.n_algorithms = len(self.actions) + + self.ah_vectors = np.zeros((self.n_algorithms, 2, self.ndim_problem)) + self.alg_usage_counts = np.zeros(self.n_algorithms) + self.context_memory: Dict[str, Dict[str, Any]] = { + name: {} for name in self.alg_names + } + self.context_memory["Common"] = {} + self.mean_rewards = options.get("mean_rewards", []) + self.best_50_mean = float("inf") + self.schedule_interval = options.get( + "schedule_interval", int(self.max_function_evaluations / 50) + ) + + def _update_ah_history( + self, alg_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ): + sv_best_current = 
x_best_new - x_best_old + sv_worst_current = x_worst_new - x_worst_old + + H = self.alg_usage_counts[alg_idx] + + self.ah_vectors[alg_idx, 0] = ( + self.ah_vectors[alg_idx, 0] * H + sv_best_current + ) / (H + 1) + self.ah_vectors[alg_idx, 1] = ( + self.ah_vectors[alg_idx, 1] * H + sv_worst_current + ) / (H + 1) + + self.alg_usage_counts[alg_idx] += 1 + + def _save_context(self, optimizer, alg_name): + common_attrs = ["MF", "MCr", "archive"] + for attr in common_attrs: + if hasattr(optimizer, attr): + self.context_memory["Common"][attr] = getattr(optimizer, attr) + + specific_attrs = [] + if "JDE21" in alg_name: + specific_attrs = [ + "tau1", + "tau2", + "ageLmt", + "eps", + "myEqs", + ] + elif "MadDE" in alg_name: + specific_attrs = ["pm", "pbest", "PqBX"] + elif "NL_SHADE" in alg_name: + specific_attrs = ["NA", "pa"] + + for attr in specific_attrs: + if hasattr(optimizer, attr): + self.context_memory[alg_name][attr] = getattr(optimizer, attr) + + def _restore_context(self, optimizer, alg_name): + """ + Restores parameters to the optimizer from self.context_memory. 
+ """ + for attr, val in self.context_memory["Common"].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + if alg_name in self.context_memory: + for attr, val in self.context_memory[alg_name].items(): + if hasattr(optimizer, attr): + setattr(optimizer, attr, copy.deepcopy(val)) + + def _select_action(self): + with torch.no_grad(): + probs = torch.ones(size=(1, len(self.actions))) / len(self.actions) + dist = torch.distributions.Categorical(probs) + action = dist.sample() + + return action.item() + + def initialize(self): + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + size=(INITIAL_POPSIZE, self.ndim_problem), + ) + y = np.zeros((INITIAL_POPSIZE,)) + for i in range(INITIAL_POPSIZE): + y[i] = self._evaluate_fitness(x[i]) + return x, y + + def optimize(self, fitness_function=None, args=None): + """ + Main Optimization Loop implementing RL-DAS workflow (Algorithm 1). + Does NOT use checkpoints. Uses interval-based scheduling. 
+ """ + fitness = Optimizer.optimize(self, fitness_function) + population_x, population_y = self.initialize() + self.n_function_evaluations = INITIAL_POPSIZE + + best_idx = np.argmin(population_y) + best_y_global = population_y[best_idx] + best_x_global = population_x[best_idx].copy() + + self.best_so_far_y = best_y_global + self.best_so_far_x = best_x_global + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self.ah_vectors.fill(0.0) + self.alg_usage_counts.fill(0.0) + self.context_memory = {name: {} for name in self.alg_names} + self.context_memory["Common"] = {} + + while self.n_function_evaluations < self.max_function_evaluations: + action_idx = self._select_action() + self.choices_history.append(action_idx) + + selected_alg_class = self.actions[action_idx] + alg_name = self.alg_names[action_idx] + + sub_opt = selected_alg_class(self.problem, self.options) + sub_opt.n_function_evaluations = self.n_function_evaluations + sub_opt.max_function_evaluations = self.max_function_evaluations + + self._restore_context(sub_opt, alg_name) + + x_best_old = population_x[np.argmin(population_y)].copy() + x_worst_old = population_x[np.argmax(population_y)].copy() + + target_fes = min( + self.n_function_evaluations + self.schedule_interval, + self.max_function_evaluations, + ) + sub_opt.target_FE = target_fes + sub_opt.set_data( + x=population_x, + y=population_y, + best_x=self.best_so_far_x, + best_y=self.best_so_far_y, + ) + + result = sub_opt.optimize() + + self.fitness_history.extend(result["fitness_history"]) + + self._save_fitness( + result["best_so_far_x"], + result["best_so_far_y"], + result["worst_so_far_x"], + result["worst_so_far_y"], + ) + + population_x = result["x"] + population_y = result["y"] + + self.n_function_evaluations = sub_opt.n_function_evaluations + + self._save_context(sub_opt, alg_name) + + x_best_new: np.ndarray = population_x[np.argmin(population_y)].copy() + x_worst_new: np.ndarray = 
population_x[np.argmax(population_y)].copy() + cost_new: float = np.min(population_y) + + self._update_ah_history( + action_idx, x_best_old, x_best_new, x_worst_old, x_worst_new + ) + + best_y_global = min(best_y_global, cost_new) + + if cost_new < self.best_so_far_y: + self.best_so_far_y = cost_new + self.best_so_far_x = x_best_new + + self.history.append(self.best_so_far_y) + fitness.append(float(self.best_so_far_y)) + + self._n_generations += 1 + self._print_verbose_info(fitness, self.best_so_far_y) + + return self._collect(fitness, self.best_so_far_y) + + def _collect(self, fitness, y=None): + results, _ = super()._collect(fitness, y) + agent_state = {} + return results, agent_state diff --git a/dynamicalgorithmselection/agents/agent.py b/dynamicalgorithmselection/agents/agent.py index 0ed6827..abecf65 100644 --- a/dynamicalgorithmselection/agents/agent.py +++ b/dynamicalgorithmselection/agents/agent.py @@ -1,6 +1,8 @@ from itertools import product -from typing import List, Type, Optional +from typing import List, Type, Optional, Dict, Any, Tuple import numpy as np + +from dynamicalgorithmselection.agents.agent_reward import AgentReward from dynamicalgorithmselection.agents.agent_state import ( get_state_representation, StateNormalizer, @@ -8,6 +10,7 @@ from dynamicalgorithmselection.agents.agent_utils import ( get_checkpoints, StepwiseRewardNormalizer, + MAX_DIM, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer from dynamicalgorithmselection.optimizers.RestartOptimizer import restart_optimizer @@ -55,14 +58,15 @@ def __init__(self, problem, options): self.state_normalizer = self.options.get( "state_normalizer", StateNormalizer(input_shape=(self.state_dim,)) ) - self.initial_value_range = None + self.initial_value_range: Tuple[Optional[float], Optional[float]] = (None, None) + self.reward_method = AgentReward(self.options.get("reward_option", 1)) def get_partial_state( self, x: Optional[np.ndarray], y: Optional[np.ndarray], 
optimization_state: bool = False, - ) -> np.array: + ) -> np.ndarray: sr_additional_params = ( self.lower_boundary, self.upper_boundary, @@ -72,11 +76,15 @@ def get_partial_state( ) if x is None or y is None: - state_representation = self.state_representation( - np.zeros((50, self.ndim_problem)), - np.zeros((50,)), - sr_additional_params, - ) + if self.options.get("state_representation") != "ELA": + state_representation = self.state_representation( + np.zeros((50, self.ndim_problem)), + np.zeros((50,)), + sr_additional_params, + ) + else: + state_representation = (np.zeros((43,)),) + return np.append(state_representation, (0, 0) if optimization_state else ()) used_fe = self.n_function_evaluations / self.max_function_evaluations stagnation_coef = self.stagnation_count / self.max_function_evaluations @@ -106,7 +114,14 @@ def get_state( optimization_state = self.get_partial_state(x, y, True).flatten() state = np.concatenate((landscape_state, optimization_state)) else: - state = self.get_partial_state(x_history, y_history, True).flatten() + partial_state = self.get_partial_state(x_history, y_history, True).flatten() + state = np.append( + partial_state, + ( + self.ndim_problem / MAX_DIM, + self.n_function_evaluations / self.max_function_evaluations, + ), + ) return self.state_normalizer.normalize(state, update) def _print_verbose_info(self, fitness, y): @@ -138,8 +153,8 @@ def _check_early_stopping(self, best_y): self._counter_early_stopping, self._base_early_stopping = 0, best_y def _save_fitness(self, best_x, best_y, worst_x, worst_y): - if self.initial_value_range is None: - self.initial_value_range = max(worst_y - best_y, 1e-5) + if self.initial_value_range[0] is None: + self.initial_value_range = best_y, max(worst_y, best_y + 1e-5) self.best_parent = best_y self.history.append(best_y) @@ -151,16 +166,32 @@ def _save_fitness(self, best_x, best_y, worst_x, worst_y): self._check_early_stopping(best_y) - def iterate(self, optimizer_input_data=None, optimizer=None): + 
def iterate( + self, + optimizer_input_data: Optional[Dict] = None, + optimizer: Optional[Optimizer] = None, + ): + if optimizer_input_data is None or optimizer is None: + raise ValueError("Inputs to iterate cannot be None") optimizer_input_data["best_x"] = self.best_so_far_x optimizer_input_data["best_y"] = self.best_so_far_y - optimizer.set_data(**optimizer_input_data) + + historic_data = { + k[:-8]: v + for k, v in optimizer_input_data.items() + if k.endswith("_history") and k != "fitness_history" + } + current_data = { + k: v for k, v in optimizer_input_data.items() if not k.endswith("_history") + } + combined_input = current_data | historic_data + optimizer.set_data(**combined_input) if self._check_terminations(): return optimizer.get_data() self._n_generations += 1 - results = optimizer.optimize() + results: Dict[str, Any] = optimizer.optimize() self.fitness_history.extend(results["fitness_history"]) self._save_fitness( @@ -196,7 +227,7 @@ def _log_run_metrics(self): 1 if self.choices_history[i] == action_id else 0 ) for i, (action_id, action) in product( - range(self.n_checkpoints), enumerate(self.actions) + range(len(self.choices_history)), enumerate(self.actions) ) } self.run.log(checkpoint_choices) @@ -216,12 +247,9 @@ def _collect(self, fitness, y=None): def optimize(self, fitness_function=None, args=None): raise NotImplementedError - def get_reward(self, new_best_y, old_best_y): - if old_best_y == float("inf"): - return 0.0 - - improvement = old_best_y - new_best_y - - # return float(improvement > 1e-3) - reward = improvement / self.initial_value_range - return np.log(np.clip(reward, 0.0, 1.0) + 1e-5) + def get_reward( + self, new_best_y: float, old_best_y: float, is_final_checkpoint: bool = False + ): + return self.reward_method( + new_best_y, old_best_y, self.initial_value_range, is_final_checkpoint + ) diff --git a/dynamicalgorithmselection/agents/agent_reward.py b/dynamicalgorithmselection/agents/agent_reward.py new file mode 100644 index 
0000000..20c6ed4 --- /dev/null +++ b/dynamicalgorithmselection/agents/agent_reward.py @@ -0,0 +1,79 @@ +from typing import Tuple + +import numpy as np + + +class AgentReward: + def __init__(self, option: int): + self.reward_method = getattr(self, f"r{option}") + + def __call__( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + return self.reward_method( + new_best_y, old_best_y, initial_value_range, is_final_checkpoint + ) + + def r1( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return np.log(np.clip(reward, 0.0, 1.0) + 1e-5) + + def r2( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return np.clip(reward, 0.0, 1.0) + + def r3( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf") or not is_final_checkpoint: + return 0.0 + + improvement = initial_value_range[0] - new_best_y + scale = initial_value_range[1] - initial_value_range[0] + reward = improvement / scale + return np.log(reward + 1e-5) + + def r4( + self, + new_best_y: float, + old_best_y: float, + initial_value_range: Tuple[float, float], + is_final_checkpoint: bool = False, + ): + if old_best_y == float("inf"): + return 0.0 + + improvement = old_best_y - new_best_y + + reward = improvement / (initial_value_range[1] - initial_value_range[0]) + return 1.0 if reward >= 1e-3 else 0.0 diff --git 
a/dynamicalgorithmselection/agents/agent_state.py b/dynamicalgorithmselection/agents/agent_state.py index dc3b896..856dc9e 100644 --- a/dynamicalgorithmselection/agents/agent_state.py +++ b/dynamicalgorithmselection/agents/agent_state.py @@ -11,6 +11,8 @@ calculate_information_content, calculate_ela_distribution, # Information Content ) +from scipy.spatial.distance import pdist +from scipy.stats import spearmanr from dynamicalgorithmselection.NeurELA.NeurELA import feature_embedder from dynamicalgorithmselection.agents.agent_utils import MAX_DIM, RunningMeanStd @@ -32,9 +34,7 @@ def get_state_representation( x[-MAX_CONSIDERED_POPSIZE:], y[-MAX_CONSIDERED_POPSIZE:] )[0].mean(axis=0), 34 elif name == "ELA": - return lambda x, y, *args: ela_state_representation( - x[-MAX_CONSIDERED_POPSIZE:], y[-MAX_CONSIDERED_POPSIZE:] - ), 45 + return lambda x, y, *args: ela_state_representation(x, y), 47 elif name == "custom": return lambda x, y, args: AgentState( x, y, n_actions, *args @@ -46,20 +46,39 @@ def get_state_representation( def ela_state_representation(x, y, *args): with warnings.catch_warnings(): warnings.simplefilter("ignore") - x_norm, y_norm = ( - (x - x.mean()) / (x.std() + 1e-8), - (y - y.mean()) / (y.std() + 1e-8), + + _, unique_indices = np.unique(x, axis=0, return_index=True) + unique_indices = np.sort(unique_indices) + x_deduplicated = x[unique_indices][-MAX_CONSIDERED_POPSIZE:] + y_deduplicated = y[unique_indices][-MAX_CONSIDERED_POPSIZE:] + + x_raw = np.ascontiguousarray(x_deduplicated - x_deduplicated.mean()) / ( + x_deduplicated.std() + 1e-8 ) + y_raw = np.ascontiguousarray(y_deduplicated - y_deduplicated.mean()) / ( + y_deduplicated.std() + 1e-8 + ) + + x_norm = pd.DataFrame(x_raw).reset_index(drop=True) + x_norm.columns = [f"x_{i}" for i in range(x_norm.shape[1])] + y_norm = pd.Series(y_raw).reset_index(drop=True) + + is_unique = ~x_norm.duplicated() + + # If we lost data, re-slice to ensure alignment + if not is_unique.all(): + x_norm = 
x_norm[is_unique].reset_index(drop=True) + y_norm = y_norm[is_unique].reset_index(drop=True) + meta_feats = calculate_ela_meta(x_norm, y_norm) ela_distr = ( calculate_ela_distribution(x_norm, y_norm) - if (y**2).sum() > 0 + if ((y**2).sum() > 0 and np.var(y_norm) > 1e-8) else {str(i): 0 for i in range(4)} ) nbc_feats = calculate_nbc(x_norm, y_norm) disp_feats = calculate_dispersion(x_norm, y_norm) - df_temp = pd.DataFrame(x_norm) - df_temp.columns = [f"x_{i}" for i in range(df_temp.shape[1])] + ic_feats = calculate_information_content(x_norm, y_norm) all_features = { @@ -143,10 +162,10 @@ def get_weighted_central_moment(self, n: int): numerator = min((weights * norms_squared**exponent).sum(), 1e8) inertia_denom_w = np.linalg.norm(weights) inertia_denom_n = np.linalg.norm(norms_squared**exponent) - return numerator / max(1e-5, inertia_denom_w * max(1e-5, inertia_denom_n)) + return numerator / max(1e-5, inertia_denom_w * inertia_denom_n) def normalized_distance(self, x0: np.ndarray, x1: np.ndarray) -> float: - return min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0) + return float(min(np.linalg.norm(x0 - x1) / self.max_distance, 1.0)) def get_fitness_weights(self) -> np.ndarray: weights = ( @@ -156,9 +175,9 @@ def get_fitness_weights(self) -> np.ndarray: ) return weights / weights.sum() - def population_relative_radius(self) -> np.ndarray: + def population_relative_radius(self) -> float: population_radius = np.linalg.norm(self.x.max(axis=0) - self.x.min(axis=0)) - return population_radius / self.max_distance + return float(population_radius / self.max_distance) def slopes_stats(self) -> tuple: return get_list_stats( @@ -311,7 +330,7 @@ def get_state(self, optimization_status=False) -> np.ndarray: def distance(x0: np.ndarray, x1: np.ndarray) -> float: - return np.linalg.norm(x0 - x1) + return float(np.linalg.norm(x0 - x1)) def inverse_scaling(x): @@ -340,20 +359,102 @@ def normalize(self, state, update=True): update (bool): Whether to update the running 
statistics. Usually True during training, False during testing. """ - # Ensure state is an array state = np.asarray(state) - # If training, update the statistics if update: - # RunningMeanStd expects a batch, so we add a dimension if needed if len(state.shape) == 1: self.rms.update(state.reshape(1, -1)) else: self.rms.update(state) - # Calculate standard deviation std = np.sqrt(self.rms.var) + 1e-8 - # Normalize and Clip to prevent extreme outliers (e.g., -5 to 5) normalized_state = (state - self.rms.mean) / std return np.clip(normalized_state, -5.0, 5.0) + + +def get_la_features(agent, pop_x, pop_y): + """ + Extracts 9 Landscape Analysis features based on the logic in Population.py. + Uses a single-step random walk for sampling-based features (f5-f8) to + save function evaluations. + """ + n = len(pop_x) + + best_y = np.min(pop_y) + best_x = pop_x[np.argmin(pop_y)] + norm_factor = ( + agent.initial_cost + if hasattr(agent, "initial_cost") + and agent.initial_cost + and abs(agent.initial_cost) > 1e-9 + else 1.0 + ) + f1_gbc = best_y / norm_factor + + dists_to_best = np.linalg.norm(pop_x - best_x, axis=1) + if np.std(pop_y) < 1e-9 or np.std(dists_to_best) < 1e-9: + f2_fdc = 0.0 + else: + fdc, _ = spearmanr(pop_y, dists_to_best) + f2_fdc = fdc if not np.isnan(fdc) else 0.0 + + n_top = max(2, int(0.1 * n)) + if n > 1: + dist_matrix_all = pdist(pop_x) + disp_all = np.mean(dist_matrix_all) if len(dist_matrix_all) > 0 else 0.0 + + # Get distances for the top 10% individuals + top_idx = np.argsort(pop_y)[:n_top] + dist_matrix_top = pdist(pop_x[top_idx]) + disp_top = np.mean(dist_matrix_top) if len(dist_matrix_top) > 0 else 0.0 + + f3_disp = disp_all - disp_top + f4_disp_ratio = disp_top / disp_all if disp_all > 1e-9 else 0.0 + else: + f3_disp, f4_disp_ratio = 0.0, 0.0 + + # Adjust step size based on your search space bounds if available + step_scale = 0.01 + if hasattr(agent, "Xmax") and hasattr(agent, "Xmin"): + step_size = step_scale * (agent.Xmax - agent.Xmin) + else: 
+ step_size = step_scale + + random_walk_samples = pop_x + np.random.normal(0, step_size, size=pop_x.shape) + + # Evaluate the random walk samples + sample_costs = [agent.fitness_function(i) for i in random_walk_samples] + agent.n_function_evaluations += n # Increment evaluations by population size + + # Calculate differences between the walk and the current population + diffs = np.array(sample_costs) - pop_y + + # --- Feature 5: Negative Slope Coefficient (nsc) --- + # Proportion of steps that resulted in an improvement + f5_nsc = np.sum(diffs < 0) / n + + # --- Feature 6: Average Neutral Ratio (anr) --- + # Proportion of steps that resulted in practically zero change + eps = 1e-8 + f6_anr = np.sum(np.abs(diffs) < eps) / n + + f7_ni = np.sum(diffs >= 0) / n # Ratio of individuals that failed to improve + f8_nw = np.sum(diffs <= 0) / n # Ratio of individuals that failed to worsen + + # --- Feature 9: Progress --- + f9_progress = agent.n_function_evaluations / agent.max_function_evaluations + + return np.array( + [ + f1_gbc, + f2_fdc, + f3_disp, + f4_disp_ratio, + f5_nsc, + f6_anr, + f7_ni, + f8_nw, + f9_progress, + ] + ) diff --git a/dynamicalgorithmselection/agents/agent_utils.py b/dynamicalgorithmselection/agents/agent_utils.py index 6b9c1c8..0bb2f23 100644 --- a/dynamicalgorithmselection/agents/agent_utils.py +++ b/dynamicalgorithmselection/agents/agent_utils.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np MAX_DIM = 40 @@ -6,8 +8,9 @@ def get_runtime_stats( fitness_history: list[tuple[int, float]], function_evaluations: int, - checkpoints: np.ndarray, -) -> dict[str, float | list[float]]: +) -> dict[ + str, float | list[float] +]: # Changed from list[Optional[float]] to list[float] """ :param fitness_history: list of tuples [fe, fitness] with only points where best so far fitness improved :param function_evaluations: max number of function evaluations during run. 
@@ -16,35 +19,26 @@ def get_runtime_stats( """ area_under_optimization_curve = 0.0 last_i = 0 - checkpoint_idx = 0 - last_fitness = None - checkpoints_fitness = [] + for i, fitness in fitness_history: area_under_optimization_curve += fitness * (i - last_i) - while last_i <= checkpoints[checkpoint_idx] < i: - checkpoints_fitness.append(last_fitness) - checkpoint_idx += 1 last_i = i - last_fitness = fitness + area_under_optimization_curve += fitness_history[-1][1] * ( function_evaluations - fitness_history[-1][0] ) final_fitness = fitness_history[-1][1] - if function_evaluations == checkpoints[-1]: - while len(checkpoints_fitness) < len(checkpoints): - checkpoints_fitness.append(final_fitness) + return { "area_under_optimization_curve": area_under_optimization_curve / function_evaluations, "final_fitness": final_fitness, - "checkpoints_fitness": checkpoints_fitness, } def get_extreme_stats( fitness_histories: dict[str, list[tuple[int, float]]], function_evaluations: int, - checkpoints: np.ndarray, ) -> tuple[dict[str, float | list[float]], dict[str, float | list[float]]]: """ :param fitness_histories: list of lists of tuples [fe, fitness] with only points where best so far fitness improved for each algorithm @@ -72,25 +66,26 @@ def get_extreme_stats( key=lambda x: (x[0], -x[2]) ) # sort fe - increasing and by fitness - decreasing - current_fitness = { + current_fitnesses = { alg: float("inf") for alg in fitness_histories } # current best fitness for each algorithm current_worst_fitness = float("inf") # worst performance so far for each algorithm worst_history = [] for fe, algorithm, fitness in all_improvements: - if fitness < current_fitness[algorithm]: - current_fitness[algorithm] = fitness + if fitness < current_fitnesses[algorithm]: + current_fitnesses[algorithm] = fitness new_worst_fitness = max( - i for i in current_fitness.values() if i != float("inf") + i for i in current_fitnesses.values() if i != float("inf") ) if new_worst_fitness < current_worst_fitness: 
worst_history.append((fe, fitness)) current_worst_fitness = new_worst_fitness + # These now match the expected return type of tuple[dict[str, float | list[float]], ...] return ( - get_runtime_stats(best_history, function_evaluations, checkpoints), - get_runtime_stats(worst_history, function_evaluations, checkpoints), + get_runtime_stats(best_history, function_evaluations), + get_runtime_stats(worst_history, function_evaluations), ) diff --git a/dynamicalgorithmselection/agents/neuroevolution_agent.py b/dynamicalgorithmselection/agents/neuroevolution_agent.py index 0408287..abcd53b 100644 --- a/dynamicalgorithmselection/agents/neuroevolution_agent.py +++ b/dynamicalgorithmselection/agents/neuroevolution_agent.py @@ -42,7 +42,7 @@ def optimize(self, fitness_function=None, args=None): action_options["verbose"] = False optimizer = self.actions[action](self.problem, action_options) optimizer.n_function_evaluations = self.n_function_evaluations - optimizer._n_generations = 0 + setattr(optimizer, "_n_generations", 0) best_parent = self.best_so_far_y iteration_result = self.iterate(iteration_result, optimizer) x, y = iteration_result.get("x"), iteration_result.get("y") diff --git a/dynamicalgorithmselection/agents/policy_gradient_agent.py b/dynamicalgorithmselection/agents/policy_gradient_agent.py index 036ab9f..98ce440 100644 --- a/dynamicalgorithmselection/agents/policy_gradient_agent.py +++ b/dynamicalgorithmselection/agents/policy_gradient_agent.py @@ -17,8 +17,14 @@ class PolicyGradientAgent(Agent): def __init__(self, problem, options): Agent.__init__(self, problem, options) + buffer_len = int( + options.get("n_problems") + * self.n_checkpoints + * 0.17 + * options.get("n_epochs") + ) self.buffer = options.get("buffer") or RolloutBuffer( - capacity=options.get("ppo_batch_size", 2_500), device=DEVICE + capacity=buffer_len, device=DEVICE ) self.actor = Actor(n_actions=len(self.actions), input_size=self.state_dim).to( DEVICE @@ -34,7 +40,6 @@ def __init__(self, problem, 
options): self.mean_rewards = options.get("mean_rewards", []) self.best_50_mean = float("inf") - self.tau = self.options.get("critic_target_tau", 0.05) self.target_kl = 0.03 # Initialize history dict @@ -59,7 +64,7 @@ def _update_learning_rate(self, mean_kl): elif mean_kl < self.target_kl / 1.5: current_lr *= 1.5 - current_lr = np.clip(current_lr, 3e-6, 1e-4) + current_lr = np.clip(current_lr, 3e-6, 3e-4) for param_group in self.actor_optimizer.param_groups: param_group["lr"] = current_lr @@ -245,13 +250,14 @@ def _select_action(self, state, full_buffer): def _execute_action(self, action_idx, iteration_result): """Instantiates and runs the selected optimizer.""" action_options = {k: v for k, v in self.options.items()} - action_options["max_function_evaluations"] = min( - self.checkpoints[self._n_generations], - self.max_function_evaluations, - ) + action_options["max_function_evaluations"] = self.max_function_evaluations action_options["verbose"] = False optimizer = self.actions[action_idx](self.problem, action_options) + optimizer.target_FE = min( + self.checkpoints[self._n_generations], + self.max_function_evaluations, + ) optimizer.n_function_evaluations = self.n_function_evaluations optimizer._n_generations = 0 @@ -269,9 +275,15 @@ def _update_history(self, iteration_result): if historic_val is None: self.iterations_history[variable_name] = appended_val else: - self.iterations_history[variable_name] = np.concatenate( - (historic_val, appended_val) - ) + if appended_val.shape != (0,): + self.iterations_history[variable_name] = np.concatenate( + (historic_val, appended_val) + ) + else: + self.iterations_history[variable_name] = historic_val + self._counter_early_stopping = self.early_stopping_evaluations + # Population has collapsed - further optimization makes no sense + # case when sub-optimizer didn't run - DE variants case return iteration_result @@ -306,18 +318,18 @@ def optimize(self, fitness_function=None, args=None): iteration_result = {"x": x, "y": y} 
idx = 0 last_used_params = [] - while not self._check_terminations(): + while True: full_buffer = self.buffer.size() >= self.buffer.capacity - # 1. Prepare State (uses self.iterations_history internally) + # Prepare State (uses self.iterations_history internally) state = self._prepare_state_tensor(x, y, full_buffer) - # 2. Select Action + # Select Action action, log_prob, value = self._select_action(state, full_buffer) self.choices_history.append(action) - # 3. Execute Optimization Step best_parent = self.best_so_far_y + # Execute Optimization Step iteration_result, optimizer = self._execute_action(action, iteration_result) if len(last_used_params) > 0: @@ -327,16 +339,19 @@ def optimize(self, fitness_function=None, args=None): last_used_params = optimizer.start_condition_parameters x, y = iteration_result.get("x"), iteration_result.get("y") - # 4. Update and Deduplicate History (updates self.iterations_history internally) + # Update History iteration_result = self._update_history(iteration_result) - # 5. Process Reward + # Process Reward reward = self._process_step_reward(best_parent, idx, full_buffer) - # 6. Store in Buffer + # Store in Buffer self.n_function_evaluations = optimizer.n_function_evaluations - is_done = self.n_function_evaluations >= self.max_function_evaluations + is_done = ( + self.n_function_evaluations >= self.max_function_evaluations + or idx == self.n_checkpoints - 1 + ) self.buffer.add( state.squeeze(0).to(DEVICE), action, @@ -346,7 +361,7 @@ def optimize(self, fitness_function=None, args=None): value.detach(), ) - # 7. PPO Update if needed + # PPO Update if self.train_mode and self.buffer.size() >= batch_size: self.ppo_update( self.buffer, @@ -356,7 +371,6 @@ def optimize(self, fitness_function=None, args=None): entropy_coef=entropy_coef, ) - # 8. 
Post-step updates entropy_coef = max(entropy_coef * 0.99, 0.001) self._print_verbose_info(fitness, y) @@ -370,5 +384,7 @@ def optimize(self, fitness_function=None, args=None): self.n_function_evaluations = optimizer.n_function_evaluations idx += 1 + if self._check_terminations(): + break return self._collect(fitness, self.best_so_far_y) diff --git a/dynamicalgorithmselection/agents/ppo_utils.py b/dynamicalgorithmselection/agents/ppo_utils.py index 651631a..cf1f19f 100644 --- a/dynamicalgorithmselection/agents/ppo_utils.py +++ b/dynamicalgorithmselection/agents/ppo_utils.py @@ -139,3 +139,51 @@ def __init__(self): def forward(self, advantage, log_prob): return -advantage * log_prob + + +class RLDASNetwork(nn.Module): + def __init__(self, d_dim, num_algorithms, la_dim=9): + super(RLDASNetwork, self).__init__() + self.L = num_algorithms + self.D = d_dim + self.la_dim = la_dim + + self.ah_input_flat_dim = self.L * 2 * self.D + + self.ah_embed = nn.Sequential( + nn.Linear(self.ah_input_flat_dim, 64), + nn.ReLU(), + nn.Linear(64, 2 * self.L), # Output size aligned with paper description + nn.ReLU(), + ) + self.fusion_input_dim = self.la_dim + (2 * self.L) + + self.dv_layer = nn.Sequential(nn.Linear(self.fusion_input_dim, 64), nn.Tanh()) + + self.actor_head = nn.Sequential( + nn.Linear(64, 16), nn.Tanh(), nn.Linear(16, self.L), nn.Softmax(dim=-1) + ) + + self.critic_head = nn.Sequential( + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1), # Scalar Value + ) + + def forward(self, la_state, ah_state): + if ah_state.dim() > 2: + batch_size = ah_state.size(0) + ah_flat = ah_state.view(batch_size, -1) + else: + ah_flat = ah_state + + v_ah = self.ah_embed(ah_flat) + + combined = torch.cat([la_state, v_ah], dim=1) + + dv = self.dv_layer(combined) + + probs = self.actor_head(dv) + value = self.critic_head(dv) + + return probs, value diff --git a/dynamicalgorithmselection/agents/random_agent.py b/dynamicalgorithmselection/agents/random_agent.py index a8e5551..3c73df0 100644 --- 
a/dynamicalgorithmselection/agents/random_agent.py +++ b/dynamicalgorithmselection/agents/random_agent.py @@ -41,7 +41,7 @@ def _update_history(self, iteration_result): if historic_val is None: self.iterations_history[variable_name] = appended_val - else: + elif appended_val.shape != (0,): self.iterations_history[variable_name] = np.concatenate( (historic_val, appended_val) ) @@ -54,7 +54,7 @@ def optimize(self, fitness_function=None, args=None): self.iterations_history = {"x": None, "y": None} iteration_result = {"x": x, "y": y} last_used_params = [] - while not self._check_terminations(): + while True: action = self._select_action() iteration_result, optimizer = self._execute_action(action, iteration_result) if len(last_used_params) > 0: @@ -67,4 +67,6 @@ def optimize(self, fitness_function=None, args=None): self.n_function_evaluations = optimizer.n_function_evaluations self._print_verbose_info(fitness, y) self.n_function_evaluations = optimizer.n_function_evaluations + if self._check_terminations() or self._n_generations == self.n_checkpoints: + break return self._collect(fitness, self.best_so_far_y) diff --git a/dynamicalgorithmselection/experiments/core.py b/dynamicalgorithmselection/experiments/core.py index 357145c..6ab5405 100644 --- a/dynamicalgorithmselection/experiments/core.py +++ b/dynamicalgorithmselection/experiments/core.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Type, Any import cocoex import numpy as np @@ -19,7 +19,7 @@ def run_testing( problem_ids: list[str], observer: cocoex.Observer, ): - for problem_id in tqdm(problem_ids): + for problem_id in tqdm(problem_ids, smoothing=0.0): problem_instance = problems_suite.get_problem(problem_id) problem_instance.observe_with(observer) max_fe = evaluations_multiplier * problem_instance.dimension @@ -33,9 +33,6 @@ def run_testing( options.get("name"), problem_id, max_fe, - options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) @@ -46,8 +43,11 @@ def 
run_training( problems_suite: cocoex.Suite, problem_ids: list[str], ): - agent_state = {} - for problem_id in tqdm(np.random.permutation(problem_ids)): + agent_state: dict[str, Any] = {} + n_epochs = options["n_epochs"] + for problem_id in tqdm( + np.random.permutation(problem_ids).tolist() * n_epochs, smoothing=0.0 + ): problem_instance = problems_suite.get_problem(problem_id) max_fe = evaluations_multiplier * problem_instance.dimension options["max_function_evaluations"] = max_fe diff --git a/dynamicalgorithmselection/experiments/cross_validation.py b/dynamicalgorithmselection/experiments/cross_validation.py index 584c543..8a10b2f 100644 --- a/dynamicalgorithmselection/experiments/cross_validation.py +++ b/dynamicalgorithmselection/experiments/cross_validation.py @@ -1,6 +1,6 @@ import os from itertools import product -from typing import Type +from typing import Type, List import cocoex import numpy as np @@ -18,14 +18,17 @@ def run_cross_validation( optimizer: Type[Optimizer], options: dict, evaluations_multiplier: int = 1_000, - is_loio: bool = True, + leaving_mode: str = "LOIO", ): results_dir = os.path.join("results", f"{options.get('name')}") if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, cv_folds = _get_cv_folds(4, is_loio) - observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) + problems_suite, cv_folds = _get_cv_folds( + 4 if leaving_mode != "LODO" else 3, leaving_mode, options.get("dimensionality") + ) + options["n_problems"] = len(cv_folds[0]) + observer = cocoex.Observer("bbob", "result_folder: " + options["name"]) for i, (train_set, test_set) in enumerate(cv_folds): print(f"Running cross validation training, fold {i + 1}") run_training( @@ -53,31 +56,32 @@ def run_cross_validation( return observer.result_folder -def _get_cv_folds(n: int, is_loio: bool): +def _get_cv_folds(n: int, leaving_mode: str, dim: List[int]): """ :param n: number of cross validation folds :param 
is_loio: boolean to indicate how train and test sets should be split (leave-instance-out/leave-problem-out). + :param dim: dimensionality of the problems. None indicates all of them. :return suite, list of (train set, test set) pairs: """ - np.random.seed(1234) cocoex.utilities.MiniPrint() problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, DIMENSIONS) + for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, dim) ] remaining_problem_ids = set(all_problem_ids) + remaining_dimensions = set(DIMENSIONS) remaining_function_ids = {i for i in ALL_FUNCTIONS} test_sets = [] for i in range(n): - if is_loio: + if leaving_mode == "LOIO": selected = np.random.choice( list(remaining_problem_ids), size=len(all_problem_ids) // n, replace=False, ).tolist() remaining_problem_ids = remaining_problem_ids.difference(selected) - else: + elif leaving_mode == "LOPO": selected_functions = np.random.choice( list(remaining_function_ids), size=len(ALL_FUNCTIONS) // n, @@ -91,9 +95,25 @@ def _get_cv_folds(n: int, is_loio: bool): remaining_function_ids = remaining_function_ids.difference( selected_functions ) + else: + selected_dimensionalities = np.random.choice( + list(remaining_dimensions), + size=len(DIMENSIONS) // n, + replace=False, + ).tolist() + selected = [ + i + for i in all_problem_ids + if any(i.endswith(f"d{dim:02d}") for dim in selected_dimensionalities) + ] + remaining_dimensions = remaining_dimensions.difference( + selected_dimensionalities + ) test_sets.append(selected) - - return problems_suite, [ + folds = [ (list(set(all_problem_ids).difference(test_set)), test_set) for test_set in test_sets ] + for fold in folds: + np.random.shuffle(fold[0]) + return problems_suite, folds diff --git a/dynamicalgorithmselection/experiments/experiment.py b/dynamicalgorithmselection/experiments/experiment.py index 489fd02..48b4692 100644 ---
a/dynamicalgorithmselection/experiments/experiment.py +++ b/dynamicalgorithmselection/experiments/experiment.py @@ -2,8 +2,6 @@ import os from typing import Type, Optional -import cocopp - from dynamicalgorithmselection.experiments.core import run_testing, run_training from dynamicalgorithmselection.experiments.cross_validation import run_cross_validation from dynamicalgorithmselection.experiments.neuroevolution import ( @@ -20,31 +18,21 @@ from dynamicalgorithmselection.agents.agent_utils import ( get_extreme_stats, - get_checkpoints, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer def dump_extreme_stats( - optimizer_portfolio: list[Type[Optimizer]], + name: str, stats, problem_instance, max_function_evaluations, - n_checkpoints, - n_individuals, - cdb, ): - checkpoints = get_checkpoints( - n_checkpoints, max_function_evaluations, n_individuals or 100, cdb - ) - best_case, worst_case = get_extreme_stats( - stats, max_function_evaluations, checkpoints - ) - portfolio_name = "_".join(i.__name__ for i in optimizer_portfolio) + best_case, worst_case = get_extreme_stats(stats, max_function_evaluations) with open( os.path.join( "results", - f"{portfolio_name}_best", + f"{name}_best", f"{problem_instance}.json", ), "w", @@ -56,7 +44,7 @@ def dump_extreme_stats( with open( os.path.join( "results", - f"{portfolio_name}_worst", + f"{name}_worst", f"{problem_instance}.json", ), "w", @@ -68,7 +56,7 @@ def dump_extreme_stats( def coco_bbob_experiment( - optimizer: Type[Optimizer], + optimizer: Optional[Type[Optimizer]], options: dict, name: str, evaluations_multiplier: int = 1_000, @@ -79,15 +67,13 @@ def coco_bbob_experiment( options["name"] = name if mode.startswith("CV"): return run_cross_validation( - optimizer, options, evaluations_multiplier, is_loio=mode.endswith("LOIO") + optimizer, options, evaluations_multiplier, leaving_mode=mode[-4:] ) - elif agent == "random": - # running random baseline + elif agent in ["random", "RL-DAS-random"]: return 
_coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode) elif options.get("baselines"): - # running only baselines return run_comparison( - options.get("optimizer_portfolio"), options, evaluations_multiplier + options["optimizer_portfolio"], options, evaluations_multiplier ) elif not train: return _coco_bbob_test(optimizer, options, evaluations_multiplier, mode) @@ -111,7 +97,8 @@ def _coco_bbob_policy_gradient_train( if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite(mode, True) + problems_suite, problem_ids = get_suite(mode, True, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) run_training( optimizer, options, evaluations_multiplier, problems_suite, problem_ids ) @@ -127,8 +114,9 @@ def _coco_bbob_test( if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite(mode, False) - observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) + problems_suite, problem_ids = get_suite(mode, False, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) + observer = cocoex.Observer("bbob", "result_folder: " + options["name"]) run_testing( optimizer, options, @@ -145,7 +133,10 @@ def _coco_bbob_test_all(optimizer, options, evaluations_multiplier, mode): if not os.path.exists(results_dir): os.mkdir(results_dir) cocoex.utilities.MiniPrint() - problems_suite, problem_ids = get_suite("baselines", False) + problems_suite, problem_ids = get_suite( + "baselines", False, options.get("dimensionality") + ) + options["n_problems"] = len(problem_ids) observer = cocoex.Observer("bbob", "result_folder: " + options.get("name")) run_testing( optimizer, @@ -170,32 +161,36 @@ def run_comparison( print("Initializing Observers...") for optimizer in optimizer_portfolio: optimizer_name = optimizer.__name__ + case_name = f"{options['name']}_{optimizer_name}" - results_dir = 
os.path.join("results", f"{optimizer_name}") + results_dir = os.path.join("results", case_name) os.makedirs(results_dir, exist_ok=True) - observer = cocoex.Observer("bbob", "result_folder: " + optimizer_name) + observer = cocoex.Observer("bbob", "result_folder: " + case_name) observers[optimizer_name] = observer - results_folders.append("exdata/" + optimizer_name) # Adjust path if needed + results_folders.append("exdata/" + case_name) # Adjust path if needed - suites[optimizer_name] = get_suite("all", False)[0] + suites[optimizer_name] = get_suite("all", False, options.get("dimensionality"))[ + 0 + ] # Create directories for best/worst JSON stats - portfolio_name = "_".join(i.__name__ for i in optimizer_portfolio) for ext in ["best", "worst"]: - os.makedirs(os.path.join("results", f"{portfolio_name}_{ext}"), exist_ok=True) + os.makedirs(os.path.join("results", f"{options['name']}_{ext}"), exist_ok=True) cocoex.utilities.MiniPrint() # We use the problem_ids from the first suite to iterate - _, problem_ids = get_suite("all", False) + _, problem_ids = get_suite("all", False, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) - for problem_id in tqdm(problem_ids, desc="Evaluating Problems"): + for problem_id in tqdm(problem_ids, desc="Evaluating Problems", smoothing=0.0): stats = {} max_fe = None for optimizer in optimizer_portfolio: optimizer_name = optimizer.__name__ + result_folder_name = f"{options['name']}_{optimizer_name}" problem_instance = suites[optimizer_name].get_problem(problem_id) problem_instance.observe_with(observers[optimizer_name]) @@ -211,20 +206,14 @@ def run_comparison( stats[optimizer_name] = results["fitness_history"] dump_stats( results[0] if isinstance(results, tuple) else results, - optimizer_name, + result_folder_name, problem_id, max_fe, - options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) dump_extreme_stats( - optimizer_portfolio, + options.get("name"), stats, problem_id, max_fe, - 
options.get("n_checkpoints"), - options.get("n_individuals"), - options.get("cdb"), ) diff --git a/dynamicalgorithmselection/experiments/neuroevolution.py b/dynamicalgorithmselection/experiments/neuroevolution.py index 30420dc..33cc59c 100644 --- a/dynamicalgorithmselection/experiments/neuroevolution.py +++ b/dynamicalgorithmselection/experiments/neuroevolution.py @@ -84,7 +84,10 @@ def evaluate(self, genomes, config): jobs.append(self.pool.apply_async(self.eval_function, (genome, config))) for job, (ignored_genome_id, genome) in tqdm( - zip(jobs, genomes), total=len(jobs), desc="Evaluating Genomes" + zip(jobs, genomes), + total=len(jobs), + desc="Evaluating Genomes", + smoothing=0.0, ): # Result is now a tuple: (fitness, log_dict) fitness, log_data = job.get(timeout=self.timeout) @@ -171,7 +174,8 @@ def _coco_bbob_neuroevolution_train( mode: str = "easy", ): cocoex.utilities.MiniPrint() - _, problem_ids = get_suite(mode, True) + _, problem_ids = get_suite(mode, True, options.get("dimensionality")) + options["n_problems"] = len(problem_ids) batch_size = 30 input_dim = None if options.get("state_representation") == "ELA": @@ -181,9 +185,12 @@ def _coco_bbob_neuroevolution_train( elif options.get("state_representation") == "custom": input_dim = BASE_STATE_SIZE + 2 * len(options.get("action_space")) + 2 + action_space = options.get("action_space") + if not action_space: + raise Exception("No action space") adjust_config( input_dim, - len(options.get("action_space")), + len(action_space), ) config = neat.Config( diff --git a/dynamicalgorithmselection/experiments/utils.py b/dynamicalgorithmselection/experiments/utils.py index 977065d..f59a136 100644 --- a/dynamicalgorithmselection/experiments/utils.py +++ b/dynamicalgorithmselection/experiments/utils.py @@ -1,13 +1,12 @@ import json import os from itertools import islice, product -from typing import Type +from typing import Type, List import cocoex import numpy as np from dynamicalgorithmselection.agents.agent_utils 
import ( - get_checkpoints, get_runtime_stats, ) from dynamicalgorithmselection.optimizers.Optimizer import Optimizer @@ -48,17 +47,18 @@ def coco_bbob_single_function( return results -def get_suite(mode, train): +def get_suite(mode: str, train: bool, dim: List[int]): """ :param mode: mode of the training (LOPO: easy and hard) or LOIO :param train: if suite should be for testing or training: + :param dim: dimensionality of suite's problem. None indicates all of them :return suite and list of problem ids: """ cocoex.utilities.MiniPrint() problems_suite = cocoex.Suite("bbob", "", "") all_problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, DIMENSIONS) + for i_id, f_id, dim in product(INSTANCE_IDS, ALL_FUNCTIONS, dim) ] if mode in ["easy", "hard"]: easy = mode == "easy" @@ -70,17 +70,17 @@ def get_suite(mode, train): problem_ids = [ f"bbob_f{f_id:03d}_i{i_id:02d}_d{dim:02d}" - for i_id, f_id, dim in product(INSTANCE_IDS, function_ids, DIMENSIONS) + for i_id, f_id, dim in product(INSTANCE_IDS, function_ids, dim) ] elif mode == "LOIO": - with open("LOIO_train_set.json") as f: - problem_ids = json.load(f)["data"] - np.random.seed(1234) + train_problem_ids = np.random.choice( + all_problem_ids, size=2 * len(all_problem_ids) // 3, replace=False + ) if train: - pass + problem_ids = train_problem_ids else: - problem_ids = list(set(all_problem_ids).difference(problem_ids)) + problem_ids = list(set(all_problem_ids).difference(train_problem_ids)) elif mode == "CV": raise ValueError("CV mode is not suitable for get_suite function") else: @@ -93,13 +93,7 @@ def dump_stats( name, problem_instance, max_function_evaluations, - n_checkpoints, - n_individuals, - cdb, ): - checkpoints = get_checkpoints( - n_checkpoints, max_function_evaluations, n_individuals or 100, cdb - ) with open( os.path.join( "results", @@ -113,7 +107,6 @@ def dump_stats( problem_instance: get_runtime_stats( results["fitness_history"], 
max_function_evaluations, - checkpoints, ) }, f, diff --git a/dynamicalgorithmselection/main.py b/dynamicalgorithmselection/main.py index c120fa2..52e877d 100644 --- a/dynamicalgorithmselection/main.py +++ b/dynamicalgorithmselection/main.py @@ -2,23 +2,30 @@ import os import pickle import shutil -from typing import List, Type, Optional +from random import seed as set_random_seed +from typing import List, Type, Dict, Any import cocopp import neat +import numpy as np import torch import wandb +from dynamicalgorithmselection.agents.RLDAS_agent import RLDASAgent +from dynamicalgorithmselection.agents.RLDAS_random_agent import RLDASRandomAgent from dynamicalgorithmselection.agents.neuroevolution_agent import NeuroevolutionAgent from dynamicalgorithmselection.agents.policy_gradient_agent import PolicyGradientAgent from dynamicalgorithmselection.agents.random_agent import RandomAgent from dynamicalgorithmselection.experiments.experiment import coco_bbob_experiment from dynamicalgorithmselection import optimizers +from dynamicalgorithmselection.experiments.utils import DIMENSIONS from dynamicalgorithmselection.optimizers.Optimizer import Optimizer AGENTS_DICT = { "random": RandomAgent, "neuroevolution": NeuroevolutionAgent, "policy-gradient": PolicyGradientAgent, + "RL-DAS": RLDASAgent, + "RL-DAS-random": RLDASRandomAgent, } @@ -40,7 +47,7 @@ def parse_arguments(): parser.add_argument( "-m", "--population_size", - type=Optional[int], + type=int, default=None, help="Population size (default: 20)", ) @@ -68,14 +75,6 @@ def parse_arguments(): help="Run also in test mode (default: True)", ) - parser.add_argument( - "-c", - "--compare", - action=argparse.BooleanOptionalAction, - default=False, - help="Enable comparison with each algorithm alone (False by default)", - ) - parser.add_argument( "-e", "--wandb_entity", @@ -97,7 +96,7 @@ def parse_arguments(): "--agent", type=str, default="policy-gradient", - choices=["random", "neuroevolution", "policy-gradient"], + 
choices=list(AGENTS_DICT.keys()), help="specify which agent to use", ) @@ -105,8 +104,8 @@ def parse_arguments(): "-l", "--mode", type=str, - default="easy", - choices=["LOIO", "hard", "easy", "CV-LOIO", "CV-LOPO", "baselines"], + default="LOIO", + choices=["LOIO", "hard", "easy", "CV-LODO", "CV-LOIO", "CV-LOPO", "baselines"], help="specify which agent to use", ) @@ -134,6 +133,40 @@ def parse_arguments(): default=False, help="Enable selection of forcibly restarting optimizers", ) + + parser.add_argument( + "-D", + "--dimensionality", + choices=DIMENSIONS, + nargs="+", + type=int, + default=DIMENSIONS, + help="dimensionality of problems", + ) + + parser.add_argument( + "-E", + "--n_epochs", + type=int, + default=1, + help="number of training epochs", + ) + + parser.add_argument( + "-O", + "--reward-option", + type=int, + default=1, + help="id of method used to compute reward", + ) + + parser.add_argument( + "-S", + "--seed", + type=int, + default=42, + help="seed", + ) return parser.parse_args() @@ -148,27 +181,40 @@ def print_info(args): print("Population size: ", args.population_size) print("Function eval multiplier: ", args.fe_multiplier) print("Test mode: ", args.test) - print("Compare mode: ", args.compare) + print("Mode: ", args.mode) print("Weights and Biases entity: ", args.wandb_entity) print("Weights and Biases project: ", args.wandb_project) print("Agent type: ", args.agent if args.mode != "baselines" else None) print("Exponential checkpoint division base: ", args.cdb) print("State representation variant: ", args.state_representation) print("Forcing restarts: ", args.force_restarts) + print("Dimensionality of problems: ", args.dimensionality) + print("Number of training epochs: ", args.n_epochs) + print("Rewarding option: ", args.reward_option) -def test(args, action_space): - if os.path.exists(os.path.join("exdata", f"DAS_{args.name}")): - shutil.rmtree(os.path.join("exdata", f"DAS_{args.name}")) - +def common_options(args) -> Dict[str, Any]: options = 
{ "n_checkpoints": args.n_checkpoints, "n_individuals": args.population_size, - "action_space": action_space, "cdb": args.cdb, "state_representation": args.state_representation, "force_restarts": args.force_restarts, + "dimensionality": args.dimensionality, + "n_epochs": args.n_epochs, + "reward_option": args.reward_option, + "seed": args.seed, } + return options + + +def test(args, action_space): + if os.path.exists(os.path.join("exdata", f"DAS_{args.name}")): + shutil.rmtree(os.path.join("exdata", f"DAS_{args.name}")) + + options = { + "action_space": action_space, + } | common_options(args) # agent_state = torch.load(f) if args.agent == "neuroevolution": config = neat.Config( @@ -215,14 +261,10 @@ def run_training(args, action_space): coco_bbob_experiment( AGENTS_DICT[args.agent], { - "n_checkpoints": args.n_checkpoints, - "n_individuals": args.population_size, "run": run, "action_space": action_space, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - }, + } + | common_options(args), name=f"DAS_train_{args.name}", evaluations_multiplier=args.fe_multiplier, train=True, @@ -236,17 +278,17 @@ def run_training(args, action_space): def run_CV(args, action_space): if os.path.exists(os.path.join("exdata", f"DAS_CV_{args.name}")): shutil.rmtree(os.path.join("exdata", f"DAS_CV_{args.name}")) + if args.mode == "CV-LODO" and args.dimensionality != DIMENSIONS: + raise ValueError( + "FOR Leave-One-Dimension-Out scenario all dimensionalities must be provided." 
+ ) coco_bbob_experiment( AGENTS_DICT[args.agent], { - "n_checkpoints": args.n_checkpoints, - "n_individuals": args.population_size, "run": None, "action_space": action_space, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - }, + } + | common_options(args), name=f"DAS_CV_{args.name}", evaluations_multiplier=args.fe_multiplier, train=True, @@ -258,34 +300,45 @@ def run_CV(args, action_space): def run_baselines(args, action_space): for optimizer in action_space: - if os.path.exists(os.path.join("exdata", optimizer.__name__)): - shutil.rmtree(os.path.join("exdata", optimizer.__name__)) - - print(f"--- Running Baseline: {optimizer.__name__} ---") - - # 2. Run experiment for ONLY this optimizer - # NOTICE: We pass `[optimizer]` instead of `action_space` here. - coco_bbob_experiment( - None, - { - "optimizer_portfolio": [optimizer], # <--- FIXED: List of 1 - "n_individuals": args.population_size, - "baselines": True, - "n_checkpoints": args.n_checkpoints, - "cdb": args.cdb, - "state_representation": args.state_representation, - "force_restarts": args.force_restarts, - }, - name=optimizer.__name__, - evaluations_multiplier=args.fe_multiplier, - train=False, - agent=None, + if os.path.exists( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") + ): + shutil.rmtree( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") + ) + + coco_bbob_experiment( + None, + { + "optimizer_portfolio": action_space, + "baselines": True, + } + | common_options(args), + name=f"{args.name}_baselines", + evaluations_multiplier=args.fe_multiplier, + train=False, + agent=None, + ) + for optimizer in action_space: + cocopp.main( + os.path.join("exdata", f"{args.name}_baselines_{optimizer.__name__}") ) - cocopp.main(os.path.join("exdata", optimizer.__name__)) + + +def set_seed(seed): + os.environ["PYTHONHASHSEED"] = str(seed) + # Torch RNG + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + 
torch.cuda.manual_seed_all(seed) + # Python RNG + np.random.seed(seed) + set_random_seed(seed) def main(): args = parse_arguments() + set_seed(args.seed) print_info(args) available_optimizers = optimizers.available_optimizers action_space: List[Type[Optimizer]] = [] @@ -294,18 +347,16 @@ def main(): raise ValueError(f'Unknown optimizer "{optimizer}"') else: action_space.append(available_optimizers[optimizer]) - if not os.path.exists("models"): - os.mkdir("models") - if not os.path.exists("results"): - os.mkdir("results") + os.makedirs("models", exist_ok=True) + os.makedirs("results", exist_ok=True) if args.mode.startswith("CV"): run_CV(args, action_space) else: - if args.agent != "random" and args.mode != "baselines": + if args.agent not in ["random", "RL-DAS-random"] and args.mode != "baselines": run_training(args, action_space) if args.test and args.mode != "baselines": test(args, action_space) - if args.compare or args.mode == "baselines": + if args.mode == "baselines": run_baselines(args, action_space) diff --git a/dynamicalgorithmselection/optimizers/DE/DE.py b/dynamicalgorithmselection/optimizers/DE/DE.py new file mode 100644 index 0000000..fdbb78b --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/DE.py @@ -0,0 +1,61 @@ +import numpy as np # engine for numerical computing +from dynamicalgorithmselection.optimizers.Optimizer import Optimizer + + +class DE(Optimizer): + def __init__(self, problem, options): + Optimizer.__init__(self, problem, options) + if ( + self.n_individuals is None + ): # number of offspring, aka offspring population size + self.n_individuals = 170 + assert self.n_individuals > 0 + self._n_generations = 0 # number of generations + self._printed_evaluations = self.n_function_evaluations + + def initialize(self): + raise NotImplementedError + + def mutate(self): + raise NotImplementedError + + def crossover(self): + raise NotImplementedError + + def select(self): + raise NotImplementedError + + def iterate(self): + raise 
NotImplementedError + + def _print_verbose_info(self, fitness, y, is_print=False): + if y is not None and self.saving_fitness: + if not np.isscalar(y): + fitness.extend(y) + else: + fitness.append(y) + if self.verbose: + is_verbose = ( + self._printed_evaluations != self.n_function_evaluations + ) # to avoid repeated printing + is_verbose_1 = (not self._n_generations % self.verbose) and is_verbose + is_verbose_2 = self.termination_signal > 0 and is_verbose + is_verbose_3 = is_print and is_verbose + if is_verbose_1 or is_verbose_2 or is_verbose_3: + info = " * Generation {:d}: best_so_far_y {:7.5e}, min(y) {:7.5e} & Evaluations {:d}" + print( + info.format( + self._n_generations, + self.best_so_far_y, + np.min(y), + self.n_function_evaluations, + ) + ) + self._printed_evaluations = self.n_function_evaluations + + def _collect(self, fitness=None, y=None): + self._print_verbose_info(fitness, y) + results = Optimizer._collect(self, fitness) + results["_n_generations"] = self._n_generations + results.update(self.results) + return results diff --git a/dynamicalgorithmselection/optimizers/DE/JDE21.py b/dynamicalgorithmselection/optimizers/DE/JDE21.py new file mode 100644 index 0000000..ce15507 --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/JDE21.py @@ -0,0 +1,358 @@ +import numpy as np +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class JDE21(DE): + start_condition_parameters = ["x", "y", "F", "Cr"] + + def __init__(self, problem, options): + super().__init__(problem, options) + + # Mathematical minimum population limit to survive RL starvation + self.Nmin = 4 + + # Population parameters + # We start with the base sizes defined in the j21 paper, + # though set_data/initialize will override this if the RL agent injects a different size. 
+ self.bNP = 160 + self.sNP = 10 + self.n_individuals = self.bNP + self.sNP + + # Stagnation and Reset parameters + self.age = 0 + self.eps = 1e-12 # Tolerance for fitness equality + self.MyEps = 0.25 # Threshold ratio (25%) for reset + self.reductions_done = 0 + + # Self-adaptation probabilities + self.tau1 = 0.1 + self.tau2 = 0.1 + self.Finit = 0.5 + self.CRinit = 0.9 + + # Parameter Limits (Big Population) + self.Fl_b = 0.1 + self.CRl_b = 0.0 + self.CRu_b = 1.1 + + # Parameter Limits (Small Population) + self.Fl_s = 0.17 + self.CRl_s = 0.1 + self.CRu_s = 0.8 + + # Shared Upper Bound for F + self.Fu = 1.1 + + self.F = np.full(self.n_individuals, self.Finit) + self.Cr = np.full(self.n_individuals, self.CRinit) + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + else: + self.n_individuals = x.shape[0] + self.sNP = min(10, max(1, self.n_individuals // 4)) + self.bNP = self.n_individuals - self.sNP + if y is None: + y = np.array( + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x) + ] + ) + return x, y + + def _reflect_bounds(self, v): + v = np.where( + v < self.initial_lower_boundary, 2 * self.initial_lower_boundary - v, v + ) + + v = np.where( + v > self.initial_upper_boundary, 2 * self.initial_upper_boundary - v, v + ) + + v = np.clip(v, self.initial_lower_boundary, self.initial_upper_boundary) + return v + + def _check_population_reduction(self, x, y): + # SYNCHRONIZATION + actual_size = len(y) + if actual_size != self.n_individuals: + self.n_individuals = actual_size + self.sNP = min(10, max(1, actual_size // 4)) + self.bNP = self.n_individuals - self.sNP + + if len(self.F) != actual_size: + self.F = np.full(actual_size, self.Finit) + self.Cr = np.full(actual_size, self.CRinit) + + # REDUCTION LOGIC + thresholds = [0.25, 0.50, 0.75] + if 
self.reductions_done < len(thresholds): + progress = self.n_function_evaluations / self.max_function_evaluations + if progress >= thresholds[self.reductions_done]: + # Calculate the standard halved size for the big population + new_bNP = self.bNP // 2 + + min_allowed_bNP = max(1, self.Nmin - self.sNP) + new_bNP = max(new_bNP, min_allowed_bNP) + + # Only perform the competition if we are actually shrinking the array + if new_bNP < self.bNP: + part1_idx = np.arange(new_bNP) + part2_idx = np.arange(new_bNP, 2 * new_bNP) + + keep_idx = [] + for i, j in zip(part1_idx, part2_idx): + if j < self.bNP: + keep_idx.append(i if y[i] <= y[j] else j) + else: + keep_idx.append(i) + + keep_b_idx = np.array(keep_idx, dtype=int) + s_idx = np.arange(int(self.bNP), int(self.n_individuals), dtype=int) + + x = np.concatenate([x[keep_b_idx], x[s_idx]], axis=0) + y = np.concatenate([y[keep_b_idx], y[s_idx]], axis=0) + self.F = np.concatenate([self.F[keep_b_idx], self.F[s_idx]], axis=0) + self.Cr = np.concatenate( + [self.Cr[keep_b_idx], self.Cr[s_idx]], axis=0 + ) + + # Update sizes for the newly reduced population + self.bNP = int(len(keep_b_idx)) + self.n_individuals = int(len(y)) + + self.reductions_done += 1 + + return x, y + + def _evolve_population(self, x, y, args, is_big=True): + if self.n_individuals == 0: + return x, y + + start_idx = 0 if is_big else self.bNP + end_idx = self.bNP if is_big else self.n_individuals + + f_low = self.Fl_b if is_big else self.Fl_s + cr_bound = self.CRu_b if is_big else self.CRu_s + cr_low = self.CRl_b if is_big else self.CRl_s + + for i in range(start_idx, end_idx): + # Parameter Adaptation + new_F = ( + self.rng_optimization.random() * self.Fu + f_low + if self.rng_optimization.random() < self.tau1 + else self.F[i] + ) + new_Cr = ( + self.rng_optimization.random() * cr_bound + cr_low + if self.rng_optimization.random() < self.tau2 + else self.Cr[i] + ) + + # Mutation Pool Selection with Extreme RL Fallbacks + if is_big: + progress = 
self.n_function_evaluations / self.max_function_evaluations + ms_size = 1 if progress <= 1 / 3 else 2 if progress <= 2 / 3 else 3 + + available_sNP = self.n_individuals - self.bNP + ms_size = min(ms_size, available_sNP) + + if ms_size > 0: + ms_indices = self.rng_optimization.choice( + range(self.bNP, self.n_individuals), ms_size, replace=False + ) + else: + ms_indices = np.array([], dtype=int) + + pool_r2_r3 = np.concatenate([np.arange(self.bNP), ms_indices]) + + # Helper to safely pick a target or fallback sequentially + def safe_choice(preferred_pool, exclude): + valid = [idx for idx in preferred_pool if idx not in exclude] + if not valid: + valid = [ + idx + for idx in range(self.n_individuals) + if idx not in exclude + ] + return self.rng_optimization.choice(valid) if valid else i + + r1 = safe_choice(range(self.bNP), [i]) + r2 = safe_choice(pool_r2_r3, [i, r1]) + r3 = safe_choice(pool_r2_r3, [i, r1, r2]) + + else: + pool = [idx for idx in range(self.bNP, self.n_individuals) if idx != i] + + # Normal behavior: P_s has enough individuals + if len(pool) >= 3: + r1, r2, r3 = self.rng_optimization.choice(pool, 3, replace=False) + else: + # FALLBACK 1: Try borrowing from the full population without replacement + full_pool = [idx for idx in range(self.n_individuals) if idx != i] + if len(full_pool) >= 3: + r1, r2, r3 = self.rng_optimization.choice( + full_pool, 3, replace=False + ) + else: + # EXTREME FALLBACK: Population is < 4. We MUST allow replacement. + # If population is literally 1, it will just pick `i` three times. 
+ full_pool_with_i = list(range(self.n_individuals)) + r1, r2, r3 = self.rng_optimization.choice( + full_pool_with_i, 3, replace=True + ) + + # Mutation and Reflection + v = x[r1] + new_F * (x[r2] - x[r3]) + v = self._reflect_bounds(v) + + # Crossover (Rotational Invariant Strategy) + if new_Cr > 1.0: + u = v.copy() + else: + u = x[i].copy() + j_rand = self.rng_optimization.integers(0, self.ndim_problem) + mask = self.rng_optimization.random(self.ndim_problem) <= new_Cr + mask[j_rand] = True + u[mask] = v[mask] + + # Evaluate + new_y = self._evaluate_fitness(u, args, F=self.F[i], Cr=self.Cr[i]) + + # Crowding & Selection + if is_big: + # Euclidean distance crowding + dists = np.sum((x[: self.bNP] - u) ** 2, axis=1) + target = np.argmin(dists) + else: + target = i + + if new_y <= y[target]: + x[target], y[target] = u, new_y + self.F[target], self.Cr[target] = new_F, new_Cr + + if is_big and new_y < self.best_so_far_y: + self.best_so_far_y = new_y + self.age = 0 + elif is_big and target == i: + self.age += 1 + + return x, y + + def iterate(self, x=None, y=None, args=None): + x, y = self._check_population_reduction(x, y) + + # P_b Reinitialization Check + if self.bNP > 0: + best_b_y = np.min(y[: self.bNP]) + eqs_b = np.sum(np.abs(y[: self.bNP] - best_b_y) < self.eps) + age_limit = 0.1 * self.max_function_evaluations + + if (eqs_b >= self.bNP * self.MyEps) or (self.age >= age_limit): + x[: self.bNP] = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.bNP, self.ndim_problem), + ) + y[: self.bNP] = np.array( + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x[: self.bNP]) + ] + ) + self.F[: self.bNP] = self.Finit + self.Cr[: self.bNP] = self.CRinit + self.age = 0 + + # P_s Reinitialization Check + if self.sNP > 0: + # Safely find the best in the small population + best_s_idx = self.bNP + np.argmin(y[self.bNP :]) + eqs_s = np.sum(np.abs(y[self.bNP :] - y[best_s_idx]) < self.eps) 
+ + if eqs_s >= self.sNP * self.MyEps: + best_x_s = x[best_s_idx].copy() + best_y_s = y[best_s_idx] + + x[self.bNP :] = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.sNP, self.ndim_problem), + ) + y[self.bNP :] = np.array( + [ + self._evaluate_fitness(xi, args, F=self.F[i], Cr=self.Cr[i]) + for i, xi in enumerate(x[self.bNP :]) + ] + ) + self.F[self.bNP :] = self.Finit + self.Cr[self.bNP :] = self.CRinit + + # Elitism: retain the best small-population individual + x[self.bNP], y[self.bNP] = best_x_s, best_y_s + + # Big Population Generation + if self.bNP > 0: + x, y = self._evolve_population(x, y, args, is_big=True) + + # Migration + # The best individual migrates from P_b to P_s + if self.bNP > 0 and self.sNP > 0: + best_overall_idx = np.argmin(y) + if best_overall_idx < self.bNP: + worst_s_idx = self.bNP + np.argmax(y[self.bNP :]) + x[worst_s_idx] = x[best_overall_idx].copy() + y[worst_s_idx] = y[best_overall_idx] + self.F[worst_s_idx] = self.F[best_overall_idx] + self.Cr[worst_s_idx] = self.Cr[best_overall_idx] + + # Small Population Generation (repeats m times) + if self.sNP > 0: + # m is traditionally bNP // sNP, but must fallback cleanly if bNP is 0 + m = self.bNP // self.sNP if self.bNP > 0 else 1 + m = max(1, m) # Ensure it executes at least once if P_s is all we have + for _ in range(m): + x, y = self._evolve_population(x, y, args, is_big=False) + + self._n_generations += 1 + return x, y + + def optimize(self, fitness_function=None, args=None): + fitness = super().optimize(fitness_function) + x, y = self.initialize( + args, self.start_conditions.get("x"), self.start_conditions.get("y") + ) + + self.best_so_far_y = np.min(y) + + while True: + old_evals = self.n_function_evaluations + x, y = self.iterate(x, y, args) + self.results.update({"x": x, "y": y, "Cr": self.Cr[:], "F": self.F[:]}) + if self._check_terminations() or self.n_function_evaluations == old_evals: + break + + return 
self._collect(fitness, y) + + def set_data(self, x=None, y=None, *args, **kwargs): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + self.start_conditions = {"x": x[indices], "y": y[indices]} + Cr = kwargs.get("Cr") + if Cr is not None: + self.Cr = Cr[indices] + F = kwargs.get("F") + if F is not None: + self.F = F[indices] + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/MADDE.py b/dynamicalgorithmselection/optimizers/DE/MADDE.py new file mode 100644 index 0000000..27bf266 --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/MADDE.py @@ -0,0 +1,326 @@ +import numpy as np +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class MADDE(DE): + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pm"] + + def __init__(self, problem, options): + super().__init__(problem, options) + D = self.ndim_problem + # Constants from MadDE paper/original code + self.Nmax = int(np.round(2 * (D**2))) + # self.Nmax = self.n_individuals if self.n_individuals else 170 + self.Nmin = options.get("Nmin", 4) + self.p = 0.18 + self.PqBX = 0.01 + + # Adaptive strategy probabilities + self.pm = np.ones(3) / 3 + + # Archive and Memory + self.A_rate = 2.30 + self.NA = int(np.round(self.A_rate * self.Nmax)) + self.archive = np.empty((0, self.ndim_problem)) + + # Memory for F and Cr + self.memory_size = 10 * D + self.MF = np.ones(self.memory_size) * 0.2 + self.MCr = np.ones(self.memory_size) * 0.2 + self.k_idx = 0 + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + if y is None: + y = np.array( + [ + self._evaluate_fitness( + xi, + 
args, + ) + for xi in x + ] + ) + return x, y + + def _choose_F_Cr(self, NP): + indices = self.rng_optimization.integers(0, self.memory_size, size=NP) + + Cr = np.zeros(NP) + for i, idx in enumerate(indices): + if self.MCr[idx] == -1.0: # Check for terminal state + Cr[i] = 0.0 + else: + Cr[i] = self.rng_optimization.normal(loc=self.MCr[idx], scale=0.1) + + Cr = np.clip(Cr, 0, 1) + + # Cauchy-like sampling for F + F = self.MF[indices] + 0.1 * np.tan( + np.pi * (self.rng_optimization.random(NP) - 0.5) + ) + while np.any(F <= 0): + idx = np.where(F <= 0)[0] + F[idx] = self.MF[indices[idx]] + 0.1 * np.tan( + np.pi * (self.rng_optimization.random(len(idx)) - 0.5) + ) + return Cr, np.minimum(1.0, F) + + def _mutate(self, x, y, F, strategy_idx, q, Fa): + NP = x.shape[0] + dim = self.ndim_problem + v = np.zeros_like(x) + + # Indices for 3 strategies + m0 = strategy_idx == 0 + m1 = strategy_idx == 1 + m2 = strategy_idx == 2 + + # p-best and q-best sets + order = np.argsort(y) + p_best = x[order[: max(int(self.p * NP), 2)]] + q_best = x[order[: max(int(q * NP), 2)]] + + # Strategy 0: Current-to-pbest/1 with archive + if np.any(m0): + v[m0] = self._ctb_w_arc(x[m0], p_best, self.archive, F[m0]) + + # Strategy 1: Current-to-rand/1 with archive + if np.any(m1): + v[m1] = self._ctr_w_arc(x[m1], self.archive, F[m1]) + + # Strategy 2: Weighted Rand-to-best + if np.any(m2): + v[m2] = self._weighted_rtb(x[m2], q_best, F[m2], Fa) + + return v + + def iterate(self, x=None, y=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") + NP = x.shape[0] + dim = self.ndim_problem + FEs, MaxFEs = self.n_function_evaluations, self.max_function_evaluations + + # Linear parameters for MadDE + q = 2 * self.p - self.p * FEs / MaxFEs + Fa = 0.5 + 0.5 * FEs / MaxFEs + + # Parameter sampling + Cr, F = self._choose_F_Cr(NP) + mu = self.rng_optimization.choice(3, size=NP, p=self.pm) + + # Mutation + v = self._mutate(x, y, F, mu, q, Fa) + + # Boundary 
handling (MadDE specific) + low, high = self.lower_boundary, self.upper_boundary + v = np.where(v < low, (x + low) / 2, v) + v = np.where(v > high, (x + high) / 2, v) + + # Crossover (Binomial + qBX) + u = np.zeros_like(x) + rvs = self.rng_optimization.random(NP) + + # Standard Binomial + bu_idx = rvs > self.PqBX + if np.any(bu_idx): + u[bu_idx] = self._binomial(x[bu_idx], v[bu_idx], Cr[bu_idx]) + + # quasi-Best Crossover (qBX) + qu_idx = rvs <= self.PqBX + if np.any(qu_idx): + # Pick qbest from combined population and archive + combined = np.vstack([x, self.archive]) if len(self.archive) > 0 else x + q_limit = max(int(q * len(combined)), 2) + q_best_combined = combined[ + np.argsort(np.concatenate([y, np.full(len(self.archive), np.inf)]))[ + :q_limit + ] + ] + cross_qbest = q_best_combined[ + self.rng_optimization.integers(0, len(q_best_combined), np.sum(qu_idx)) + ] + u[qu_idx] = self._binomial(cross_qbest, v[qu_idx], Cr[qu_idx]) + + # Evaluation and Selection + new_y = np.array( + [ + self._evaluate_fitness( + ui, + args, + ) + for ui in u + ] + ) + optim = new_y < y + + if np.any(optim): + # Archive update + self.archive = np.vstack([self.archive, x[optim]]) + if len(self.archive) > self.NA: + self.archive = self.archive[ + self.rng_optimization.choice( + len(self.archive), self.NA, replace=False + ) + ] + + # Memory and Strategy probability update + df = np.maximum(0, y - new_y) + self._update_memory(F[optim], Cr[optim], df[optim]) + self._update_pm(df, mu) + + x[optim], y[optim] = u[optim], new_y[optim] + + x, y = self._lpsr(x, y) + + self._n_generations += 1 + return x, y + + def _update_pm(self, df, mu): + count_S = np.zeros(3) + for i in range(3): + if np.any(mu == i): + count_S[i] = np.mean(df[mu == i]) + + if np.sum(count_S) > 0: + self.pm = np.maximum( + 0.1, np.minimum(0.9, count_S / (np.sum(count_S) + 1e-15)) + ) + self.pm /= np.sum(self.pm) + else: + self.pm = np.ones(3) / 3 + + def _lpsr(self, x, y): + FEs, MaxFEs = self.n_function_evaluations, 
self.max_function_evaluations + + # Prevent the ratio from exceeding 1.0 if FEs overshoots MaxFEs + ratio = min(1.0, FEs / MaxFEs) + + # LPSR formula: N_G = round(N_max - (N_max - N_min) * ratio) + new_NP = int(np.round(self.Nmax - (self.Nmax - self.Nmin) * ratio)) + + # Clamp to ensure population never drops below Nmin + new_NP = max(self.Nmin, new_NP) + + if new_NP < x.shape[0]: + idx = np.argsort(y)[:new_NP] + x, y = x[idx], y[idx] + self.n_individuals = new_NP + + # Dynamically prune the archive size based on the new population + self.NA = int(np.round(self.A_rate * new_NP)) + + # Ensure NA doesn't go negative (redundant with the max clamp above, but safe) + self.NA = max(0, self.NA) + + if len(self.archive) > self.NA: + self.archive = self.archive[ + self.rng_optimization.choice( + len(self.archive), self.NA, replace=False + ) + ] + return x, y + + # Helper mutation methods (Vectorized) + def _ctb_w_arc(self, x, best, archive, F): + NP = x.shape[0] + xb = best[self.rng_optimization.integers(0, len(best), NP)] + r1 = self.rng_optimization.integers(0, NP, NP) + combined = np.vstack([x, archive]) if len(archive) > 0 else x + r2 = self.rng_optimization.integers(0, len(combined), NP) + return ( + x + F[:, np.newaxis] * (xb - x) + F[:, np.newaxis] * (x[r1] - combined[r2]) + ) + + def _ctr_w_arc(self, x, archive, F): + NP = x.shape[0] + r1 = self.rng_optimization.integers(0, NP, NP) + combined = np.vstack([x, archive]) if len(archive) > 0 else x + r2 = self.rng_optimization.integers(0, len(combined), NP) + return x + F[:, np.newaxis] * (x[r1] - combined[r2]) + + def _weighted_rtb(self, x, best, F, Fa): + NP = x.shape[0] + xb = best[self.rng_optimization.integers(0, len(best), NP)] + r1 = self.rng_optimization.integers(0, NP, NP) + r2 = self.rng_optimization.integers(0, NP, NP) + return F[:, np.newaxis] * x[r1] + (F * Fa)[:, np.newaxis] * (xb - x[r2]) + + def _binomial(self, x, v, Cr): + NP, dim = x.shape + jrand = self.rng_optimization.integers(dim, size=NP) + mask = 
self.rng_optimization.random((NP, dim)) < Cr[:, np.newaxis] + u = np.where(mask, v, x) + u[np.arange(NP), jrand] = v[np.arange(NP), jrand] + return u + + def _update_memory(self, SF, SCr, df): + if len(SF) > 0: + w = df / (np.sum(df) + 1e-15) + + # Weighted Lehmer mean for F + self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + + # Terminal condition check for Cr + if self.MCr[self.k_idx] == -1.0 or np.max(SCr) == 0: + self.MCr[self.k_idx] = -1.0 # Terminal state \perp + else: + # Weighted Lehmer mean for Cr + self.MCr[self.k_idx] = np.sum(w * (SCr**2)) / (np.sum(w * SCr) + 1e-15) + + self.k_idx = (self.k_idx + 1) % self.memory_size + else: + # Memory reset rule if no successful trials + self.MF[self.k_idx] = 0.5 + self.MCr[self.k_idx] = 0.5 + + def optimize(self, fitness_function=None, args=None): + fitness = super().optimize(fitness_function) + x, y = self.initialize( + args, self.start_conditions.get("x"), self.start_conditions.get("y") + ) + while True: + self._print_verbose_info(fitness, y) + x, y = self.iterate(x, y, args) + self.results.update( + { + "x": x, + "y": y, + "archive": self.archive, + "MF": self.MF, + "MCr": self.MCr, + "k_idx": self.k_idx, + "pm": self.pm, + } + ) + if self._check_terminations(): + break + return self._collect(fitness, y) + + def set_data( + self, + x=None, + y=None, + *args, + **kwargs, + ): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + start_conditions = {} + start_conditions.update({"x": x[indices], "y": y[indices]}) + self.start_conditions = start_conditions + for var in ["archive", "MF", "MCr", "k_idx", "pm"]: + if var in kwargs: + setattr(self, var, kwargs[var]) + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py 
b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py new file mode 100644 index 0000000..150ee6d --- /dev/null +++ b/dynamicalgorithmselection/optimizers/DE/NL_SHADE_RSP.py @@ -0,0 +1,284 @@ +import numpy as np +from dynamicalgorithmselection.optimizers.DE.DE import DE + + +class NL_SHADE_RSP(DE): + start_condition_parameters = ["x", "y", "archive", "MF", "MCr", "k_idx", "pa"] + + def __init__(self, problem, options): + super().__init__(problem, options) + self.Nmax = options.get("Nmax", 30 * self.ndim_problem) + self.Nmin = options.get("Nmin", 4) + self.n_individuals = self.Nmax + + self.pa = 0.5 + + # Archive + self.NA = int(self.Nmax * 2.1) + self.archive = np.empty((0, self.ndim_problem)) + + # Memory MF and MCr + self.memory_size = self.ndim_problem * 20 + self.MF = np.ones(self.memory_size) * 0.2 + self.MCr = np.ones(self.memory_size) * 0.2 + self.k_idx = 0 + + def initialize(self, args=None, x=None, y=None): + if x is None: + x = self.rng_initialization.uniform( + self.initial_lower_boundary, + self.initial_upper_boundary, + (self.n_individuals, self.ndim_problem), + ) + if y is None: + y = np.array( + [ + self._evaluate_fitness( + xi, + args, + ) + for xi in x + ] + ) + self.memory_size = len(self.MF) + return x, y + + def _sample_cauchy(self, loc, scale, size): + """Manual Cauchy sampling: loc + scale * tan(pi * (rand - 0.5))""" + rand = self.rng_optimization.random(size) + return loc + scale * np.tan(np.pi * (rand - 0.5)) + + def _choose_F_Cr(self, NP): + ind_r = self.rng_optimization.integers(0, self.memory_size, size=NP) + # Crossover Rate (Normal) + Cr = self.rng_optimization.normal(loc=self.MCr[ind_r], scale=0.1, size=NP) + Cr = np.clip(Cr, 0, 1) + # Step Length (Cauchy) + cauchy_locs = self.MF[ind_r] + F = self._sample_cauchy(cauchy_locs, 0.1, NP) + # Symmetry correction for negative values + attempts = 0 + while np.any(F <= 0) and attempts < 100: + idx = np.where(F <= 0)[0] + F[idx] = self._sample_cauchy(cauchy_locs[idx], 0.1, len(idx)) + 
attempts += 1 + return Cr, np.minimum(1, F) + + def _update_memory(self, SF, SCr, df): + if len(SF) > 0: + w = df / np.sum(df) + # Weighted Lehmer Mean for F + self.MF[self.k_idx] = np.sum(w * (SF**2)) / (np.sum(w * SF) + 1e-15) + # Weighted Arithmetic Mean for Cr + self.MCr[self.k_idx] = np.sum(w * SCr) + self.k_idx = (self.k_idx + 1) % self.memory_size + + def iterate(self, x=None, y=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") + NP = x.shape[0] + + # Sort population according to fitness for RSP and Crossover mapping + sort_idx = np.argsort(y) + x = x[sort_idx] + y = y[sort_idx] + + Cr, F = self._choose_F_Cr(NP) + + # Sort Cr so better individuals get smaller Cr (for exponential crossover) + Cr = np.sort(Cr) + + # Adaptive greediness pb (from 0.4 to 0.2) + nfe_ratio = self.n_function_evaluations / self.max_function_evaluations + pb = 0.4 - 0.2 * nfe_ratio + pb_upper = max(2, int(np.round(NP * pb))) + + # Adaptive Cr_b for binomial crossover + Cr_b = 0.0 if nfe_ratio < 0.5 else 2.0 * (nfe_ratio - 0.5) + + # Rank-based probabilities for r2 (RSP) + ranks = np.exp(-np.arange(NP) / NP) + pr = ranks / np.sum(ranks) + + x2 = np.zeros_like(x) + use_arc = self.rng_optimization.random(NP) < self.pa + + r1 = np.zeros(NP, dtype=int) + r2 = np.zeros(NP, dtype=int) + pbest_idx = np.zeros(NP, dtype=int) + + for i in range(NP): + # pbest index + valid_pbest = [j for j in range(pb_upper) if j != i] + pb_i = int(self.rng_optimization.choice(valid_pbest)) if valid_pbest else i + pbest_idx[i] = pb_i + + # r1 index (uniform) + valid_r1 = [j for j in range(NP) if j not in (i, pb_i)] + r1_i = ( + int(self.rng_optimization.choice(valid_r1)) + if valid_r1 + else self.rng_optimization.integers(0, NP) + ) + r1[i] = r1_i + + # r2 index (archive or RSP) + if use_arc[i] and len(self.archive) > 0: + r2[i] = self.rng_optimization.integers(0, len(self.archive)) + x2[i] = self.archive[r2[i]] + else: + use_arc[i] = False + valid_r2 = 
[j for j in range(NP) if j not in (i, pb_i, r1_i)] + + if valid_r2: + # Re-normalize RSP probabilities for the remaining valid choices + valid_pr = pr[valid_r2] / np.sum(pr[valid_r2]) + r2_i = int(self.rng_optimization.choice(valid_r2, p=valid_pr)) + else: + r2_i = self.rng_optimization.integers(0, NP) + + r2[i] = r2_i + x2[i] = x[r2_i] + + # Generate Trials: current-to-pbest/1 + x_pbest = x[pbest_idx] + vs = x + F[:, np.newaxis] * (x_pbest - x) + F[:, np.newaxis] * (x[r1] - x2) + vs = np.clip(vs, self.lower_boundary, self.upper_boundary) + + # Dual Crossover Handling + us = np.copy(x) + for i in range(NP): + if self.rng_optimization.random() < 0.5: + # Binomial crossover with Cr_b + jrand = self.rng_optimization.integers(self.ndim_problem) + for j in range(self.ndim_problem): + if self.rng_optimization.random() < Cr_b or j == jrand: + us[i, j] = vs[i, j] + else: + # Exponential crossover with Cr_i + n1 = self.rng_optimization.integers(self.ndim_problem) + n2 = 1 + while self.rng_optimization.random() < Cr[i] and n2 < self.ndim_problem: + n2 += 1 + for j in range(n2): + idx = (n1 + j) % self.ndim_problem + us[i, idx] = vs[i, idx] + + # Selection + + new_y = np.array( + [ + self._evaluate_fitness( + ui, + args, + ) + for ui in us + ] + ) + better_idx = np.where(new_y < y)[0] + + if len(better_idx) > 0: + # Update Archive Probability (pa) + df = y[better_idx] - new_y[better_idx] + arc_used_better = use_arc[better_idx] + + df_A = np.sum(df[arc_used_better]) + df_P = np.sum(df[~arc_used_better]) + n_A_total = np.sum(use_arc) + n_P_total = NP - n_A_total + + mean_A = df_A / n_A_total if n_A_total > 0 else 0 + mean_P = df_P / n_P_total if n_P_total > 0 else 0 + + if mean_A + mean_P > 0: + self.pa = mean_A / (mean_A + mean_P) + self.pa = np.clip(self.pa, 0.1, 0.9) # Clipping rule applied + + # Update Archive + success_x = x[better_idx] + self.archive = np.vstack([self.archive, success_x]) + if len(self.archive) > self.NA: + # Remove random individuals + remove_idx = 
self.rng_optimization.choice( + len(self.archive), len(self.archive) - self.NA, replace=False + ) + self.archive = np.delete(self.archive, remove_idx, axis=0) + + # Record successes for memory update + self._update_memory(F[better_idx], Cr[better_idx], df) + + x[better_idx] = us[better_idx] + y[better_idx] = new_y[better_idx] + + # NLPSR (Non-Linear Population Size Reduction) + FEs = self.n_function_evaluations + MaxFEs = self.max_function_evaluations + nfe_ratio_nlpsr = FEs / MaxFEs + new_NP = int( + np.round( + (self.Nmin - self.Nmax) + * np.power(nfe_ratio_nlpsr, 1.0 - nfe_ratio_nlpsr) + + self.Nmax + ) + ) + new_NP = max(self.Nmin, new_NP) + + if new_NP < NP: + sort_idx_final = np.argsort(y) + x = x[sort_idx_final][:new_NP] + y = y[sort_idx_final][:new_NP] + self.n_individuals = new_NP + self.NA = int(max(new_NP * 2.1, self.Nmin)) + + self._n_generations += 1 + return x, y + + def optimize(self, fitness_function=None, args=None): + fitness = DE.optimize(self, fitness_function) + + x = self.start_conditions.get("x", None) + y = self.start_conditions.get("y", None) + + x, y = self.initialize(args, x, y) + + while True: + old_evals = self.n_function_evaluations + self._print_verbose_info(fitness, y) + x, y = self.iterate(x, y, args) + self.results.update( + { + "x": x, + "y": y, + "archive": self.archive[:], + "MF": self.MF[:], + "MCr": self.MCr[:], + "k_idx": self.k_idx, + "pa": self.pa, + } + ) + if self._check_terminations() or self.n_function_evaluations == old_evals: + break + + return self._collect(fitness, y) + + def set_data( + self, + x=None, + y=None, + *args, + **kwargs, + ): + if x is None or y is None: + self.start_conditions = {"x": None, "y": None} + elif not isinstance(y, np.ndarray): + self.start_conditions = {} + else: + indices = np.argsort(y)[: self.n_individuals] + start_conditions = {} + start_conditions.update({"x": x[indices], "y": y[indices]}) + self.start_conditions = start_conditions + for var in ["archive", "MF", "MCr", "k_idx", "pa"]: 
+ if var in kwargs: + setattr(self, var, kwargs[var]) + self.best_so_far_x = kwargs.get("best_x", None) + self.best_so_far_y = kwargs.get("best_y", float("inf")) diff --git a/dynamicalgorithmselection/optimizers/DE/__init__.py b/dynamicalgorithmselection/optimizers/DE/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dynamicalgorithmselection/optimizers/DS/POWELL.py b/dynamicalgorithmselection/optimizers/DS/POWELL.py index decac18..9041bc7 100644 --- a/dynamicalgorithmselection/optimizers/DS/POWELL.py +++ b/dynamicalgorithmselection/optimizers/DS/POWELL.py @@ -113,20 +113,16 @@ class POWELL(DS): def __init__(self, problem, options): DS.__init__(self, problem, options) self._func = None # only for inner line searcher - self.y_history = [] - self.x_history = [] def initialize(self, x=None, y=None, u=None, args=None, is_restart=False): x = ( self._initialize_x(is_restart) if x is None else x ) # initial (starting) search point - y = self._evaluate_fitness(x, args, u=u) if y is None else y # fitness + y = self._evaluate_fitness(x, args) if y is None else y # fitness u = np.identity(self.ndim_problem) if u is None else u - self.y_history.append(y) - self.x_history.append(x) def _wrapper(xx): - return self._evaluate_fitness(xx, args, u=u) + return self._evaluate_fitness(xx, args) self._func = _wrapper return x, y, u, y @@ -147,6 +143,8 @@ def _func(alpha): # only for line search return y, x + d, d, yy def iterate(self, x=None, y=None, u=None, args=None): + if x is None or y is None: + raise ValueError("x and y must be provided for iteration.") xx, yy = np.copy(x), np.copy(y) big_ind, delta, ys = 0, 0.0, [] for i in range(self.ndim_problem): @@ -154,8 +152,6 @@ def iterate(self, x=None, y=None, u=None, args=None): return x, y, u, ys d, diff = u[i], y y, x, d, fitness = self._line_search(x, d) - self.y_history.append(y) - self.x_history.append(x) ys.extend(fitness) diff -= y if diff > delta: @@ -163,9 +159,7 @@ def iterate(self, x=None, y=None, u=None, 
args=None): d = x - xx # extrapolated point _, ratio_e = _line_for_search(x, d, self.lower_boundary, self.upper_boundary) xxx = x + min(ratio_e, 1.0) * d - yyy = self.fitness_function(xxx) - self.y_history.append(yyy) - self.x_history.append(xxx) + yyy = self._evaluate_fitness(xxx, args=args) if yy > yyy: t, temp = 2.0 * (yy + yyy - 2.0 * y), yy - y - delta t *= np.square(temp) @@ -173,8 +167,6 @@ def iterate(self, x=None, y=None, u=None, args=None): t -= delta * np.square(temp) if t < 0.0: y, x, d, fitness = self._line_search(x, d) - self.y_history.append(y) - self.x_history.append(x) ys.extend(fitness) if np.any(d): u[big_ind] = u[-1] @@ -215,26 +207,21 @@ def set_data( u = None self.start_conditions = { "x": x, - "Y": y, + "y": y, "u": u, } self.best_so_far_x = kwargs.get("best_x", None) self.best_so_far_y = kwargs.get("best_y", float("inf")) def get_data(self, n_individuals: Optional[int] = None): - best_indices = sorted( - [i for i in range(len(self.y_history))], - key=lambda x: self.y_history[x], - )[:n_individuals] - x = np.array(self.x_history)[best_indices] - y = np.array(self.y_history)[best_indices] + indices = np.argsort(self.y_history)[: min(len(self.y_history), 200)] + x = np.array(self.x_history)[indices] + y = np.array(self.y_history)[indices] return ( self.results | { - { - "x": x, - "Y": y, - } + "x": x, + "y": y, } or self.start_conditions ) diff --git a/dynamicalgorithmselection/optimizers/ES/CMAES.py b/dynamicalgorithmselection/optimizers/ES/CMAES.py index 70cb864..fa10171 100644 --- a/dynamicalgorithmselection/optimizers/ES/CMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/CMAES.py @@ -41,9 +41,6 @@ def __init__(self, problem, options): None, 2.0, ) # for CMA (c_w -> c_μ) - self._save_eig = options.get( - "_save_eig", False - ) # whether or not save eigenvalues and eigenvectors def _set_c_c(self): """Set decay rate of evolution path for the rank-one update of CMA.""" @@ -161,7 +158,11 @@ def iterate( ) # Gaussian noise for mutation d[k] = 
np.dot(e_ve @ np.diag(e_va), z) x[k] = mean + self.sigma * d[k] # offspring individual - y[k] = self._evaluate_fitness(x[k], args, d=d[k], e_ve=e_ve, e_va=e_va) + y[k] = self._evaluate_fitness( + x[k], + args, + d=d[k], + ) return x, y, d def update_distribution( @@ -314,23 +315,8 @@ def optimize( "mean": mean, } ) - self.results.update( - { - "p_c": p_c, - "p_s": p_s, - "cm": cm, - "e_va": e_va, - "e_ve": e_ve, - "d": d, - "x": x, - "y": y, - "mean": mean, - } - ) results = self._collect(fitness, y, mean) - # by default do *NOT* save eigenvalues and eigenvectors (with *quadratic* space complexity) - if self._save_eig: - results["e_va"], results["e_ve"] = e_va, e_ve + results["e_va"], results["e_ve"] = e_va, e_ve return results def set_data( @@ -373,12 +359,13 @@ def set_data( "cm", "e_ve", "e_va", - "d", ] } + start_conditions["d"] = d[indices] if d is not None else None + mean = x[indices].mean(axis=0) stds = np.std(x[indices], axis=0) - sigma = np.max(stds) + sigma: float = np.max(stds) sigma = max(sigma, 1e-8) start_conditions.update( {"x": x[indices], "y": y[indices], "mean": mean, "sigma": sigma} diff --git a/dynamicalgorithmselection/optimizers/ES/LMCMAES.py b/dynamicalgorithmselection/optimizers/ES/LMCMAES.py index 7f3f7e5..d857312 100644 --- a/dynamicalgorithmselection/optimizers/ES/LMCMAES.py +++ b/dynamicalgorithmselection/optimizers/ES/LMCMAES.py @@ -86,14 +86,13 @@ def iterate(self, mean=None, x=None, pm=None, vm=None, y=None, b=None, args=None z = self.rng_optimization.standard_normal((self.ndim_problem,)) a_z = self._a_z(z, pm, vm, b) - # FIX 2: Check for potential overflow before update mutation_step = sign * self.sigma * a_z if np.any(np.isnan(mutation_step)) or np.any(np.isinf(mutation_step)): # Fallback to prevent crash, effectively skipping this mutation mutation_step = np.zeros_like(mutation_step) x[k] = mean + mutation_step - y[k] = self._evaluate_fitness(x[k], args, pm=pm, vm=vm, b=b) + y[k] = self._evaluate_fitness(x[k], args) sign *= -1 
return x, y @@ -119,7 +118,6 @@ def _update_distribution( ): mean_bak = np.dot(self._w, x[np.argsort(y)[: self.n_parents]]) - # FIX 3: Safety clamp for sigma to prevent division by zero or overflow safe_sigma = np.clip(self.sigma, 1e-20, 1e20) p_c = self._p_c_1 * p_c + self._p_c_2 * (mean_bak - mean) / safe_sigma @@ -147,7 +145,6 @@ def _update_distribution( vm[self._j[i]] = self._a_inv_z(pm[self._j[i]], vm, d, i) v_n = np.dot(vm[self._j[i]], vm[self._j[i]]) - # FIX 4: Safety clamp for v_n (denominator safety) # If v_n is 0 or NaN, b and d will explode. if v_n < 1e-20: v_n = 1e-20 diff --git a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py index c1b4d08..7dfa552 100644 --- a/dynamicalgorithmselection/optimizers/ES/OPOA2015.py +++ b/dynamicalgorithmselection/optimizers/ES/OPOA2015.py @@ -23,8 +23,6 @@ def cholesky_update(rm, z, downdate): class OPOA2015(ES): def __init__(self, problem, options): - self.mean_history = [] - self.y_history = [] options["n_individuals"] = 1 # mandatory setting options["n_parents"] = 1 # mandatory setting ES.__init__(self, problem, options | {"sigma": 0.9}) @@ -56,9 +54,6 @@ def initialize( self._evaluate_fitness( x=mean, args=args, - cf=cf, - p_s=p_s, - p_c=p_c, ) if y is None else y @@ -99,9 +94,6 @@ def iterate( y = self._evaluate_fitness( x=x, args=args, - cf=cf, - p_s=p_s, - p_c=p_c, ) if y <= best_so_far_y: self._ancestors.append(y) @@ -125,14 +117,14 @@ def iterate( self._n_generations += 1 self.results.update( { + "x": np.array([x]), + "mean": mean, "cf": cf, "best_so_far_y": best_so_far_y, "p_s": p_s, "p_c": p_c, } ) - self.mean_history.append(mean) - self.y_history.append(y) return mean, y, cf, best_so_far_y, p_s, p_c def restart_reinitialize( @@ -161,7 +153,9 @@ def restart_reinitialize( self._list_generations.append(self._n_generations) # for each restart self._n_generations = 0 self.sigma = np.copy(self._sigma_bak) - mean, y, cf, best_so_far_y, p_s, p_c = 
self.initialize(args, True) + mean, y, cf, best_so_far_y, p_s, p_c = self.initialize( + args, is_restart=True + ) self._list_fitness = [best_so_far_y] self._ancestors = [] return mean, y, cf, best_so_far_y, p_s, p_c @@ -177,7 +171,13 @@ def optimize(self, fitness_function=None, args=None): y = self.start_conditions.get("y", None) mean, y, cf, best_so_far_y, p_s, p_c = self.initialize( - mean, y, cf, best_so_far_y, p_s, p_c, args + mean=mean, + y=y, + cf=cf, + best_so_far_y=best_so_far_y, + p_s=p_s, + p_c=p_c, + args=args, ) while not self._check_terminations(): self._print_verbose_info(fitness, y) @@ -201,15 +201,24 @@ def set_data( *args, **kwargs, ): + if isinstance(y, np.ndarray) and x is not None: + best_idx = np.argmin(y) + y_val = float(y[best_idx]) + x_val = x[best_idx] + mean = x_val if mean is None else mean + else: + y_val = y if isinstance(y, float) else None + x_val = x if x is not None else mean + mean = ( mean if mean is not None else (np.mean(x, axis=0) if x is not None else None) ) - y = y if isinstance(y, float) else None self.start_conditions = { + "x": x_val, "mean": mean, - "y": y, + "y": y_val, "cf": cf, "best_so_far_y": best_so_far_y, "p_s": p_s, @@ -220,11 +229,7 @@ def set_data( self.best_so_far_y = kwargs.get("best_y", float("inf")) def get_data(self, n_individuals: Optional[int] = None): - pop_data = ["x", "y"] - best_indices = sorted( - [i for i in range(len(self.y_history))], - key=lambda x: self.y_history[x], - )[:n_individuals] - x = np.array(self.mean_history)[best_indices] - y = np.array(self.y_history)[best_indices] + indices = np.argsort(self.y_history)[: min(len(self.y_history), 200)] + x = np.array(self.x_history)[indices] + y = np.array(self.y_history)[indices] return self.results | {"x": x, "y": y} or self.start_conditions diff --git a/dynamicalgorithmselection/optimizers/Optimizer.py b/dynamicalgorithmselection/optimizers/Optimizer.py index 13a8edd..f989f31 100644 --- a/dynamicalgorithmselection/optimizers/Optimizer.py +++ 
b/dynamicalgorithmselection/optimizers/Optimizer.py @@ -1,8 +1,8 @@ import time -from typing import Optional +from typing import Optional, Any, Dict, List, Tuple import numpy as np -from pypop7.optimizers.core import Optimizer as BaseOptimizer +from pypop7.optimizers.core import Optimizer as BaseOptimizer, Terminations class Optimizer(BaseOptimizer): @@ -10,55 +10,64 @@ class Optimizer(BaseOptimizer): def __init__(self, problem, options): BaseOptimizer.__init__(self, problem, options) - self.fitness_history = [] - self.start_conditions = dict() - self.results = dict() - self.worst_so_far_y, self.worst_so_far_x = ( - options.get("worst_so_far_y", -np.inf), - None, + self.best_so_far_y: float = options.get("best_so_far_y", float("inf")) + self.best_so_far_x: Optional[np.ndarray] = None + self._base_early_stopping: float = self.best_so_far_y + self._counter_early_stopping: int = 0 + self.early_stopping_threshold: float = options.get( + "early_stopping_threshold", 1e-10 ) - self.x_history, self.y_history = [], [] - # [Added] Dictionary to store histories of generic parameters - self.parameter_history = {} + self.fitness_history: List[Tuple[int, float]] = [] + + self.start_conditions: Dict[str, Any] = dict() + self.results: Dict[str, Any] = dict() + + self.worst_so_far_y: float = options.get("worst_so_far_y", -np.inf) + self.worst_so_far_x: Optional[np.ndarray] = None + self.x_history: List[np.ndarray] = [] + self.y_history: List[float] = [] + self.parameter_history: Dict[str, List[Any]] = {} + self.target_FE: int | float = float("inf") - # [Modified] Accept generic kwargs for history tracking def _evaluate_fitness(self, x, args=None, **kwargs): self.start_function_evaluations = time.time() if args is None: y = self.fitness_function(x) else: y = self.fitness_function(x, args=args) + + y_val = float(y) self.time_function_evaluations += time.time() - self.start_function_evaluations self.n_function_evaluations += 1 - # update best-so-far solution (x) and fitness (y) - if y 
< self.best_so_far_y: - self.best_so_far_x, self.best_so_far_y = np.copy(x), y - self.fitness_history.append((self.n_function_evaluations, float(y))) - if y > self.worst_so_far_y: - self.worst_so_far_x, self.worst_so_far_y = np.copy(x), y + + # update best-so-far solution + if y_val < self.best_so_far_y: + self.best_so_far_x, self.best_so_far_y = np.copy(x), y_val + self.fitness_history.append((self.n_function_evaluations, y_val)) + + if y_val > self.worst_so_far_y: + self.worst_so_far_x, self.worst_so_far_y = np.copy(x), y_val + # update all settings related to early stopping - if (self._base_early_stopping - y) <= self.early_stopping_threshold: + if (self._base_early_stopping - y_val) <= self.early_stopping_threshold: self._counter_early_stopping += 1 else: - self._counter_early_stopping, self._base_early_stopping = 0, y + self._counter_early_stopping, self._base_early_stopping = 0, y_val self.x_history.append(np.copy(x)) - self.y_history.append(float(y)) + self.y_history.append(y_val) - # [Added] Generic storage for any extra parameters passed for key, val in kwargs.items(): if key not in self.parameter_history: self.parameter_history[key] = [] - - # Store copy if it's an array to prevent reference issues if isinstance(val, np.ndarray): self.parameter_history[key].append(np.copy(val)) else: self.parameter_history[key].append(val) - return float(y) + return y_val - def _check_success(self): + def _check_success(self) -> bool: if ( (self.upper_boundary is not None) and (self.lower_boundary is not None) @@ -78,7 +87,7 @@ def _check_success(self): return False return True - def _collect(self, fitness): + def _collect(self, fitness: List[float]) -> Dict[str, Any]: # Added type hints result = BaseOptimizer._collect(self, fitness) result.update( { @@ -92,28 +101,35 @@ def _collect(self, fitness): } ) - # [Added] Inject generic parameter histories into result - # Keys will be named like 'v_history', 'p_x_history' automatically for key, history in 
self.parameter_history.items(): result[f"{key}_history"] = np.array(history, dtype=np.float32) return result def set_data(self, x=None, y=None, best_x=None, best_y=None, *args, **kwargs): + n_ind = getattr(self, "n_individuals", 0) self.start_conditions = { - "x": x[: self.n_individuals] if x is not None else x, - "y": (y[: self.n_individuals] if isinstance(y, np.ndarray) else None), + "x": x[:n_ind] if x is not None else x, + "y": (y[:n_ind] if isinstance(y, np.ndarray) else None), "best_x": best_x, "best_y": best_y, } self.best_so_far_x = best_x - self.best_so_far_y = best_y + self.best_so_far_y = float(best_y) if best_y is not None else float("inf") - def get_data(self, n_individuals: Optional[int] = None): + def get_data(self, n_individuals: Optional[int] = None) -> Dict[str, Any]: return self.results or self.start_conditions - def optimize(self, fitness_function=None): + def optimize(self, fitness_function=None) -> List[float]: self.start_time = time.time() if fitness_function is not None: self.fitness_function = fitness_function - fitness = [] # to store all fitness generated during evolution/optimization + fitness: List[float] = [] return fitness + + def _check_terminations(self) -> bool: + termination_signal = super()._check_terminations() + if not termination_signal: + termination_signal = bool(self.n_function_evaluations >= self.target_FE) + if termination_signal: + self.termination_signal = Terminations.MAX_FUNCTION_EVALUATIONS + return termination_signal diff --git a/dynamicalgorithmselection/optimizers/RestartOptimizer.py b/dynamicalgorithmselection/optimizers/RestartOptimizer.py index b26a8b0..8e6a61a 100644 --- a/dynamicalgorithmselection/optimizers/RestartOptimizer.py +++ b/dynamicalgorithmselection/optimizers/RestartOptimizer.py @@ -1,14 +1,22 @@ -from typing import Type +from typing import Type, TypeVar from dynamicalgorithmselection.optimizers.Optimizer import Optimizer +# Create a TypeVar that is bound to the Optimizer base class +T = 
TypeVar("T", bound=Optimizer) -def restart_optimizer(base: Type[Optimizer]): - class RestartOptimizer(base): + +def restart_optimizer(base: Type[T]) -> Type[T]: + class RestartOptimizer(base): # type: ignore[misc, valid-type] def set_data(self, x=None, y=None, best_x=None, best_y=None, *args, **kwargs): + # We override this to do nothing, effectively "restarting" + # or ignoring previous state transitions. pass new_name = f"{base.__name__}Restart" RestartOptimizer.__name__ = new_name RestartOptimizer.__qualname__ = new_name + + # Casting or returning as Type[T] ensures mypy sees the + # result as the same category of class as the input. return RestartOptimizer diff --git a/portfolio_study.slurm b/portfolio_study.slurm new file mode 100644 index 0000000..4282edd --- /dev/null +++ b/portfolio_study.slurm @@ -0,0 +1,67 @@ +#!/bin/bash +#SBATCH --job-name=rl_das_experiment +#SBATCH --output=logs/experiment_%A_%a.out +#SBATCH --error=logs/experiment_%A_%a.err +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=32G +#SBATCH --time=48:00:00 +#SBATCH --partition=plgrid-gpu-a100 +#SBATCH --array=0-9 # 10 tasks total + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +if [ "$#" -eq 0 ]; then + PORTFOLIO=('MADDE' 'CMAES' 'SPSO') +else + PORTFOLIO=("$@") +fi +PORTFOLIO_STR=$(IFS="_"; echo "${PORTFOLIO[*]}") + + +# CONFIGURATION +ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" +source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 2. 
Dimension-specific CV-LOPO (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Multidimensional CV-LOIO (Index 8) +elif [[ $SLURM_ARRAY_TASK_ID -eq 8 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 4. Multidimensional CV-LOPO (Index 9) +elif [[ $SLURM_ARRAY_TASK_ID -eq 9 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py ${PORTFOLIO_STR}_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r ELA --mode $MODE --cdb $CDB_VAL --agent policy-gradient +fi \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0e54b8a..dece663 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "torch>=2.9.0", "numpy>=1.20.0, <2.0", "pflacco>=1.0.0", + "ty>=0.0.17", ] [build-system] @@ -25,6 +26,8 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ + "pre-commit>=4.5.1", + "py-spy>=0.4.1", "pytest>=9.0.2", "ruff>=0.14.5", ] diff --git a/runner.slurm b/runner.slurm index 91d3a60..d0bbca4 100644 --- a/runner.slurm +++ b/runner.slurm @@ -7,12 +7,96 @@ #SBATCH --mem=32G #SBATCH --time=48:00:00 #SBATCH --partition=plgrid-gpu-a100 -#SBATCH -A plgautopt26-gpu-a100 +#SBATCH --array=0-23 # Increased to 24 tasks total to split sequential runs + +CDB_VAL=${1:-1.5} + +if [ "$#" -gt 0 ]; then + shift +fi + +# Store the remaining arguments as an array called PORTFOLIO. 
+# If no additional arguments were provided, fall back to your default. + +PORTFOLIO=('JDE21' 'MADDE' 'NL_SHADE_RSP') # CONFIGURATION ENV_PATH="$SCRATCH/DynamicAlgorithmSelection/.venv/bin/activate" - source "$ENV_PATH" +mkdir -p logs + +# Array of Dimensions +DIMS=(2 3 5 10) + +# 1. Dimension-specific CV-LOIO | RL-DAS (Indices 0-3) +if [[ $SLURM_ARRAY_TASK_ID -ge 0 && $SLURM_ARRAY_TASK_ID -le 3 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$SLURM_ARRAY_TASK_ID]} + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + +# 2. Dimension-specific CV-LOIO | Policy Gradient (Indices 4-7) +elif [[ $SLURM_ARRAY_TASK_ID -ge 4 && $SLURM_ARRAY_TASK_ID -le 7 ]]; then + MODE="CV-LOIO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 4))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 3. Dimension-specific CV-LOPO | RL-DAS (Indices 8-11) +elif [[ $SLURM_ARRAY_TASK_ID -ge 8 && $SLURM_ARRAY_TASK_ID -le 11 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 8))]} + echo "Running Mode: $MODE | Agent: RL-DAS | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RLDAS_${MODE}_${DIM} \ + -p "${PORTFOLIO[@]}" --mode $MODE --dimensionality $DIM --n_epochs 40 --agent RL-DAS + +# 4. 
Dimension-specific CV-LOPO | Policy Gradient (Indices 12-15) +elif [[ $SLURM_ARRAY_TASK_ID -ge 12 && $SLURM_ARRAY_TASK_ID -le 15 ]]; then + MODE="CV-LOPO" + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 12))]} + echo "Running Mode: $MODE | Agent: Policy Gradient | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_${MODE}_${CDB_VAL}_${DIM} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --dimensionality $DIM \ + --cdb $CDB_VAL --n_epochs 3 --agent policy-gradient + +# 5. Dimension-specific RL-DAS-random (Indices 16-19) +elif [[ $SLURM_ARRAY_TASK_ID -ge 16 && $SLURM_ARRAY_TASK_ID -le 19 ]]; then + DIM=${DIMS[$((SLURM_ARRAY_TASK_ID - 16))]} + echo "Running Mode: Random Agent - RLDAS variant | Dimension: $DIM" + + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_DAS_${DIM} \ + -p 'JDE21' 'MADDE' 'NL_SHADE_RSP' --agent RL-DAS-random --dimensionality $DIM + +# 6. Multidimensional CV-LOIO (Index 20) +elif [[ $SLURM_ARRAY_TASK_ID -eq 20 ]]; then + MODE="CV-LOIO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 7. Multidimensional CV-LOPO (Index 21) +elif [[ $SLURM_ARRAY_TASK_ID -eq 21 ]]; then + MODE="CV-LOPO" + echo "Running Mode: $MODE | Multidimensional PG" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_PG_MULTIDIMENSIONAL_${MODE}_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" -r custom --mode $MODE --cdb $CDB_VAL --agent policy-gradient + +# 8. 
Global Random Agent (Index 22) +elif [[ $SLURM_ARRAY_TASK_ID -eq 22 ]]; then + echo "Running Mode: Global Random Agent" + python3 dynamicalgorithmselection/main.py JDE21_MADDE_NL_SHADE_RSP_RANDOM_${CDB_VAL} \ + -p "${PORTFOLIO[@]}" --cdb $CDB_VAL --agent random -# Run Experiment -python3 dynamicalgorithmselection/main.py example -p 'LMCMAES' 'SPSO' 'G3PCX' --wandb_project RL-DAS --wandb_entity niecwladek-agh --mode LOIO -s 10 +# 9. Global Baselines (Index 23) +elif [[ $SLURM_ARRAY_TASK_ID -eq 23 ]]; then + echo "Running Mode: Baselines" + python3 dynamicalgorithmselection/main.py BASELINES \ + -p "${PORTFOLIO[@]}" --mode baselines +fi \ No newline at end of file diff --git a/tests/test_agent.py b/tests/test_agent.py index eecbb27..9545f15 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -50,7 +50,7 @@ def test_get_reward_logic(self, mock_problem, basic_options): return_value=(MagicMock(), 5), ): agent = Agent(mock_problem, basic_options) - agent.initial_value_range = 10.0 + agent.initial_value_range = (10.0, 20.0) reward_good = agent.get_reward(new_best_y=15.0, old_best_y=20.0) # 5.0 / 10.0 = 0.5 -> log(0.5) diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py index 3bd9c79..a8c1e58 100644 --- a/tests/test_cross_validation.py +++ b/tests/test_cross_validation.py @@ -24,7 +24,7 @@ def test_get_cv_folds_structure(self, mock_suite, mock_miniprint): # so we check if it returns lists of correct length/structure. 
n_folds = 4 - suite, folds = _get_cv_folds(n_folds, is_loio=True) + suite, folds = _get_cv_folds(n_folds, leaving_mode="LOIO", dim=[10]) self.assertIsInstance(suite, MagicMock) # Should return the mocked suite self.assertEqual(len(folds), n_folds) @@ -67,7 +67,7 @@ def test_run_cross_validation_flow( # Execute res_folder = run_cross_validation( - self.optimizer_mock, self.options, self.eval_mult, is_loio=True + self.optimizer_mock, self.options, self.eval_mult, leaving_mode="LOIO" ) # Assertions diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 825ede8..809dc31 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -1,17 +1,22 @@ import unittest from unittest.mock import MagicMock, patch, mock_open -import os +from typing import Any, cast, Type # Added imports from dynamicalgorithmselection.experiments.experiment import ( coco_bbob_experiment, run_comparison, dump_extreme_stats, ) +from dynamicalgorithmselection.optimizers.Optimizer import ( + Optimizer, +) # Import the base class class TestExperiment(unittest.TestCase): def setUp(self): - self.optimizer_mock = MagicMock() + self.optimizer_mock = MagicMock( + spec=Type[Optimizer] + ) # Use spec for better type safety self.optimizer_mock.__name__ = "MockOpt" self.options = { "name": "experiment_test", @@ -27,7 +32,7 @@ def test_coco_bbob_experiment_dispatch_cv(self, mock_cv): self.optimizer_mock, self.options, "test_exp", mode="CV_LOIO" ) mock_cv.assert_called_once() - self.assertTrue(mock_cv.call_args[1]["is_loio"]) + self.assertTrue(mock_cv.call_args[1]["leaving_mode"]) @patch("dynamicalgorithmselection.experiments.experiment._coco_bbob_test_all") def test_coco_bbob_experiment_dispatch_random(self, mock_test_all): @@ -87,7 +92,7 @@ def test_run_comparison( opt1.__name__ = "Opt1" opt2 = MagicMock() opt2.__name__ = "Opt2" - portfolio = [opt1, opt2] + portfolio = cast(list[Type[Optimizer]], [opt1, opt2]) # Mock Suite mock_suite_obj = MagicMock() @@ -95,11 +100,8 @@ def 
test_run_comparison( mock_problem.dimension = 2 mock_suite_obj.get_problem.return_value = mock_problem - # get_suite returns (suite, problem_ids) mock_get_suite.return_value = (mock_suite_obj, ["p1"]) - # FIXED: Return a dictionary, NOT a tuple. - # run_comparison treats results as a dictionary directly in one of the lines. mock_single_func.return_value = {"fitness_history": [1, 2]} # Execute @@ -111,22 +113,20 @@ def test_run_comparison( self.assertEqual(mock_dump_stats.call_count, 2) mock_dump_extreme.assert_called_once() - @patch("dynamicalgorithmselection.experiments.experiment.get_checkpoints") @patch("dynamicalgorithmselection.experiments.experiment.get_extreme_stats") @patch("builtins.open", new_callable=mock_open) @patch("json.dump") - def test_dump_extreme_stats( - self, mock_json_dump, mock_file, mock_get_extreme, mock_get_checkpoints - ): - stats = {"Opt1": [], "Opt2": []} - portfolio = [MagicMock(__name__="Opt1"), MagicMock(__name__="Opt2")] + def test_dump_extreme_stats(self, mock_json_dump, mock_file, mock_get_extreme): + stats: dict[str, list[Any]] = {"Opt1": [], "Opt2": []} + mock_get_extreme.return_value = ({"best": 1}, {"worst": 0}) + case_name = "OPT1_OPT2_OPT3" - dump_extreme_stats(portfolio, stats, "p1", 100, 5, 10, 0.5) + dump_extreme_stats(case_name, stats, "p1", 100) self.assertEqual(mock_file.call_count, 2) self.assertEqual(mock_json_dump.call_count, 2) args_list = mock_file.call_args_list - self.assertIn("Opt1_Opt2_best", args_list[0][0][0]) - self.assertIn("Opt1_Opt2_worst", args_list[1][0][0]) + self.assertIn(f"{case_name}_best", args_list[0][0][0]) + self.assertIn(f"{case_name}_worst", args_list[1][0][0]) diff --git a/tests/test_experiments_core.py b/tests/test_experiments_core.py index b2b6b25..83a8ee6 100644 --- a/tests/test_experiments_core.py +++ b/tests/test_experiments_core.py @@ -13,6 +13,7 @@ def setUp(self): "n_checkpoints": 5, "n_individuals": 10, "cdb": 0.5, + "n_epochs": 1, } self.eval_multiplier = 10 self.problem_ids = 
["p1", "p2"] @@ -24,7 +25,10 @@ def setUp(self): self.suite_mock.get_problem.return_value = self.problem_mock self.observer_mock = MagicMock() - @patch("dynamicalgorithmselection.experiments.core.tqdm", side_effect=lambda x: x) + @patch( + "dynamicalgorithmselection.experiments.core.tqdm", + side_effect=lambda x, smoothing: x, + ) @patch("dynamicalgorithmselection.experiments.core.dump_stats") @patch("dynamicalgorithmselection.experiments.core.coco_bbob_single_function") def test_run_testing(self, mock_single_func, mock_dump_stats, mock_tqdm): @@ -50,7 +54,10 @@ def test_run_testing(self, mock_single_func, mock_dump_stats, mock_tqdm): self.assertEqual(self.options["max_function_evaluations"], expected_max_fe) self.assertFalse(self.options["train_mode"]) - @patch("dynamicalgorithmselection.experiments.core.tqdm", side_effect=lambda x: x) + @patch( + "dynamicalgorithmselection.experiments.core.tqdm", + side_effect=lambda x, smoothing: x, + ) @patch("dynamicalgorithmselection.experiments.core.coco_bbob_single_function") def test_run_training(self, mock_single_func, mock_tqdm): fake_state_1 = { diff --git a/tests/test_policy_gradient_agent.py b/tests/test_policy_gradient_agent.py index 802004f..d0ffebc 100644 --- a/tests/test_policy_gradient_agent.py +++ b/tests/test_policy_gradient_agent.py @@ -24,6 +24,8 @@ def ppo_options(self): "reward_normalizer": MagicMock(), "state_normalizer": MagicMock(), "buffer": MagicMock(), + "n_problems": 1000, + "n_epochs": 1, } @pytest.fixture @@ -49,19 +51,7 @@ def test_select_action_tensor_shape(self, mock_problem, ppo_options): return_value=(MagicMock(), 10), ): agent = PolicyGradientAgent(mock_problem, ppo_options) - - # Przygotuj dummy state (tensor) - state = torch.randn(1, 10).to(torch.float32) # Zakładamy wymiar stanu 10 - - # Wymuszamy, żeby sieci zwracały poprawne kształty (jeśli nie używamy prawdziwych wag) - # Ale PolicyGradientAgent tworzy prawdziwe sieci w __init__, więc powinny działać "z pudełka" - # o ile 
ppo_utils.Actor/Critic są poprawne. - - # Jeśli ppo_utils wymaga GPU, a testujesz na CPU, upewnij się że DEVICE w ppo_utils.py to 'cpu' - # lub nadpisz go w teście. - - # Testujemy metodę - # full_buffer=False -> exploration mode (losowe lub uniform) + state = torch.randn(1, 10).to(torch.float32) action, log_prob, value = agent._select_action(state, full_buffer=True) assert isinstance(action, (int, np.integer)) @@ -112,7 +102,6 @@ def test_execute_action_instantiation(self, mock_problem, ppo_options): mock_optimizer_instance = MagicMock() MockOptimizerClass.return_value = mock_optimizer_instance - # Konfigurujemy instancję mock_optimizer_instance.n_function_evaluations = 100 mock_optimizer_instance.best_so_far_y = 5.0 # iterate zwraca słownik wyników @@ -125,12 +114,8 @@ def test_execute_action_instantiation(self, mock_problem, ppo_options): return_value=(MagicMock(), 10), ): agent = PolicyGradientAgent(mock_problem, ppo_options) - agent.actions = [ - MockOptimizerClass - ] # Podmieniamy akcje na naszą klasę mocka - agent.iterate = MagicMock( - return_value={"result": "ok"} - ) # Mockujemy wywołanie iterate + agent.actions = [MockOptimizerClass] + agent.iterate = MagicMock(return_value={"result": "ok"}) iteration_result = {"x": None, "y": None} result, optimizer = agent._execute_action(0, iteration_result) @@ -175,6 +160,8 @@ def test_stagnation_logic(self, mock_problem, ppo_options): def test_buffer_add(self, mock_problem, ppo_options): mock_buffer = MagicMock() ppo_options["buffer"] = mock_buffer + ppo_options["n_problems"] = 1000 + ppo_options["n_epochs"] = 1 with patch( "dynamicalgorithmselection.agents.agent.get_state_representation", diff --git a/uv.lock b/uv.lock index c4bc2bc..294498f 100644 --- a/uv.lock +++ b/uv.lock @@ -52,6 +52,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = 
"sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, ] +[[package]] +name = "cfgv" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.4" @@ -279,6 +288,15 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/42/b0/876bc174ff34a0b2e3b75f10d7c3c9a267a1f56dbac59e943b6f682f6aa8/directsearch-1.0.tar.gz", hash = "sha256:8093ecc401a3d5eff28f053d4ef1b726f5a9c577bd33d6a8b2413a5ba753c734", size = 13605, upload-time = "2022-04-01T03:06:11.488Z" } +[[package]] +name = "distlib" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, +] + [[package]] name = "dynamicalgorithmselection" version = "0.1.0" @@ -293,12 +311,15 @@ 
dependencies = [ { name = "tenacity" }, { name = "torch" }, { name = "tqdm" }, + { name = "ty" }, { name = "wandb" }, { name = "wandb-workspaces" }, ] [package.dev-dependencies] dev = [ + { name = "pre-commit" }, + { name = "py-spy" }, { name = "pytest" }, { name = "ruff" }, ] @@ -314,12 +335,15 @@ requires-dist = [ { name = "tenacity", specifier = ">=9.1.2,<10" }, { name = "torch", specifier = ">=2.9.0" }, { name = "tqdm", specifier = ">=4.67.1,<5" }, + { name = "ty", specifier = ">=0.0.17" }, { name = "wandb", specifier = ">=0.22.2,<0.23" }, { name = "wandb-workspaces", specifier = ">=0.1.19,<0.2" }, ] [package.metadata.requires-dev] dev = [ + { name = "pre-commit", specifier = ">=4.5.1" }, + { name = "py-spy", specifier = ">=0.4.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "ruff", specifier = ">=0.14.5" }, ] @@ -444,6 +468,15 @@ classic-control = [ { name = "pygame" }, ] +[[package]] +name = "identify" +version = "2.6.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload-time = "2026-01-12T18:58:58.201Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -811,6 +844,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/13/8267afdb84a890d7fc3e6f0eef170b0323915c28879e79e8184f7257cf8a/nevergrad-1.0.12-py3-none-any.whl", hash = "sha256:56ff65d6a2f497ecd79af5a796968ee946c05705a6a69ca616eae5988cc5d999", size = 506324, upload-time = "2025-04-23T15:34:16.012Z" }, ] +[[package]] +name = 
"nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, +] + [[package]] name = "numba" version = "0.62.1" @@ -1151,6 +1193,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pre-commit" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, +] + [[package]] name = "protobuf" version = "6.33.0" @@ -1166,6 +1224,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" }, ] +[[package]] +name = "py-spy" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/e2/ff811a367028b87e86714945bb9ecb5c1cc69114a8039a67b3a862cef921/py_spy-0.4.1.tar.gz", hash = "sha256:e53aa53daa2e47c2eef97dd2455b47bb3a7e7f962796a86cc3e7dbde8e6f4db4", size = 244726, upload-time = "2025-07-31T19:33:25.172Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/e3/3a32500d845bdd94f6a2b4ed6244982f42ec2bc64602ea8fcfe900678ae7/py_spy-0.4.1-py2.py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:809094208c6256c8f4ccadd31e9a513fe2429253f48e20066879239ba12cd8cc", size = 3682508, upload-time = "2025-07-31T19:33:13.753Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/e4d280e9e0bec71d39fc646654097027d4bbe8e04af18fb68e49afcff404/py_spy-0.4.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:1fb8bf71ab8df95a95cc387deed6552934c50feef2cf6456bc06692a5508fd0c", size = 1796395, upload-time = "2025-07-31T19:33:15.325Z" }, + { url = "https://files.pythonhosted.org/packages/df/79/9ed50bb0a9de63ed023aa2db8b6265b04a7760d98c61eb54def6a5fddb68/py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee776b9d512a011d1ad3907ed53ae32ce2f3d9ff3e1782236554e22103b5c084", size = 2034938, upload-time = "2025-07-31T19:33:17.194Z" }, + { url = "https://files.pythonhosted.org/packages/53/a5/36862e3eea59f729dfb70ee6f9e14b051d8ddce1aa7e70e0b81d9fe18536/py_spy-0.4.1-py2.py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:532d3525538254d1859b49de1fbe9744df6b8865657c9f0e444bf36ce3f19226", size = 2658968, upload-time = 
"2025-07-31T19:33:18.916Z" }, + { url = "https://files.pythonhosted.org/packages/08/f8/9ea0b586b065a623f591e5e7961282ec944b5fbbdca33186c7c0296645b3/py_spy-0.4.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4972c21890b6814017e39ac233c22572c4a61fd874524ebc5ccab0f2237aee0a", size = 2147541, upload-time = "2025-07-31T19:33:20.565Z" }, + { url = "https://files.pythonhosted.org/packages/68/fb/bc7f639aed026bca6e7beb1e33f6951e16b7d315594e7635a4f7d21d63f4/py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6a80ec05eb8a6883863a367c6a4d4f2d57de68466f7956b6367d4edd5c61bb29", size = 2763338, upload-time = "2025-07-31T19:33:22.202Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/fcc9a9fcd4ca946ff402cff20348e838b051d69f50f5d1f5dca4cd3c5eb8/py_spy-0.4.1-py2.py3-none-win_amd64.whl", hash = "sha256:d92e522bd40e9bf7d87c204033ce5bb5c828fca45fa28d970f58d71128069fdc", size = 1818784, upload-time = "2025-07-31T19:33:23.802Z" }, +] + [[package]] name = "pydantic" version = "2.12.2" @@ -1701,6 +1774,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ] +[[package]] +name = "ty" +version = "0.0.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/c3/41ae6346443eedb65b96761abfab890a48ce2aa5a8a27af69c5c5d99064d/ty-0.0.17.tar.gz", hash = "sha256:847ed6c120913e280bf9b54d8eaa7a1049708acb8824ad234e71498e8ad09f97", size = 5167209, upload-time = "2026-02-13T13:26:36.835Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/01/0ef15c22a1c54b0f728ceff3f62d478dbf8b0dcf8ff7b80b954f79584f3e/ty-0.0.17-py3-none-linux_armv6l.whl", hash = 
"sha256:64a9a16555cc8867d35c2647c2f1afbd3cae55f68fd95283a574d1bb04fe93e0", size = 10192793, upload-time = "2026-02-13T13:27:13.943Z" }, + { url = "https://files.pythonhosted.org/packages/0f/2c/f4c322d9cded56edc016b1092c14b95cf58c8a33b4787316ea752bb9418e/ty-0.0.17-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:eb2dbd8acd5c5a55f4af0d479523e7c7265a88542efe73ed3d696eb1ba7b6454", size = 10051977, upload-time = "2026-02-13T13:26:57.741Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a5/43746c1ff81e784f5fc303afc61fe5bcd85d0fcf3ef65cb2cef78c7486c7/ty-0.0.17-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f18f5fd927bc628deb9ea2df40f06b5f79c5ccf355db732025a3e8e7152801f6", size = 9564639, upload-time = "2026-02-13T13:26:42.781Z" }, + { url = "https://files.pythonhosted.org/packages/d6/b8/280b04e14a9c0474af574f929fba2398b5e1c123c1e7735893b4cd73d13c/ty-0.0.17-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5383814d1d7a5cc53b3b07661856bab04bb2aac7a677c8d33c55169acdaa83df", size = 10061204, upload-time = "2026-02-13T13:27:00.152Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d7/493e1607d8dfe48288d8a768a2adc38ee27ef50e57f0af41ff273987cda0/ty-0.0.17-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9c20423b8744b484f93e7bf2ef8a9724bca2657873593f9f41d08bd9f83444c9", size = 10013116, upload-time = "2026-02-13T13:26:34.543Z" }, + { url = "https://files.pythonhosted.org/packages/80/ef/22f3ed401520afac90dbdf1f9b8b7755d85b0d5c35c1cb35cf5bd11b59c2/ty-0.0.17-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6f5b1aba97db9af86517b911674b02f5bc310750485dc47603a105bd0e83ddd", size = 10533623, upload-time = "2026-02-13T13:26:31.449Z" }, + { url = "https://files.pythonhosted.org/packages/75/ce/744b15279a11ac7138832e3a55595706b4a8a209c9f878e3ab8e571d9032/ty-0.0.17-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:488bce1a9bea80b851a97cd34c4d2ffcd69593d6c3f54a72ae02e5c6e47f3d0c", 
size = 11069750, upload-time = "2026-02-13T13:26:48.638Z" }, + { url = "https://files.pythonhosted.org/packages/f2/be/1133c91f15a0e00d466c24f80df486d630d95d1b2af63296941f7473812f/ty-0.0.17-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8df66b91ec84239420985ec215e7f7549bfda2ac036a3b3c065f119d1c06825a", size = 10870862, upload-time = "2026-02-13T13:26:54.715Z" }, + { url = "https://files.pythonhosted.org/packages/3e/4a/a2ed209ef215b62b2d3246e07e833081e07d913adf7e0448fc204be443d6/ty-0.0.17-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:002139e807c53002790dfefe6e2f45ab0e04012e76db3d7c8286f96ec121af8f", size = 10628118, upload-time = "2026-02-13T13:26:45.439Z" }, + { url = "https://files.pythonhosted.org/packages/b3/0c/87476004cb5228e9719b98afffad82c3ef1f84334bde8527bcacba7b18cb/ty-0.0.17-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6c4e01f05ce82e5d489ab3900ca0899a56c4ccb52659453780c83e5b19e2b64c", size = 10038185, upload-time = "2026-02-13T13:27:02.693Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/98f0b3ba9aef53c1f0305519536967a4aa793a69ed72677b0a625c5313ac/ty-0.0.17-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2b226dd1e99c0d2152d218c7e440150d1a47ce3c431871f0efa073bbf899e881", size = 10047644, upload-time = "2026-02-13T13:27:05.474Z" }, + { url = "https://files.pythonhosted.org/packages/93/e0/06737bb80aa1a9103b8651d2eb691a7e53f1ed54111152be25f4a02745db/ty-0.0.17-py3-none-musllinux_1_2_i686.whl", hash = "sha256:8b11f1da7859e0ad69e84b3c5ef9a7b055ceed376a432fad44231bdfc48061c2", size = 10231140, upload-time = "2026-02-13T13:27:10.844Z" }, + { url = "https://files.pythonhosted.org/packages/7c/79/e2a606bd8852383ba9abfdd578f4a227bd18504145381a10a5f886b4e751/ty-0.0.17-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c04e196809ff570559054d3e011425fd7c04161529eb551b3625654e5f2434cb", size = 10718344, upload-time = "2026-02-13T13:26:51.66Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/2d/2663984ac11de6d78f74432b8b14ba64d170b45194312852b7543cf7fd56/ty-0.0.17-py3-none-win32.whl", hash = "sha256:305b6ed150b2740d00a817b193373d21f0767e10f94ac47abfc3b2e5a5aec809", size = 9672932, upload-time = "2026-02-13T13:27:08.522Z" }, + { url = "https://files.pythonhosted.org/packages/de/b5/39be78f30b31ee9f5a585969930c7248354db90494ff5e3d0756560fb731/ty-0.0.17-py3-none-win_amd64.whl", hash = "sha256:531828267527aee7a63e972f54e5eee21d9281b72baf18e5c2850c6b862add83", size = 10542138, upload-time = "2026-02-13T13:27:17.084Z" }, + { url = "https://files.pythonhosted.org/packages/40/b7/f875c729c5d0079640c75bad2c7e5d43edc90f16ba242f28a11966df8f65/ty-0.0.17-py3-none-win_arm64.whl", hash = "sha256:de9810234c0c8d75073457e10a84825b9cd72e6629826b7f01c7a0b266ae25b1", size = 10023068, upload-time = "2026-02-13T13:26:39.637Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1740,6 +1837,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, ] +[[package]] +name = "virtualenv" +version = "20.35.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/28/e6f1a6f655d620846bd9df527390ecc26b3805a0c5989048c210e22c5ca9/virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c", size = 6028799, upload-time = "2025-10-29T06:57:40.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/0c/c05523fa3181fdf0c9c52a6ba91a23fbf3246cc095f26f6516f9c60e6771/virtualenv-20.35.4-py3-none-any.whl", hash = 
"sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b", size = 6005095, upload-time = "2025-10-29T06:57:37.598Z" }, +] + [[package]] name = "wandb" version = "0.22.2"