diff --git a/.gitignore b/.gitignore
index b7faf40..a136db4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,72 @@
+# Created by https://www.toptal.com/developers/gitignore/api/python,linux,windows,macos,jupyternotebooks,visualstudiocode
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,windows,macos,jupyternotebooks,visualstudiocode
+
+### JupyterNotebooks ###
+# gitignore template for Jupyter Notebooks
+# website: http://jupyter.org/
+
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# IPython
+profile_default/
+ipython_config.py
+
+# Remove previous ipynb_checkpoints
+#   git rm -r .ipynb_checkpoints/
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### macOS Patch ###
+# iCloud generated files
+*.icloud
+
+### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
-*.py[codz]
+*.py[cod]
 *$py.class
 
 # C extensions
@@ -46,7 +112,7 @@ htmlcov/
 nosetests.xml
 coverage.xml
 *.cover
-*.py.cover
+*.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
@@ -76,11 +142,8 @@ docs/_build/
 target/
 
 # Jupyter Notebook
-.ipynb_checkpoints
 
 # IPython
-profile_default/
-ipython_config.py
 
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
@@ -94,35 +157,20 @@ ipython_config.py
 #   install all needed dependencies.
 #Pipfile.lock
 
-# UV
-#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#uv.lock
-
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
-#poetry.toml
 
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
-#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
 #pdm.lock
-#pdm.toml
-.pdm-python
-.pdm-build/
-
-# pixi
-#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
-#pixi.lock
-#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
-#   in the .venv directory. It is recommended not to include this directory in version control.
-.pixi
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
 
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@@ -136,7 +184,6 @@ celerybeat.pid
 
 # Environments
 .env
-.envrc
 .venv
 env/
 venv/
@@ -175,33 +222,99 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# Abstra
-# Abstra is an AI-powered process automation framework.
-# Ignore directories containing user credentials, local state, and settings.
-# Learn more at https://abstra.io/docs
-.abstra/
-
-# Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
-#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
-#  you could uncomment the following to ignore the entire vscode folder
-# .vscode/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
 
-# Ruff stuff:
+# ruff
 .ruff_cache/
 
-# PyPI configuration file
-.pypirc
+# LSP config files
+pyrightconfig.json
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+!.vscode/*.code-snippets
+
+# Local History for Visual Studio Code
+.history/
+
+# Built Visual Studio Code Extensions
+*.vsix
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/python,linux,windows,macos,jupyternotebooks,visualstudiocode
+
+
+lightning_logs/
+logs/
+tmp/
+
+
+#/**/data/**
+#!/**/data/**/
+
+#/**/models/**
+#!/**/models/**/
+
+
+#*.json
+#*.h5
+#*.pickle
+*.csv
+*.pkl
+*.zip
+*.h5
+*.hdf5
+*.joblib
+*.feather
+*.parquet
+
+auto_model/
 
-# Cursor
-#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
-#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
-#  refer to https://docs.cursor.com/context/ignore-files
-.cursorignore
-.cursorindexingignore
+#*.ckpt
+#*.joblib
+#*.pkl
+#*.pq
+#*.parquet
 
-# Marimo
-marimo/_static/
-marimo/_lsp/
-__marimo__/
+#!**/.gitkeep
+#!**/dvc.yml
+#!**/dvc.yaml
+#!**/*.dvc
+#!**/*.py
+#!**/dvc.lock
diff --git a/CountLOC.py b/CountLOC.py
new file mode 100644
index 0000000..e46255a
--- /dev/null
+++ b/CountLOC.py
@@ -0,0 +1,171 @@
+from pathlib import Path
+import re
+import json
+import os
+from rich.console import Console
+from rich.syntax import Syntax
+from rich.text import Text
+import sys
+
+NOTEBOOKS_PATH = Path("./JupyterNotebooks")
+CONFIGS_PATH = Path("./LOCALIZE_Configs")
+
+def is_blank(line):  # True when `line` is empty or consists solely of whitespace
+    return not line.strip()
+
+def strip_comment(line):  # remove one-line triple-quoted strings and `#` comments; the result is right-stripped
+    line = re.sub(r'("""|\'\'\')(.*?)\1', '', line)  # docstrings first, so a `#` inside one cannot hide its closing quotes
+    line = line.split("#", 1)[0]  # NOTE: a `#` inside an ordinary string literal is still treated as a comment start
+    return line.rstrip()
+
+def strip_structural(line):  # delete every brace, bracket, parenthesis and comma; all other characters stay in order
+    return line.translate(str.maketrans("", "", "{}[](),"))
+
+def is_meaningful_line(line):
+    # Return True when `line` should be counted as code: after removing comments
+    # and one-line docstrings, something other than "{}[]()," must remain. The
+    # punctuation check uses the stripped text so ")  # note" is not counted.
+    code = strip_comment(line)
+    if is_blank(code):
+        return False
+    return not is_blank(strip_structural(code))
+
+def strip_meaningless(lines):  # keep only the countable lines, each returned with comments removed and right-stripped
+    return list(map(strip_comment, filter(is_meaningful_line, lines)))
+
+def extract_code_lines_notebook(ipynb_path):
+    """Return the source lines of all code cells in a notebook, with a lone newline entry after each cell."""
+    with open(ipynb_path, 'r', encoding='utf-8') as f:
+        notebook = json.load(f)
+    lines = []
+    for cell in notebook.get("cells", []):
+        if cell.get("cell_type") == "code":
+            source = cell.get("source", [])  # nbformat permits one multi-line string instead of a list of lines
+            lines.extend(source.splitlines(keepends=True) if isinstance(source, str) else source)
+            lines.append("\n")  # separate cells with a newline
+    return lines
+
+def keep_unique_lines(lines, other):  # entries of `lines` with no unused whitespace-stripped match left in `other`
+    from collections import Counter  # local import so the block does not depend on the file header
+    unmatched = Counter(entry.strip() for entry in other)  # multiset of `other`; replaces repeated list scans/removals
+    out = []
+    for line in lines:
+        key = line.strip()
+        if unmatched[key] <= 0:  # every occurrence in `other` is already paired (or there never was one)
+            out.append(line)
+        unmatched[key] -= 1  # consume one occurrence; going negative simply keeps later duplicates unmatched
+    return out
+
+def remove_unwanted_lines(lines, notebook):  # drop every line that contains the stem (file name without suffix) of `notebook`
+    return list(filter(lambda text: notebook.stem not in text, lines))
+
+def color_diff_lines(lines, prefix, prefix_style):
+    # Build one rich Text per input line: the diff prefix in `prefix_style`,
+    # followed by the line highlighted as Python (monokai theme).
+    def render(raw):
+        snippet = raw.strip("\n")
+        highlighter = Syntax(snippet, "python", theme="monokai", line_numbers=False, word_wrap=False)
+        body = highlighter.highlight(snippet)
+        body.rstrip()  # rich's Text.rstrip() works in place; its return value is not used
+        entry = Text(end="")
+        entry.append(f"{prefix} ", style=prefix_style)
+        entry.append(body)  # only the code part is syntax-highlighted
+        return entry
+    return [render(raw) for raw in lines]
+
+notebooks = sorted([Path(file) for file in os.listdir(NOTEBOOKS_PATH) if file.endswith(".ipynb")])  # *.ipynb entries of NOTEBOOKS_PATH, sorted
+# Baseline: countable code lines of the first notebook, minus lines that mention its own stem.
+pr = strip_meaningless(extract_code_lines_notebook(NOTEBOOKS_PATH / notebooks[0]))  # IndexError if no notebook was found
+pr = remove_unwanted_lines(pr, notebooks[0])
+# Per-comparison counts of deleted/added lines between consecutive notebooks.
+chj = {
+    "deleted": [],
+    "added": []
+}
+
+print("Jupyter:")
+for notebook in notebooks[1:5]:  # at most four notebooks, each compared with the one before it
+    c = strip_meaningless(extract_code_lines_notebook(NOTEBOOKS_PATH / notebook))
+    c = remove_unwanted_lines(c, notebook)
+    # Lines present only in the previous notebook count as deleted, lines only in the current one as added.
+    deleted = keep_unique_lines(pr, c)
+    added   = keep_unique_lines(c, pr)
+    # Styled renderings of the differing lines; they are built here but not printed in this script.
+    deleted_styled = color_diff_lines(deleted, "-", "bold red")
+    added_styled   = color_diff_lines(added, "+", "bold green")
+
+    pr = c  # the current notebook becomes the baseline for the next comparison
+
+    console = Console(  # a new Console writing to stdout is created on every iteration
+        force_jupyter=False,
+        force_terminal=True,
+        file=sys.stdout,
+    )
+
+    console.print(f"  [bold red]lines deleted:[/] {len(deleted)}")
+    console.print(f"  [green]lines added:[/] {len(added)}")
+    print("____________________\n")
+    chj["deleted"].append(len(deleted))  # record the counts for this comparison
+    chj["added"].append(len(added))
+
+configs = sorted([Path(file) for file in os.listdir(CONFIGS_PATH)])[1:6]  # sorted entries 1..5 of CONFIGS_PATH; NOTE(review): the first sorted entry is skipped - confirm this is intended
+
+def extract_code_lines_yaml(path):  # read a UTF-8 text file and return its lines with line endings kept
+    with open(path, mode='r', encoding='utf-8') as handle:
+        return list(handle)
+
+def color_diff_lines(lines, prefix, prefix_style):
+    # YAML variant; from here on it rebinds the Python-highlighting function of
+    # the same name. One rich Text per line: styled prefix + highlighted content.
+    styled = []
+    for raw in lines:
+        content = raw.strip("\n")
+        highlighted = Syntax(content, "yaml", theme="monokai", line_numbers=False, word_wrap=False).highlight(content)
+        highlighted.rstrip()  # called for its effect on `highlighted`; the return value is not used
+        entry = Text(end="")
+        entry.append(f"{prefix} ", style=prefix_style)
+        entry.append(highlighted)  # only the file content is syntax-highlighted
+        styled.append(entry)
+    return styled
+
+print("Framework:")
+chf = {  # per-file lists of deleted/added line counts, one entry per consecutive config pair
+    "dvc.yaml":{
+        "deleted": [],
+        "added": []
+    },
+    "params.yaml":{
+        "deleted": [],
+        "added": []
+    }
+}
+for cnf in ["dvc.yaml", "params.yaml"]:
+    pr = strip_meaningless(extract_code_lines_yaml(CONFIGS_PATH / configs[0] / cnf))  # baseline: this file in the first selected config directory
+    pr = remove_unwanted_lines(pr, configs[0])  # drop lines that mention that directory's own name (stem)
+    for config in configs[1:]:
+        print(cnf)  # the file name is printed once per comparison
+        c = strip_meaningless(extract_code_lines_yaml(CONFIGS_PATH / config / cnf))
+        c = remove_unwanted_lines(c, config)
+        # Lines present only in the previous config count as deleted, lines only in the current one as added.
+        deleted = keep_unique_lines(pr, c)
+        added   = keep_unique_lines(c, pr)
+        # Styled renderings of the differing lines; they are built here but not printed in this script.
+        deleted_styled = color_diff_lines(deleted, "-", "bold red")
+        added_styled   = color_diff_lines(added, "+", "bold green")
+
+        pr = c  # the current config becomes the baseline for the next comparison
+
+        console = Console(  # a new Console writing to stdout is created on every iteration
+            force_jupyter=False,
+            force_terminal=True,
+            file=sys.stdout,
+            width = 120
+        )
+
+        console.print(f"  [bold red]lines deleted:[/] {len(deleted)}")
+        console.print(f"  [green]lines added:[/] {len(added)}")
+
+        chf[cnf]["deleted"].append(len(deleted))  # record the counts for this comparison
+        chf[cnf]["added"].append(len(added))
+
+    print("____________________\n")  # separator after all comparisons of one file
\ No newline at end of file
diff --git a/JupyterNotebooks/00-Initial.ipynb b/JupyterNotebooks/00-Initial.ipynb
new file mode 100644
index 0000000..2b31707
--- /dev/null
+++ b/JupyterNotebooks/00-Initial.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0304a74-ede7-4e21-8929-8e06206f4d5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/umu\")\n",
+    "results_path = Path(\"00-Initial-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "df = pd.read_excel(data / \"umu\" / \"tcp_nokia_20240325.xlsx\")\n",
+    "\n",
+    "df = df[\n",
+    "    [\"Column7\",\"Column8\",\"Column14\",\"Column15\",\"Column42\",\"Column43\",\"Column45\",\n",
+    "     \"Column46\",\"Column47\",\"Column48\",\"Column87\",\"Column88\",\"Column78\",\"Column79\"]\n",
+    "]\n",
+    "df.columns = df.iloc[0]\n",
+    "df = df[1:]\n",
+    "\n",
+    "# convert all columns to numeric\n",
+    "for column in df.columns:\n",
+    "    if df[column].dtype == \"object\":\n",
+    "        df[column] = pd.to_numeric(df[column], errors=\"coerce\")\n",
+    "\n",
+    "df.columns = [col.replace(\"nas_value_nr5g_\", \"\") for col in df.columns]\n",
+    "\n",
+    "df = df.dropna()  # drop the ~2 rows with NaN\n",
+    "df = df.loc[:, df.nunique() > 1]  # keep only columns with more than one unique value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature generation\n",
+    "import math\n",
+    "\n",
+    "def lat_lon_to_meters(origin_lat, origin_lon, point_lat, point_lon) -> tuple[float, float]:\n",
+    "    \"\"\"Works \"fine\" for distances less than 100km\"\"\"\n",
+    "\n",
+    "    R = 6_378_137 # Earth's radius in meters\n",
+    "\n",
+    "    origin_lat_rad = math.radians(origin_lat) # Convert latitude and longitude from degrees to radians\n",
+    "    delta_lat_rad = math.radians(point_lat - origin_lat)\n",
+    "    delta_lon_rad = math.radians(point_lon - origin_lon)\n",
+    "\n",
+    "    delta_meters_lat = delta_lat_rad * R # Calculate distance in the latitude direction (North-South)\n",
+    "\n",
+    "    delta_meters_lon = delta_lon_rad * R * math.cos(origin_lat_rad) # Calculate distance in the longitude direction (East-West)\n",
+    "\n",
+    "    return delta_meters_lat, delta_meters_lon\n",
+    "\n",
+    "origin_lat, origin_lon = df.gpsd_tpv_lat.min(), df.gpsd_tpv_lon.min()\n",
+    "\n",
+    "df[[\"target_x\", \"target_y\"]] = df.apply(\n",
+    "    lambda row: lat_lon_to_meters(origin_lat, origin_lon, row[\"gpsd_tpv_lat\"], row[\"gpsd_tpv_lon\"]),\n",
+    "    axis=1,\n",
+    "    result_type=\"expand\",\n",
+    ")\n",
+    "\n",
+    "df.drop(columns=[\"gpsd_tpv_lat\", \"gpsd_tpv_lon\"], inplace=True)\n",
+    "\n",
+    "targets = [\"target_x\", \"target_y\"] # Find target column(s)\n",
+    "\n",
+    "features, targets = df.drop(targets, axis=1), df[targets] # X are features, y are target(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Split generation\n",
+    "from sklearn import model_selection\n",
+    "groups = None\n",
+    "\n",
+    "cv = model_selection.KFold(\n",
+    "    n_splits=n_splits,\n",
+    "    shuffle=True,\n",
+    "    random_state=random_seed,\n",
+    ")\n",
+    "\n",
+    "indices = []\n",
+    "\n",
+    "for train_indices, test_indices in cv.split(features, targets, groups):\n",
+    "        indices.append((train_indices, test_indices))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1593cd46-3eb7-4a64-b358-fb3b7b2bd821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Train&Evaluate\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error\n",
+    "cv = PredefinedSplit(indices)\n",
+    "\n",
+    "gs = model_selection.GridSearchCV(\n",
+    "    estimator = LinearRegression(),\n",
+    "    param_grid = {\n",
+    "        \"fit_intercept\":[True, False]\n",
+    "    },\n",
+    "    n_jobs = 4,\n",
+    "    error_score = \"raise\",\n",
+    "    refit = True,\n",
+    "    scoring = make_scorer(root_mean_squared_error, greater_is_better=False),\n",
+    "    cv = cv,\n",
+    ")\n",
+    "\n",
+    "gs.fit(features, targets)\n",
+    "\n",
+    "results_df = pd.DataFrame(gs.cv_results_)\n",
+    "\n",
+    "# Select key columns to display\n",
+    "cols_to_show = [\n",
+    "    'params',\n",
+    "    'mean_test_score',\n",
+    "    'std_test_score',\n",
+    "    'rank_test_score',\n",
+    "    'mean_fit_time',\n",
+    "    'mean_score_time',\n",
+    "]\n",
+    "\n",
+    "# Print as a table\n",
+    "print(results_df[cols_to_show].to_string(index=False))\n",
+    "Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "joblib.dump(gs.best_estimator_, results_path / f\"Model_{gs.best_estimator_.__class__.__name__}-KFoldSplit.pkl\") # Note it's only possible to get the best estimator, as the framework uses a modified version of the class to save all the models\n",
+    "joblib.dump(results_df, results_path / f\"Results_{gs.best_estimator_.__class__.__name__}-KFoldSplit.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/JupyterNotebooks/01-Changed_and_added_model.ipynb b/JupyterNotebooks/01-Changed_and_added_model.ipynb
new file mode 100644
index 0000000..8ff3091
--- /dev/null
+++ b/JupyterNotebooks/01-Changed_and_added_model.ipynb
@@ -0,0 +1,231 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0304a74-ede7-4e21-8929-8e06206f4d5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/umu\")\n",
+    "results_path = Path(\"01-Changed_and_added_model-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "df = pd.read_excel(data / \"umu\" / \"tcp_nokia_20240325.xlsx\")\n",
+    "\n",
+    "df = df[\n",
+    "    [\"Column7\",\"Column8\",\"Column14\",\"Column15\",\"Column42\",\"Column43\",\"Column45\",\n",
+    "     \"Column46\",\"Column47\",\"Column48\",\"Column87\",\"Column88\",\"Column78\",\"Column79\"]\n",
+    "]\n",
+    "df.columns = df.iloc[0]\n",
+    "df = df[1:]\n",
+    "\n",
+    "# convert all columns to numeric\n",
+    "for column in df.columns:\n",
+    "    if df[column].dtype == \"object\":\n",
+    "        df[column] = pd.to_numeric(df[column], errors=\"coerce\")\n",
+    "\n",
+    "df.columns = [col.replace(\"nas_value_nr5g_\", \"\") for col in df.columns]\n",
+    "\n",
+    "df = df.dropna()  # drop the ~2 rows with NaN\n",
+    "df = df.loc[:, df.nunique() > 1]  # keep only columns with more than one unique value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature generation\n",
+    "import math\n",
+    "\n",
+    "def lat_lon_to_meters(origin_lat, origin_lon, point_lat, point_lon) -> tuple[float, float]:\n",
+    "    \"\"\"Works \"fine\" for distances less than 100km\"\"\"\n",
+    "\n",
+    "    R = 6_378_137 # Earth's radius in meters\n",
+    "\n",
+    "    origin_lat_rad = math.radians(origin_lat) # Convert latitude and longitude from degrees to radians\n",
+    "    delta_lat_rad = math.radians(point_lat - origin_lat)\n",
+    "    delta_lon_rad = math.radians(point_lon - origin_lon)\n",
+    "\n",
+    "    delta_meters_lat = delta_lat_rad * R # Calculate distance in the latitude direction (North-South)\n",
+    "\n",
+    "    delta_meters_lon = delta_lon_rad * R * math.cos(origin_lat_rad) # Calculate distance in the longitude direction (East-West)\n",
+    "\n",
+    "    return delta_meters_lat, delta_meters_lon\n",
+    "\n",
+    "origin_lat, origin_lon = df.gpsd_tpv_lat.min(), df.gpsd_tpv_lon.min()\n",
+    "\n",
+    "df[[\"target_x\", \"target_y\"]] = df.apply(\n",
+    "    lambda row: lat_lon_to_meters(origin_lat, origin_lon, row[\"gpsd_tpv_lat\"], row[\"gpsd_tpv_lon\"]),\n",
+    "    axis=1,\n",
+    "    result_type=\"expand\",\n",
+    ")\n",
+    "\n",
+    "df.drop(columns=[\"gpsd_tpv_lat\", \"gpsd_tpv_lon\"], inplace=True)\n",
+    "\n",
+    "targets = [\"target_x\", \"target_y\"] # Find target column(s)\n",
+    "\n",
+    "features, targets = df.drop(targets, axis=1), df[targets] # X are features, y are target(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Split generation\n",
+    "from sklearn import model_selection\n",
+    "groups = None\n",
+    "\n",
+    "cv = model_selection.KFold(\n",
+    "    n_splits=n_splits,\n",
+    "    shuffle=True,\n",
+    "    random_state=random_seed,\n",
+    ")\n",
+    "\n",
+    "indices = []\n",
+    "\n",
+    "for train_indices, test_indices in cv.split(features, targets, groups):\n",
+    "        indices.append((train_indices, test_indices))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1593cd46-3eb7-4a64-b358-fb3b7b2bd821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Train&Evaluate\n",
+    "from sklearn.ensemble import RandomForestRegressor \n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error\n",
+    "cv = PredefinedSplit(indices)\n",
+    "\n",
+    "estimators = [\n",
+    "    RandomForestRegressor(random_state=42),\n",
+    "    KNeighborsRegressor()\n",
+    "]\n",
+    "params=[\n",
+    "    {\n",
+    "        \"n_estimators\": [10, 50, 100, 250, 400], \n",
+    "        \"max_depth\": [5, 10, 30, 50, 150, 200, None]\n",
+    "    },\n",
+    "    {\n",
+    "        \"n_neighbors\": [3, 5, 10], \n",
+    "        \"weights\": [\"uniform\", \"distance\"], \n",
+    "        \"p\": [1, 2], \n",
+    "        \"leaf_size\": [10, 15, 30], \n",
+    "        \"metric\": [\"minkowski\", \"euclidean\"] \n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "for index in range(2):\n",
+    "    gs = model_selection.GridSearchCV(\n",
+    "        estimator = estimators[index],\n",
+    "        param_grid = params[index],\n",
+    "        n_jobs = 4,\n",
+    "        error_score = \"raise\",\n",
+    "        refit = True,\n",
+    "        scoring = make_scorer(root_mean_squared_error, greater_is_better=False),\n",
+    "        cv = cv,\n",
+    "    )\n",
+    "    \n",
+    "    gs.fit(features, targets)\n",
+    "    \n",
+    "    results_df = pd.DataFrame(gs.cv_results_)\n",
+    "    \n",
+    "    # Select key columns to display\n",
+    "    cols_to_show = [\n",
+    "        'params',\n",
+    "        'mean_test_score',\n",
+    "        'std_test_score',\n",
+    "        'rank_test_score',\n",
+    "        'mean_fit_time',\n",
+    "        'mean_score_time',\n",
+    "    ]\n",
+    "    \n",
+    "    # Print as a table\n",
+    "    print(results_df[cols_to_show].to_string(index=False))\n",
+    "    Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "    joblib.dump(gs.best_estimator_, results_path / f\"Model_{estimators[index].__class__.__name__}-KFoldSplit.pkl\") # Note it's only possible to get the best estimator, as the framework uses a modified version of the class to save all the models\n",
+    "    joblib.dump(results_df, results_path / f\"Results_{estimators[index].__class__.__name__}-KFoldSplit.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/JupyterNotebooks/02-Changed_dataset_to_logatec.ipynb b/JupyterNotebooks/02-Changed_dataset_to_logatec.ipynb
new file mode 100644
index 0000000..0edf0cd
--- /dev/null
+++ b/JupyterNotebooks/02-Changed_dataset_to_logatec.ipynb
@@ -0,0 +1,268 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0304a74-ede7-4e21-8929-8e06206f4d5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/logatec\")\n",
+    "results_path = Path(\"02-Changed_dataset_to_logatec-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5\n",
+    "test_size = 0.20\n",
+    "subsets = [\"spring\", \"winter\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a584e20f-f567-4716-bf99-2c631e2fb789",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_raw_data(path: Path) -> pd.DataFrame:\n",
+    "    with open(path, mode=\"r\") as fp:\n",
+    "        data = json.load(fp)\n",
+    "\n",
+    "    df = []\n",
+    "\n",
+    "    for position, measurements in data.items():\n",
+    "        digits = re.findall(r\"\\d+\", position)\n",
+    "        location = tuple(int(i) for i in digits)\n",
+    "\n",
+    "        # Winter dataset has measurements only in the middle (3rd) row.\n",
+    "        if len(location) == 1:\n",
+    "            location = (3, *location)\n",
+    "\n",
+    "        assert len(location) == 2, f\"location identifier is not length 2: {location}\"\n",
+    "\n",
+    "        pos_x, pos_y = location\n",
+    "\n",
+    "        for device_id, samples in measurements.items():\n",
+    "            device_id = int(device_id)\n",
+    "            for sample in samples:\n",
+    "                timestamp, value = sample[\"timestamp\"], sample[\"rss\"]\n",
+    "\n",
+    "                item = {\"pos_x\": pos_x, \"pos_y\": pos_y, \"node\": device_id, \"timestamp\": timestamp, \"value\": value}\n",
+    "                df.append(item)\n",
+    "\n",
+    "    df = pd.DataFrame(df)\n",
+    "    df.timestamp = pd.to_datetime(df.timestamp, unit=\"s\", origin=\"unix\").astype(\"datetime64[s]\")\n",
+    "    df = df.astype({\"pos_x\": \"uint8\", \"pos_y\": \"uint8\", \"value\": \"int8\", \"node\": \"uint8\"})\n",
+    "\n",
+    "    return df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "import json\n",
+    "import re\n",
+    "\n",
+    "df = [load_raw_data(data / f\"{subsets[0]}_data.json\"), load_raw_data(data / f\"{subsets[1]}_data.json\")]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    dat = []\n",
+    "    \n",
+    "    # Average the sample value within a second.\n",
+    "    for (x, y, node, ts), subset in df[idx].groupby(by=[\"pos_x\", \"pos_y\", \"node\", \"timestamp\"]):\n",
+    "        avg_value = subset.value.sum(min_count=1) / subset.value.count()\n",
+    "        item = {\"pos_x\": x, \"pos_y\": y, \"node\": node, \"timestamp\": ts, \"value\": avg_value}\n",
+    "        dat.append(item)\n",
+    "    \n",
+    "    df[idx] = pd.DataFrame(dat)\n",
+    "    df[idx] = df[idx].pivot(index=[\"timestamp\", \"pos_x\", \"pos_y\"], columns=[\"node\"], values=[\"value\"])\n",
+    "    df[idx] = df[idx].reset_index(drop=False)\n",
+    "    \n",
+    "    # After pivot, column names become tuples. Fix that.\n",
+    "    df[idx].columns = [\"\".join(map(str, col)).strip().replace(\"value\", \"node\") for col in df[idx].columns.values]\n",
+    "    \n",
+    "    # Fill the NaN values with some extremely low RSS value\n",
+    "    df[idx] = df[idx].fillna(-180)\n",
+    "    \n",
+    "    # TODO: Should this be part of prepare-feature stage?\n",
+    "    # Remove datetime column\n",
+    "    df[idx] = df[idx].drop(columns=[\"timestamp\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature generation\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    # Convert discrete values to meters\n",
+    "    df[idx].pos_x = (df[idx].pos_x - 1) * 1.2  # meters\n",
+    "    df[idx].pos_y = (df[idx].pos_y - 1) * 1.2  # meters\n",
+    "    \n",
+    "    df[idx] = df[idx].rename(columns={\"pos_x\": \"target_x\", \"pos_y\": \"target_y\"})\n",
+    "\n",
+    "# Find target column(s)\n",
+    "targets = [\"target_x\", \"target_y\"]\n",
+    "\n",
+    "# X are features, y are target(s)\n",
+    "features, targets = [df[0].drop(targets, axis=1), df[1].drop(targets, axis=1)], [df[0][targets], df[1][targets]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Split generation\n",
+    "from sklearn import model_selection\n",
+    "\n",
+    "groups = None\n",
+    "\n",
+    "cv = model_selection.KFold(\n",
+    "        n_splits=n_splits,\n",
+    "        shuffle=True,\n",
+    "        random_state=random_seed,\n",
+    "    )\n",
+    "\n",
+    "indices = indices = [[] for _ in range(len(subsets))]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for train_indices, test_indices in cv.split(features[idx], targets[idx], groups):\n",
+    "                indices[idx].append((train_indices, test_indices))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1593cd46-3eb7-4a64-b358-fb3b7b2bd821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Train&Evaluate\n",
+    "from sklearn.ensemble import RandomForestRegressor \n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    cv = PredefinedSplit(indices[idx])\n",
+    "    \n",
+    "    estimators = [\n",
+    "        RandomForestRegressor(random_state=42),\n",
+    "        KNeighborsRegressor()\n",
+    "    ]\n",
+    "    params=[\n",
+    "        {\n",
+    "            \"n_estimators\": [10, 50, 100, 250, 400], \n",
+    "            \"max_depth\": [5, 10, 30, 50, 150, 200, None]\n",
+    "        },\n",
+    "        {\n",
+    "            \"n_neighbors\": [3, 5, 10], \n",
+    "            \"weights\": [\"uniform\", \"distance\"], \n",
+    "            \"p\": [1, 2], \n",
+    "            \"leaf_size\": [10, 15, 30], \n",
+    "            \"metric\": [\"minkowski\", \"euclidean\"] \n",
+    "        }\n",
+    "    ]\n",
+    "    \n",
+    "    for index in range(len(estimators)):\n",
+    "        gs = model_selection.GridSearchCV(\n",
+    "            estimator = estimators[index],\n",
+    "            param_grid = params[index],\n",
+    "            n_jobs = -1,\n",
+    "            error_score = \"raise\",\n",
+    "            refit = True,\n",
+    "            scoring = make_scorer(root_mean_squared_error, greater_is_better=False),\n",
+    "            cv = cv,\n",
+    "        )\n",
+    "        \n",
+    "        gs.fit(features[idx], targets[idx])\n",
+    "        \n",
+    "        results_df = pd.DataFrame(gs.cv_results_)\n",
+    "        \n",
+    "        # Select key columns to display\n",
+    "        cols_to_show = [\n",
+    "            'params',\n",
+    "            'mean_test_score',\n",
+    "            'std_test_score',\n",
+    "            'rank_test_score',\n",
+    "            'mean_fit_time',\n",
+    "            'mean_score_time',\n",
+    "        ]\n",
+    "        \n",
+    "        # Print as a table\n",
+    "        print(results_df[cols_to_show].to_string(index=False))\n",
+    "        Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "        joblib.dump(gs.best_estimator_, results_path / f\"Model_{estimators[index].__class__.__name__}-KFoldSplit-{subset}Subset.pkl\") \n",
+    "        joblib.dump(results_df, results_path / f\"Results_{estimators[index].__class__.__name__}-KFoldSplit-{subset}Subset.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/JupyterNotebooks/03-Added_split_and_metric.ipynb b/JupyterNotebooks/03-Added_split_and_metric.ipynb
new file mode 100644
index 0000000..8161553
--- /dev/null
+++ b/JupyterNotebooks/03-Added_split_and_metric.ipynb
@@ -0,0 +1,281 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0304a74-ede7-4e21-8929-8e06206f4d5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/logatec\")\n",
+    "results_path = Path(\"03-Added_split_and_metric-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5\n",
+    "test_size = 0.20\n",
+    "subsets = [\"spring\", \"winter\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a584e20f-f567-4716-bf99-2c631e2fb789",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_raw_data(path: Path) -> pd.DataFrame:\n",
+    "    with open(path, mode=\"r\") as fp:\n",
+    "        data = json.load(fp)\n",
+    "\n",
+    "    df = []\n",
+    "\n",
+    "    for position, measurements in data.items():\n",
+    "        digits = re.findall(r\"\\d+\", position)\n",
+    "        location = tuple(int(i) for i in digits)\n",
+    "\n",
+    "        # Winter dataset has measurements only in the middle (3rd) row.\n",
+    "        if len(location) == 1:\n",
+    "            location = (3, *location)\n",
+    "\n",
+    "        assert len(location) == 2, f\"location identifier is not length 2: {location}\"\n",
+    "\n",
+    "        pos_x, pos_y = location\n",
+    "\n",
+    "        for device_id, samples in measurements.items():\n",
+    "            device_id = int(device_id)\n",
+    "            for sample in samples:\n",
+    "                timestamp, value = sample[\"timestamp\"], sample[\"rss\"]\n",
+    "\n",
+    "                item = {\"pos_x\": pos_x, \"pos_y\": pos_y, \"node\": device_id, \"timestamp\": timestamp, \"value\": value}\n",
+    "                df.append(item)\n",
+    "\n",
+    "    df = pd.DataFrame(df)\n",
+    "    df.timestamp = pd.to_datetime(df.timestamp, unit=\"s\", origin=\"unix\").astype(\"datetime64[s]\")\n",
+    "    df = df.astype({\"pos_x\": \"uint8\", \"pos_y\": \"uint8\", \"value\": \"int8\", \"node\": \"uint8\"})\n",
+    "\n",
+    "    return df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "import json\n",
+    "import re\n",
+    "\n",
+    "df = [load_raw_data(data / f\"{subsets[0]}_data.json\"), load_raw_data(data / f\"{subsets[1]}_data.json\")]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    dat = []\n",
+    "    \n",
+    "    # Average the sample value within a second.\n",
+    "    for (x, y, node, ts), subset in df[idx].groupby(by=[\"pos_x\", \"pos_y\", \"node\", \"timestamp\"]):\n",
+    "        avg_value = subset.value.sum(min_count=1) / subset.value.count()\n",
+    "        item = {\"pos_x\": x, \"pos_y\": y, \"node\": node, \"timestamp\": ts, \"value\": avg_value}\n",
+    "        dat.append(item)\n",
+    "    \n",
+    "    df[idx] = pd.DataFrame(dat)\n",
+    "    df[idx] = df[idx].pivot(index=[\"timestamp\", \"pos_x\", \"pos_y\"], columns=[\"node\"], values=[\"value\"])\n",
+    "    df[idx] = df[idx].reset_index(drop=False)\n",
+    "    \n",
+    "    # After pivot, column names become tuples. Fix that.\n",
+    "    df[idx].columns = [\"\".join(map(str, col)).strip().replace(\"value\", \"node\") for col in df[idx].columns.values]\n",
+    "    \n",
+    "    # Fill the NaN values with some extremely low RSS value\n",
+    "    df[idx] = df[idx].fillna(-180)\n",
+    "    \n",
+    "    # TODO: Should this be part of prepare-feature stage?\n",
+    "    # Remove datetime column\n",
+    "    df[idx] = df[idx].drop(columns=[\"timestamp\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature generation\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    # Convert discrete values to meters\n",
+    "    df[idx].pos_x = (df[idx].pos_x - 1) * 1.2  # meters\n",
+    "    df[idx].pos_y = (df[idx].pos_y - 1) * 1.2  # meters\n",
+    "    \n",
+    "    df[idx] = df[idx].rename(columns={\"pos_x\": \"target_x\", \"pos_y\": \"target_y\"})\n",
+    "\n",
+    "# Find target column(s)\n",
+    "targets = [\"target_x\", \"target_y\"]\n",
+    "\n",
+    "# X are features, y are target(s)\n",
+    "features, targets = [df[0].drop(targets, axis=1), df[1].drop(targets, axis=1)], [df[0][targets], df[1][targets]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Split generation\n",
+    "from sklearn import model_selection\n",
+    "\n",
+    "groups = None\n",
+    "\n",
+    "cv = [model_selection.KFold(\n",
+    "        n_splits=n_splits,\n",
+    "        shuffle=True,\n",
+    "        random_state=random_seed,\n",
+    "    ),\n",
+    "    model_selection.ShuffleSplit(\n",
+    "        n_splits=n_splits,\n",
+    "        test_size=test_size,\n",
+    "        random_state=random_seed,\n",
+    "    )\n",
+    "]\n",
+    "\n",
+    "cv_name = [\"KFold\", \"Random\"]\n",
+    "\n",
+    "indices = indices = [[[] for _ in range(len(cv))] for _ in range(len(subsets))]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for i in range(len(cv)):\n",
+    "        for train_indices, test_indices in cv[i].split(features[idx], targets[idx], groups):\n",
+    "                indices[idx][i].append((train_indices, test_indices))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1593cd46-3eb7-4a64-b358-fb3b7b2bd821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Train&Evaluate\n",
+    "from sklearn.ensemble import RandomForestRegressor \n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error, r2_score\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    cv = [PredefinedSplit(indices[idx][0]), PredefinedSplit(indices[idx][1])]\n",
+    "    \n",
+    "    estimators = [\n",
+    "        RandomForestRegressor(random_state=42),\n",
+    "        KNeighborsRegressor()\n",
+    "    ]\n",
+    "    params=[\n",
+    "        {\n",
+    "            \"n_estimators\": [10, 50, 100, 250, 400], \n",
+    "            \"max_depth\": [5, 10, 30, 50, 150, 200, None]\n",
+    "        },\n",
+    "        {\n",
+    "            \"n_neighbors\": [3, 5, 10], \n",
+    "            \"weights\": [\"uniform\", \"distance\"], \n",
+    "            \"p\": [1, 2], \n",
+    "            \"leaf_size\": [10, 15, 30], \n",
+    "            \"metric\": [\"minkowski\", \"euclidean\"] \n",
+    "        }\n",
+    "    ]\n",
+    "    \n",
+    "    for split_index in range(len(cv)):\n",
+    "        for index in range(len(estimators)):\n",
+    "            gs = model_selection.GridSearchCV(\n",
+    "                estimator = estimators[index],\n",
+    "                param_grid = params[index],\n",
+    "                n_jobs = -1,\n",
+    "                error_score = \"raise\",\n",
+    "                refit = \"rmse\",\n",
+    "                scoring = {\"rmse\": make_scorer(root_mean_squared_error, greater_is_better=False), \"r_squared\": make_scorer(r2_score, greater_is_better=True)},\n",
+    "                cv = cv[split_index],\n",
+    "            )\n",
+    "            \n",
+    "            gs.fit(features[idx], targets[idx])\n",
+    "            \n",
+    "            results_df = pd.DataFrame(gs.cv_results_)\n",
+    "            \n",
+    "            # Select key columns to display\n",
+    "            cols_to_show = [\n",
+    "                'params',\n",
+    "                'mean_test_rmse',\n",
+    "                'std_test_rmse',\n",
+    "                'rank_test_rmse',\n",
+    "                'mean_test_r_squared',\n",
+    "                'std_test_r_squared',\n",
+    "                'rank_test_r_squared',\n",
+    "                'mean_fit_time',\n",
+    "                'mean_score_time',\n",
+    "            ]\n",
+    "            \n",
+    "            # Print as a table\n",
+    "            print(results_df[cols_to_show].to_string(index=False))\n",
+    "            Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "            joblib.dump(gs.best_estimator_, results_path / f\"Model_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\") \n",
+    "            joblib.dump(results_df, results_path / f\"Results_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/JupyterNotebooks/04-Added_automl_model.ipynb b/JupyterNotebooks/04-Added_automl_model.ipynb
new file mode 100644
index 0000000..cd92012
--- /dev/null
+++ b/JupyterNotebooks/04-Added_automl_model.ipynb
@@ -0,0 +1,477 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0304a74-ede7-4e21-8929-8e06206f4d5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/logatec\")\n",
+    "results_path = Path(\"04-Added_automl_model-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5\n",
+    "test_size = 0.20\n",
+    "subsets = [\"spring\", \"winter\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a584e20f-f567-4716-bf99-2c631e2fb789",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_raw_data(path: Path) -> pd.DataFrame:\n",
+    "    with open(path, mode=\"r\") as fp:\n",
+    "        data = json.load(fp)\n",
+    "\n",
+    "    df = []\n",
+    "\n",
+    "    for position, measurements in data.items():\n",
+    "        digits = re.findall(r\"\\d+\", position)\n",
+    "        location = tuple(int(i) for i in digits)\n",
+    "\n",
+    "        # Winter dataset has measurements only in the middle (3rd) row.\n",
+    "        if len(location) == 1:\n",
+    "            location = (3, *location)\n",
+    "\n",
+    "        assert len(location) == 2, f\"location identifier is not length 2: {location}\"\n",
+    "\n",
+    "        pos_x, pos_y = location\n",
+    "\n",
+    "        for device_id, samples in measurements.items():\n",
+    "            device_id = int(device_id)\n",
+    "            for sample in samples:\n",
+    "                timestamp, value = sample[\"timestamp\"], sample[\"rss\"]\n",
+    "\n",
+    "                item = {\"pos_x\": pos_x, \"pos_y\": pos_y, \"node\": device_id, \"timestamp\": timestamp, \"value\": value}\n",
+    "                df.append(item)\n",
+    "\n",
+    "    df = pd.DataFrame(df)\n",
+    "    df.timestamp = pd.to_datetime(df.timestamp, unit=\"s\", origin=\"unix\").astype(\"datetime64[s]\")\n",
+    "    df = df.astype({\"pos_x\": \"uint8\", \"pos_y\": \"uint8\", \"value\": \"int8\", \"node\": \"uint8\"})\n",
+    "\n",
+    "    return df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "import json\n",
+    "import re\n",
+    "\n",
+    "df = [load_raw_data(data / f\"{subsets[0]}_data.json\"), load_raw_data(data / f\"{subsets[1]}_data.json\")]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    dat = []\n",
+    "    \n",
+    "    # Average the sample value within a second.\n",
+    "    for (x, y, node, ts), subset in df[idx].groupby(by=[\"pos_x\", \"pos_y\", \"node\", \"timestamp\"]):\n",
+    "        avg_value = subset.value.sum(min_count=1) / subset.value.count()\n",
+    "        item = {\"pos_x\": x, \"pos_y\": y, \"node\": node, \"timestamp\": ts, \"value\": avg_value}\n",
+    "        dat.append(item)\n",
+    "    \n",
+    "    df[idx] = pd.DataFrame(dat)\n",
+    "    df[idx] = df[idx].pivot(index=[\"timestamp\", \"pos_x\", \"pos_y\"], columns=[\"node\"], values=[\"value\"])\n",
+    "    df[idx] = df[idx].reset_index(drop=False)\n",
+    "    \n",
+    "    # After pivot, column names become tuples. Fix that.\n",
+    "    df[idx].columns = [\"\".join(map(str, col)).strip().replace(\"value\", \"node\") for col in df[idx].columns.values]\n",
+    "    \n",
+    "    # Fill the NaN values with some extremely low RSS value\n",
+    "    df[idx] = df[idx].fillna(-180)\n",
+    "    \n",
+    "    # TODO: Should this be part of prepare-feature stage?\n",
+    "    # Remove datetime column\n",
+    "    df[idx] = df[idx].drop(columns=[\"timestamp\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature generation\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    # Convert discrete values to meters\n",
+    "    df[idx].pos_x = (df[idx].pos_x - 1) * 1.2  # meters\n",
+    "    df[idx].pos_y = (df[idx].pos_y - 1) * 1.2  # meters\n",
+    "    \n",
+    "    df[idx] = df[idx].rename(columns={\"pos_x\": \"target_x\", \"pos_y\": \"target_y\"})\n",
+    "\n",
+    "# Find target column(s)\n",
+    "targets = [\"target_x\", \"target_y\"]\n",
+    "\n",
+    "# X are features, y are target(s)\n",
+    "features, targets = [df[0].drop(targets, axis=1), df[1].drop(targets, axis=1)], [df[0][targets], df[1][targets]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Split generation\n",
+    "from sklearn import model_selection\n",
+    "\n",
+    "groups = None\n",
+    "\n",
+    "cv = [model_selection.KFold(\n",
+    "        n_splits=n_splits,\n",
+    "        shuffle=True,\n",
+    "        random_state=random_seed,\n",
+    "    ),\n",
+    "    model_selection.ShuffleSplit(\n",
+    "        n_splits=n_splits,\n",
+    "        test_size=test_size,\n",
+    "        random_state=random_seed,\n",
+    "    )\n",
+    "]\n",
+    "\n",
+    "cv_name = [\"KFold\", \"Random\"]\n",
+    "\n",
+    "split_indices = [[[] for _ in range(len(cv))] for _ in range(len(subsets))]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for i in range(len(cv)):\n",
+    "        for train_indices, test_indices in cv[i].split(features[idx], targets[idx], groups):\n",
+    "                split_indices[idx][i].append((train_indices, test_indices))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1593cd46-3eb7-4a64-b358-fb3b7b2bd821",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Train&Evaluate\n",
+    "from sklearn.ensemble import RandomForestRegressor \n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error, r2_score\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    cv = [PredefinedSplit(split_indices[idx][0]), PredefinedSplit(split_indices[idx][1])]\n",
+    "    \n",
+    "    estimators = [\n",
+    "        RandomForestRegressor(random_state=42),\n",
+    "        KNeighborsRegressor()\n",
+    "    ]\n",
+    "    params=[\n",
+    "        {\n",
+    "            \"n_estimators\": [10, 50, 100, 250, 400], \n",
+    "            \"max_depth\": [5, 10, 30, 50, 150, 200, None]\n",
+    "        },\n",
+    "        {\n",
+    "            \"n_neighbors\": [3, 5, 10], \n",
+    "            \"weights\": [\"uniform\", \"distance\"], \n",
+    "            \"p\": [1, 2], \n",
+    "            \"leaf_size\": [10, 15, 30], \n",
+    "            \"metric\": [\"minkowski\", \"euclidean\"] \n",
+    "        }\n",
+    "    ]\n",
+    "    \n",
+    "    for split_index in range(len(cv)):\n",
+    "        for index in range(len(estimators)):\n",
+    "            gs = model_selection.GridSearchCV(\n",
+    "                estimator = estimators[index],\n",
+    "                param_grid = params[index],\n",
+    "                n_jobs = -1,\n",
+    "                error_score = \"raise\",\n",
+    "                refit = \"rmse\",\n",
+    "                scoring = {\"rmse\": make_scorer(root_mean_squared_error, greater_is_better=False), \"r_squared\": make_scorer(r2_score, greater_is_better=True)},\n",
+    "                cv = cv[split_index],\n",
+    "            )\n",
+    "            \n",
+    "            gs.fit(features[idx], targets[idx])\n",
+    "            \n",
+    "            results_df = pd.DataFrame(gs.cv_results_)\n",
+    "            \n",
+    "            # Select key columns to display\n",
+    "            cols_to_show = [\n",
+    "                'params',\n",
+    "                'mean_test_rmse',\n",
+    "                'std_test_rmse',\n",
+    "                'rank_test_rmse',\n",
+    "                'mean_test_r_squared',\n",
+    "                'std_test_r_squared',\n",
+    "                'rank_test_r_squared',\n",
+    "                'mean_fit_time',\n",
+    "                'mean_score_time',\n",
+    "            ]\n",
+    "            \n",
+    "            # Print as a table\n",
+    "            print(results_df[cols_to_show].to_string(index=False))\n",
+    "            Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "            joblib.dump(gs.best_estimator_, results_path / f\"Model_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\") \n",
+    "            joblib.dump(results_df, results_path / f\"Results_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "544a5a45-c77b-4d13-91c6-62c39d71ded0",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.environ[\"TF_DETERMINISTIC_OPS\"] = \"1\"\n",
+    "import tensorflow as tf\n",
+    "tf.config.experimental.enable_op_determinism()\n",
+    "import autokeras as ak\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import keras\n",
+    "import time\n",
+    "import gc\n",
+    "import shutil\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error, r2_score\n",
+    "from  multiprocess import Process\n",
+    "\n",
+    "def get_best_models(num_models, auto_model):\n",
+    "    top_trials = auto_model.tuner.oracle.get_best_trials(num_models)\n",
+    "    for trial in top_trials:\n",
+    "        model = auto_model.tuner.load_model(trial)\n",
+    "        yield model, trial.hyperparameters\n",
+    "\n",
+    "metrics = {\"rmse\": root_mean_squared_error, \"r_squared\": r2_score}\n",
+    "\n",
+    "def run_candidate(**kwargs):\n",
+    "    features      = kwargs[\"features\"]\n",
+    "    targets       = kwargs[\"targets\"]\n",
+    "    metrics       = kwargs[\"metrics\"]\n",
+    "    subset        = kwargs[\"subset\"]\n",
+    "    cv_name       = kwargs[\"cv_name\"]\n",
+    "    split_index   = kwargs[\"split_index\"]\n",
+    "    idx           = kwargs[\"idx\"]\n",
+    "    random_seed   = kwargs.get(\"random_seed\", 42)\n",
+    "    test_size     = kwargs.get(\"test_size\", 0.2)\n",
+    "    results_path  = kwargs[\"results_path\"]\n",
+    "    split_indices = kwargs[\"split_indices\"]\n",
+    "\n",
+    "    tf.config.experimental.enable_op_determinism()\n",
+    "    keras.utils.set_random_seed(random_seed)\n",
+    "    \n",
+    "    inputs  = [ak.Input(name=\"data_input\")]\n",
+    "    outputs = [ak.RegressionHead(name=\"x_out\"), ak.RegressionHead(name=\"y_out\")]\n",
+    "    \n",
+    "    keras.utils.set_random_seed(random_seed)\n",
+    "    \n",
+    "    auto_model = ak.AutoModel(\n",
+    "        inputs       = inputs,\n",
+    "        outputs      = outputs,\n",
+    "        seed         = 42,\n",
+    "        max_trials   = 10,\n",
+    "        overwrite    = True,\n",
+    "        directory    = results_path,\n",
+    "        project_name = f\"ExampleModel-{subset}-{cv_name[split_index]}\"\n",
+    "    )\n",
+    "\n",
+    "    print(f\"{subset}-{cv_name[split_index]}\")\n",
+    "    preped_features = [features[idx].to_numpy()]\n",
+    "    preped_targets  = np.hsplit(targets[idx].to_numpy(), 2)\n",
+    "\n",
+    "    n_samples = preped_features[0].shape[0]\n",
+    "    indices   = np.arange(n_samples)\n",
+    "    \n",
+    "    train_indices, val_indices = train_test_split(\n",
+    "        indices, \n",
+    "        test_size    = test_size, \n",
+    "        random_state = random_seed, \n",
+    "        shuffle      = True\n",
+    "    )\n",
+    "\n",
+    "    X_train_list = [_[train_indices] for _ in preped_features]\n",
+    "    X_val_list   = [_[val_indices] for _ in preped_features]\n",
+    "    y_train_list = [_[train_indices] for _ in preped_targets]\n",
+    "    y_val_list   = [_[val_indices] for _ in preped_targets]\n",
+    "\n",
+    "\n",
+    "\n",
+    "    auto_model.fit(\n",
+    "        X_train_list, \n",
+    "        y_train_list, \n",
+    "        validation_data = (X_val_list, y_val_list), \n",
+    "        verbose         = 2\n",
+    "    )\n",
+    "\n",
+    "    save_top_n = max(1, int(len(auto_model.tuner.oracle.trials) * 0.1))\n",
+    "\n",
+    "    cv = [PredefinedSplit(split_indices[idx][i]) for i in range(len(cv_name))]\n",
+    "    \n",
+    "    results = []\n",
+    "    for idx, (model, hyperparameters) in enumerate(get_best_models(save_top_n, auto_model)):\n",
+    "        print(f\"\\nProcessing model {idx + 1} out of {save_top_n}\")\n",
+    "\n",
+    "        optimizer = model.optimizer\n",
+    "        del model\n",
+    "\n",
+    "        scores        = {name: [] for name in metrics.keys()}\n",
+    "        train_times   = []\n",
+    "        predict_times = []\n",
+    "\n",
+    "        for split_idx, (train_indices, test_indices) in enumerate(\n",
+    "            cv[split_index].split(preped_features[0], preped_targets[0])\n",
+    "        ):\n",
+    "\n",
+    "            print(f\"\\tProcessing split {split_idx + 1} out of {cv[split_index].get_n_splits()}\")\n",
+    "            \n",
+    "            keras.utils.set_random_seed(random_seed)\n",
+    "            model         = auto_model.tuner.hypermodel.build(hyperparameters)\n",
+    "            new_optimizer = type(optimizer).from_config(optimizer.get_config())\n",
+    "            model.compile(optimizer = new_optimizer, loss=\"mse\")\n",
+    "\n",
+    "            save_path  = os.path.join(results_path / f\"ExampleModel-{subset}-{cv_name[split_index]}\", f\"model-{idx}-{split_idx}.keras\")\n",
+    "            model.save(save_path)\n",
+    "            model_size = os.path.getsize(save_path)\n",
+    "\n",
+    "            X_train = [x[train_indices] for x in preped_features]\n",
+    "            X_test  = [x[test_indices]  for x in preped_features]\n",
+    "            y_train = [y[train_indices] for y in preped_targets]\n",
+    "            y_test  = [y[test_indices]  for y in preped_targets]\n",
+    "\n",
+    "            #train\n",
+    "            start_time = time.perf_counter()\n",
+    "            model.fit(\n",
+    "                X_train, y_train, \n",
+    "                validation_data = (X_test, y_test), \n",
+    "                epochs          = 1000, \n",
+    "                callbacks       = [keras.callbacks.EarlyStopping(patience=10, min_delta=1e-4)],\n",
+    "                verbose         = 2\n",
+    "            )\n",
+    "            train_times.append(time.perf_counter() - start_time)\n",
+    "\n",
+    "            # predict\n",
+    "            start_time   = time.perf_counter()\n",
+    "            y_pred       = model.predict(X_test, batch_size=32, verbose=2)\n",
+    "            y_pred       = np.squeeze(np.stack(y_pred, axis=0), axis=-1)\n",
+    "            predict_time = time.perf_counter() - start_time\n",
+    "            predict_times.append(predict_time)\n",
+    "\n",
+    "            y_test = np.squeeze(np.stack(y_test, axis=0), axis=-1)\n",
+    "            for name, func in metrics.items():\n",
+    "                scores[name].append(func(y_test, y_pred))\n",
+    "\n",
+    "            del model\n",
+    "            keras.backend.clear_session()\n",
+    "            gc.collect()\n",
+    "            \n",
+    "        results.append({\n",
+    "            \"scores\": {\n",
+    "                name: {\n",
+    "                    \"mean\": np.mean(arr), \n",
+    "                    \"std\": np.std(arr)\n",
+    "                } \n",
+    "                for name, arr in scores.items()\n",
+    "            },\n",
+    "            \"params\"   : hyperparameters.values,\n",
+    "            \"fit_time\" : {\n",
+    "                \"mean\": np.mean(train_times), \n",
+    "                \"std\" : np.std(train_times)\n",
+    "            },\n",
+    "            \"score_time\": {\n",
+    "                \"mean\": np.mean(predict_times), \n",
+    "                \"std\" : np.std(predict_times)\n",
+    "            }\n",
+    "        })\n",
+    "    print(results, flush = True)\n",
+    "    joblib.dump(results, results_path / f\"Results_ExampleModel-{cv_name[split_index]}Split-{subset}Subset.pkl\")\n",
+    "\n",
+    "    shutil.rmtree(results_path / f\"ExampleModel-{subset}-{cv_name[split_index]}\")\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for split_index in range(len(cv)):\n",
+    "        # We use multiprocess (the multiprocessing library doesn't work) to isolate each training run.\n",
+    "        # This resolves an issue where model training is dependent on the order in which the models are trained;\n",
+    "        # the cause is state persisting between runs (despite all the efforts to reset the environment).\n",
+    "        \n",
+    "        p   = Process(target=run_candidate, kwargs={\n",
+    "            \"features\"      : features,\n",
+    "            \"targets\"       : targets,\n",
+    "            \"metrics\"       : metrics,\n",
+    "            \"subset\"        : subset,\n",
+    "            \"cv_name\"       : cv_name,\n",
+    "            \"split_index\"   : split_index,\n",
+    "            \"idx\"           : idx,\n",
+    "            \"random_seed\"   : random_seed,\n",
+    "            \"test_size\"     : test_size,\n",
+    "            \"results_path\"  : results_path,\n",
+    "            \"split_indices\" : split_indices\n",
+    "        })\n",
+    "        p.start()\n",
+    "        p.join()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/JupyterNotebooks/Benchmarking.ipynb b/JupyterNotebooks/Benchmarking.ipynb
new file mode 100644
index 0000000..547fd43
--- /dev/null
+++ b/JupyterNotebooks/Benchmarking.ipynb
@@ -0,0 +1,510 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e17b8d4d-6f08-47e5-af37-b2f891b3d5d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import importlib.util\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"TF_DETERMINISTIC_OPS\"] = \"1\"\n",
+    "os.environ[\"PYTHONHASHSEED\"] = \"42\"\n",
+    "\n",
+    "target_path = os.path.abspath(\n",
+    "    os.path.join(os.getcwd(), \"..\", \"performance.py\")\n",
+    ")\n",
+    "\n",
+    "spec = importlib.util.spec_from_file_location(\"performance\", target_path)\n",
+    "performance = importlib.util.module_from_spec(spec)\n",
+    "spec.loader.exec_module(performance)\n",
+    "\n",
+    "globals().update({k: v for k, v in performance.__dict__.items() if not k.startswith(\"__\")})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "91d230e0-cc84-4ad5-9005-9f166d483d03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92d10d68-95d7-475f-b2e3-13b44ee79c4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = Path(\"DataSets/logatec\")\n",
+    "results_path = Path(\"Benchmarking-results\")\n",
+    "random_seed = 42\n",
+    "n_splits = 5\n",
+    "test_size = 0.20\n",
+    "subsets = [\"spring\", \"winter\"]\n",
+    "\n",
+    "Path(results_path).mkdir(parents=True, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e320cd24-f026-4fae-ab48-0604c6ec8de0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_resource_monitor()\n",
+    "\n",
+    "# Preparation\n",
+    "# We assume the dataset has been downloaded and unzipped manually\n",
+    "\n",
+    "import json\n",
+    "import re\n",
+    "\n",
+    "def load_raw_data(path: Path) -> pd.DataFrame:\n",
+    "    with open(path, mode=\"r\") as fp:\n",
+    "        data = json.load(fp)\n",
+    "\n",
+    "    df = []\n",
+    "\n",
+    "    for position, measurements in data.items():\n",
+    "        digits = re.findall(r\"\\d+\", position)\n",
+    "        location = tuple(int(i) for i in digits)\n",
+    "\n",
+    "        # Winter dataset has measurements only in the middle (3rd) row.\n",
+    "        if len(location) == 1:\n",
+    "            location = (3, *location)\n",
+    "\n",
+    "        assert len(location) == 2, f\"location identifier is not length 2: {location}\"\n",
+    "\n",
+    "        pos_x, pos_y = location\n",
+    "\n",
+    "        for device_id, samples in measurements.items():\n",
+    "            device_id = int(device_id)\n",
+    "            for sample in samples:\n",
+    "                timestamp, value = sample[\"timestamp\"], sample[\"rss\"]\n",
+    "\n",
+    "                item = {\"pos_x\": pos_x, \"pos_y\": pos_y, \"node\": device_id, \"timestamp\": timestamp, \"value\": value}\n",
+    "                df.append(item)\n",
+    "\n",
+    "    df = pd.DataFrame(df)\n",
+    "    df.timestamp = pd.to_datetime(df.timestamp, unit=\"s\", origin=\"unix\").astype(\"datetime64[s]\")\n",
+    "    df = df.astype({\"pos_x\": \"uint8\", \"pos_y\": \"uint8\", \"value\": \"int8\", \"node\": \"uint8\"})\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "df = [load_raw_data(data / f\"{subsets[i]}_data.json\") for i in range(len(subsets))]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    dat = []\n",
+    "    \n",
+    "    # Average the sample values that share the same position, node and one-second timestamp.\n",
+    "    for (x, y, node, ts), subset in df[idx].groupby(by=[\"pos_x\", \"pos_y\", \"node\", \"timestamp\"]):\n",
+    "        avg_value = subset.value.sum(min_count=1) / subset.value.count()\n",
+    "        item = {\"pos_x\": x, \"pos_y\": y, \"node\": node, \"timestamp\": ts, \"value\": avg_value}\n",
+    "        dat.append(item)\n",
+    "    \n",
+    "    df[idx] = pd.DataFrame(dat)\n",
+    "    df[idx] = df[idx].pivot(index=[\"timestamp\", \"pos_x\", \"pos_y\"], columns=[\"node\"], values=[\"value\"])\n",
+    "    df[idx] = df[idx].reset_index(drop=False)\n",
+    "    \n",
+    "    # After pivot, column names become tuples. Fix that.\n",
+    "    df[idx].columns = [\"\".join(map(str, col)).strip().replace(\"value\", \"node\") for col in df[idx].columns.values]\n",
+    "    \n",
+    "    # Fill the NaN values with some extremely low RSS value\n",
+    "    df[idx] = df[idx].fillna(-180)\n",
+    "    \n",
+    "    # TODO: Should this be part of prepare-feature stage?\n",
+    "    # Remove datetime column\n",
+    "    df[idx] = df[idx].drop(columns=[\"timestamp\"])\n",
+    "\n",
+    "stop_resource_monitor(results_path / f\"prepare_usage.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "814ec0fe-4140-49f3-ae9b-8ab752ab54ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_resource_monitor()\n",
+    "\n",
+    "# Feature generation\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    # Convert discrete values to meters\n",
+    "    df[idx].pos_x = (df[idx].pos_x - 1) * 1.2  # meters\n",
+    "    df[idx].pos_y = (df[idx].pos_y - 1) * 1.2  # meters\n",
+    "    \n",
+    "    df[idx] = df[idx].rename(columns={\"pos_x\": \"target_x\", \"pos_y\": \"target_y\"})\n",
+    "\n",
+    "# Names of the target column(s)\n",
+    "targets = [\"target_x\", \"target_y\"]\n",
+    "\n",
+    "# Per subset: `features` holds every non-target column, `targets` holds the target column(s)\n",
+    "features, targets = [df[i].drop(targets, axis=1) for i in range(len(subsets))], [df[i][targets] for i in range(len(subsets))]\n",
+    "\n",
+    "stop_resource_monitor(results_path / f\"featurize_usage.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a14f2-afe8-4f6c-b2dd-7312a31a54b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_resource_monitor()\n",
+    "\n",
+    "# Split generation\n",
+    "from sklearn import model_selection\n",
+    "\n",
+    "class PredefinedSplit(model_selection.BaseCrossValidator):\n",
+    "    \"\"\"Simple cross-validator for predefined train-test splits.\"\"\"\n",
+    "\n",
+    "    def __init__(self, indices_pairs: list[tuple[np.ndarray, np.ndarray]]):\n",
+    "        self.idx_pairs = indices_pairs\n",
+    "\n",
+    "    def get_n_splits(self, X=None, y=None, groups=None):\n",
+    "        \"\"\"Return the number of splitting iterations in the cross-validator\"\"\"\n",
+    "        return len(self.idx_pairs)\n",
+    "\n",
+    "    def split(self, X, y=None, groups=None):\n",
+    "        \"\"\"Generate indices to split data into training and test set.\"\"\"\n",
+    "        for train_idx, test_idx in self.idx_pairs:\n",
+    "            yield train_idx, test_idx\n",
+    "\n",
+    "groups = None\n",
+    "\n",
+    "cv = [model_selection.KFold(\n",
+    "        n_splits=n_splits,\n",
+    "        shuffle=True,\n",
+    "        random_state=random_seed,\n",
+    "    ),\n",
+    "    model_selection.ShuffleSplit(\n",
+    "        n_splits=n_splits,\n",
+    "        test_size=test_size,\n",
+    "        random_state=random_seed,\n",
+    "    )\n",
+    "]\n",
+    "\n",
+    "cv_name = [\"KFold\", \"Random\"]\n",
+    "\n",
+    "split_indices = [[[] for _ in range(len(cv))] for _ in range(len(subsets))]\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for i in range(len(cv)):\n",
+    "        for train_indices, test_indices in cv[i].split(features[idx], targets[idx], groups):\n",
+    "                split_indices[idx][i].append((train_indices, test_indices))\n",
+    "\n",
+    "stop_resource_monitor(results_path / f\"split_usage.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d3ec933-69ce-4259-baee-2d539bcfa6b7",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "start_resource_monitor()\n",
+    "\n",
+    "# Train & evaluate\n",
+    "from sklearn.ensemble import RandomForestRegressor \n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error, r2_score\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    cv = [PredefinedSplit(split_indices[idx][i])for i in range(len(cv_name))]\n",
+    "    \n",
+    "    estimators = [\n",
+    "        RandomForestRegressor(random_state=42),\n",
+    "        KNeighborsRegressor()\n",
+    "    ]\n",
+    "    params=[\n",
+    "        {\n",
+    "            \"n_estimators\": [10, 50, 100, 250, 400], \n",
+    "            \"max_depth\": [5, 10, 30, 50, 150, 200, None]\n",
+    "        },\n",
+    "        {\n",
+    "            \"n_neighbors\": [3, 5, 10], \n",
+    "            \"weights\": [\"uniform\", \"distance\"], \n",
+    "            \"p\": [1, 2], \n",
+    "            \"leaf_size\": [10, 15, 30], \n",
+    "            \"metric\": [\"minkowski\", \"euclidean\"] \n",
+    "        }\n",
+    "    ]\n",
+    "    \n",
+    "    for split_index in range(len(cv)):\n",
+    "        for index in range(len(estimators)):\n",
+    "            print(f\"{subset}-{cv_name[split_index]}-{estimators[index].__class__.__name__}\")\n",
+    "            gs = model_selection.GridSearchCV(\n",
+    "                estimator = estimators[index],\n",
+    "                param_grid = params[index],\n",
+    "                n_jobs = 5,\n",
+    "                error_score = \"raise\",\n",
+    "                refit = \"rmse\",\n",
+    "                scoring = {\"rmse\": make_scorer(root_mean_squared_error, greater_is_better=False), \"r_squared\": make_scorer(r2_score, greater_is_better=True)},\n",
+    "                cv = cv[split_index],\n",
+    "            )\n",
+    "\n",
+    "\n",
+    "            gs.fit(features[idx], targets[idx])\n",
+    "            \n",
+    "            results_df = pd.DataFrame(gs.cv_results_)\n",
+    "            \n",
+    "            # Select key columns to display\n",
+    "            cols_to_show = [\n",
+    "                'params',\n",
+    "                'mean_test_rmse',\n",
+    "                'std_test_rmse',\n",
+    "                'rank_test_rmse',\n",
+    "                'mean_test_r_squared',\n",
+    "                'std_test_r_squared',\n",
+    "                'rank_test_r_squared',\n",
+    "                'mean_fit_time',\n",
+    "                'mean_score_time',\n",
+    "            ]\n",
+    "            \n",
+    "            #print(results_df[cols_to_show].to_string(index=False))\n",
+    "            Path(results_path).mkdir(parents=True, exist_ok=True)\n",
+    "            joblib.dump(gs.best_estimator_, results_path / f\"Model_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\") \n",
+    "            joblib.dump(results_df, results_path / f\"Results_{estimators[index].__class__.__name__}-{cv_name[split_index]}Split-{subset}Subset.pkl\")\n",
+    "\n",
+    "stop_resource_monitor(results_path / f\"gridsearch_usage.pkl\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "544a5a45-c77b-4d13-91c6-62c39d71ded0",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "start_resource_monitor()\n",
+    "\n",
+    "import os\n",
+    "os.environ[\"TF_DETERMINISTIC_OPS\"] = \"1\"\n",
+    "import tensorflow as tf\n",
+    "tf.config.experimental.enable_op_determinism()\n",
+    "import autokeras as ak\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import keras\n",
+    "import time\n",
+    "import gc\n",
+    "import shutil\n",
+    "from sklearn.metrics import make_scorer, root_mean_squared_error, r2_score\n",
+    "from  multiprocess import Process\n",
+    "\n",
+    "def get_best_models(num_models, auto_model):\n",
+    "    top_trials = auto_model.tuner.oracle.get_best_trials(num_models)\n",
+    "    for trial in top_trials:\n",
+    "        model = auto_model.tuner.load_model(trial)\n",
+    "        yield model, trial.hyperparameters\n",
+    "\n",
+    "metrics = {\"rmse\": root_mean_squared_error, \"r_squared\": r2_score}\n",
+    "\n",
+    "def run_candidate(**kwargs):\n",
+    "    features      = kwargs[\"features\"]\n",
+    "    targets       = kwargs[\"targets\"]\n",
+    "    metrics       = kwargs[\"metrics\"]\n",
+    "    subset        = kwargs[\"subset\"]\n",
+    "    cv_name       = kwargs[\"cv_name\"]\n",
+    "    split_index   = kwargs[\"split_index\"]\n",
+    "    idx           = kwargs[\"idx\"]\n",
+    "    random_seed   = kwargs.get(\"random_seed\", 42)\n",
+    "    test_size     = kwargs.get(\"test_size\", 0.2)\n",
+    "    results_path  = kwargs[\"results_path\"]\n",
+    "    split_indices = kwargs[\"split_indices\"]\n",
+    "\n",
+    "    tf.config.experimental.enable_op_determinism()\n",
+    "    keras.utils.set_random_seed(random_seed)\n",
+    "    \n",
+    "    inputs  = [ak.Input(name=\"data_input\")]\n",
+    "    outputs = [ak.RegressionHead(name=\"x_out\"), ak.RegressionHead(name=\"y_out\")]\n",
+    "    \n",
+    "    keras.utils.set_random_seed(random_seed)\n",
+    "    \n",
+    "    auto_model = ak.AutoModel(\n",
+    "        inputs       = inputs,\n",
+    "        outputs      = outputs,\n",
+    "        seed         = 42,\n",
+    "        max_trials   = 10,\n",
+    "        overwrite    = True,\n",
+    "        directory    = results_path,\n",
+    "        project_name = f\"ExampleModel-{subset}-{cv_name[split_index]}\"\n",
+    "    )\n",
+    "\n",
+    "    print(f\"{subset}-{cv_name[split_index]}\")\n",
+    "    preped_features = [features[idx].to_numpy()]\n",
+    "    preped_targets  = np.hsplit(targets[idx].to_numpy(), 2)\n",
+    "\n",
+    "    n_samples = preped_features[0].shape[0]\n",
+    "    indices   = np.arange(n_samples)\n",
+    "    \n",
+    "    train_indices, val_indices = train_test_split(\n",
+    "        indices, \n",
+    "        test_size    = test_size, \n",
+    "        random_state = random_seed, \n",
+    "        shuffle      = True\n",
+    "    )\n",
+    "\n",
+    "    X_train_list = [_[train_indices] for _ in preped_features]\n",
+    "    X_val_list   = [_[val_indices] for _ in preped_features]\n",
+    "    y_train_list = [_[train_indices] for _ in preped_targets]\n",
+    "    y_val_list   = [_[val_indices] for _ in preped_targets]\n",
+    "\n",
+    "\n",
+    "\n",
+    "    auto_model.fit(\n",
+    "        X_train_list, \n",
+    "        y_train_list, \n",
+    "        validation_data = (X_val_list, y_val_list), \n",
+    "        verbose         = 2\n",
+    "    )\n",
+    "\n",
+    "    save_top_n = max(1, int(len(auto_model.tuner.oracle.trials) * 0.1))\n",
+    "\n",
+    "    cv = [PredefinedSplit(split_indices[idx][i]) for i in range(len(cv_name))]\n",
+    "    \n",
+    "    results = []\n",
+    "    for idx, (model, hyperparameters) in enumerate(get_best_models(save_top_n, auto_model)):\n",
+    "        print(f\"\\nProcessing model {idx + 1} out of {save_top_n}\")\n",
+    "\n",
+    "        optimizer = model.optimizer\n",
+    "        del model\n",
+    "\n",
+    "        scores        = {name: [] for name in metrics.keys()}\n",
+    "        train_times   = []\n",
+    "        predict_times = []\n",
+    "\n",
+    "        for split_idx, (train_indices, test_indices) in enumerate(\n",
+    "            cv[split_index].split(preped_features[0], preped_targets[0])\n",
+    "        ):\n",
+    "\n",
+    "            print(f\"\\tProcessing split {split_idx + 1} out of {cv[split_index].get_n_splits()}\")\n",
+    "            \n",
+    "            keras.utils.set_random_seed(random_seed)\n",
+    "            model         = auto_model.tuner.hypermodel.build(hyperparameters)\n",
+    "            new_optimizer = type(optimizer).from_config(optimizer.get_config())\n",
+    "            model.compile(optimizer = new_optimizer, loss=\"mse\")\n",
+    "\n",
+    "            save_path  = os.path.join(results_path / f\"ExampleModel-{subset}-{cv_name[split_index]}\", f\"model-{idx}-{split_idx}.keras\")\n",
+    "            model.save(save_path)\n",
+    "            model_size = os.path.getsize(save_path)\n",
+    "\n",
+    "            X_train = [x[train_indices] for x in preped_features]\n",
+    "            X_test  = [x[test_indices]  for x in preped_features]\n",
+    "            y_train = [y[train_indices] for y in preped_targets]\n",
+    "            y_test  = [y[test_indices]  for y in preped_targets]\n",
+    "\n",
+    "            #train\n",
+    "            start_time = time.perf_counter()\n",
+    "            model.fit(\n",
+    "                X_train, y_train, \n",
+    "                validation_data = (X_test, y_test), \n",
+    "                epochs          = 1000, \n",
+    "                callbacks       = [keras.callbacks.EarlyStopping(patience=10, min_delta=1e-4)],\n",
+    "                verbose         = 2\n",
+    "            )\n",
+    "            train_times.append(time.perf_counter() - start_time)\n",
+    "\n",
+    "            # predict\n",
+    "            start_time   = time.perf_counter()\n",
+    "            y_pred       = model.predict(X_test, batch_size=32, verbose=2)\n",
+    "            y_pred       = np.squeeze(np.stack(y_pred, axis=0), axis=-1)\n",
+    "            predict_time = time.perf_counter() - start_time\n",
+    "            predict_times.append(predict_time)\n",
+    "\n",
+    "            y_test = np.squeeze(np.stack(y_test, axis=0), axis=-1)\n",
+    "            for name, func in metrics.items():\n",
+    "                scores[name].append(func(y_test, y_pred))\n",
+    "\n",
+    "            del model\n",
+    "            keras.backend.clear_session()\n",
+    "            gc.collect()\n",
+    "            \n",
+    "        results.append({\n",
+    "            \"scores\": {\n",
+    "                name: {\n",
+    "                    \"mean\": np.mean(arr), \n",
+    "                    \"std\": np.std(arr)\n",
+    "                } \n",
+    "                for name, arr in scores.items()\n",
+    "            },\n",
+    "            \"params\"   : hyperparameters.values,\n",
+    "            \"fit_time\" : {\n",
+    "                \"mean\": np.mean(train_times), \n",
+    "                \"std\" : np.std(train_times)\n",
+    "            },\n",
+    "            \"score_time\": {\n",
+    "                \"mean\": np.mean(predict_times), \n",
+    "                \"std\" : np.std(predict_times)\n",
+    "            }\n",
+    "        })\n",
+    "    print(results, flush = True)\n",
+    "    joblib.dump(results, results_path / f\"Results_ExampleModel-{cv_name[split_index]}Split-{subset}Subset.pkl\")\n",
+    "\n",
+    "    shutil.rmtree(results_path / f\"ExampleModel-{subset}-{cv_name[split_index]}\")\n",
+    "\n",
+    "for idx, subset in enumerate(subsets):\n",
+    "    for split_index in range(len(cv)):\n",
+    "        # We use multiprocess (the multiprocessing library doesn't work) to isolate each training run.\n",
+    "        # This resolves an issue where model training is dependent on the order in which the models are trained;\n",
+    "        # the cause is state persisting between runs (despite all the efforts to reset the environment).\n",
+    "        \n",
+    "        p   = Process(target=run_candidate, kwargs={\n",
+    "            \"features\"      : features,\n",
+    "            \"targets\"       : targets,\n",
+    "            \"metrics\"       : metrics,\n",
+    "            \"subset\"        : subset,\n",
+    "            \"cv_name\"       : cv_name,\n",
+    "            \"split_index\"   : split_index,\n",
+    "            \"idx\"           : idx,\n",
+    "            \"random_seed\"   : random_seed,\n",
+    "            \"test_size\"     : test_size,\n",
+    "            \"results_path\"  : results_path,\n",
+    "            \"split_indices\" : split_indices\n",
+    "        })\n",
+    "        p.start()\n",
+    "        p.join()\n",
+    "    \n",
+    "stop_resource_monitor(results_path / f\"automl_usage.pkl\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (nancy)",
+   "language": "python",
+   "name": "nancy"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/LOCALIZE_Configs/00-Initial/dvc.yaml b/LOCALIZE_Configs/00-Initial/dvc.yaml
new file mode 100644
index 0000000..092e9fc
--- /dev/null
+++ b/LOCALIZE_Configs/00-Initial/dvc.yaml
@@ -0,0 +1,77 @@
+# 00-Initial
+vars:
+  - path:
+      # Paths are relative to the location of the dvc.yaml file
+      scripts: ../../src/umu
+      common: ../../src
+      data: ../../artifacts/00-Initial/data
+      models: ../../artifacts/00-Initial/models
+      reports: ../../artifacts/00-Initial/reports
+
+
+stages:
+  prepare:
+    desc: Prepare UMU dataset for the MLOps pipeline
+    cmd: |
+      unzip ${path.data}/raw/umu.zip -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/umu/tcp_nokia_20240325.xlsx --output ${path.data}/interim/umu.pkl
+      rm ${path.data}/raw/umu/*.xlsx
+      rm ${path.data}/raw/__MACOSX/umu/.*.xlsx
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/umu.zip
+    outs:
+      - ${path.data}/interim/umu.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/umu.pkl
+      --output ${path.data}/prepared/umu.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/umu.pkl
+    outs:
+      - ${path.data}/prepared/umu.pkl
+
+
+  split:
+    desc: Split dataset and store indices
+    matrix:
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/umu.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.split}Split.pkl
+    params:
+      - split
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/umu.pkl
+    outs:
+      - ${path.data}/splits/${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/umu.pkl
+      --split-indices ${path.data}/splits/${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.split}-${item.model}/${item.model}-${item.split}Split-results.pkl
+    params:
+      - gridsearch.${item.model}
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/umu.pkl
+      - ${path.data}/splits/${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.split}-${item.model}/${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/00-Initial/params.yaml b/LOCALIZE_Configs/00-Initial/params.yaml
new file mode 100644
index 0000000..37685ca
--- /dev/null
+++ b/LOCALIZE_Configs/00-Initial/params.yaml
@@ -0,0 +1,18 @@
+# 00-Initial
+evaluation:
+  metrics: {'rmse': False}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  LinearRegression:
+    module: sklearn.linear_model
+    class: LinearRegression
+    hyperparameters:
+      fit_intercept: [true, false]
diff --git a/LOCALIZE_Configs/01-Changed_and_added_model/dvc.yaml b/LOCALIZE_Configs/01-Changed_and_added_model/dvc.yaml
new file mode 100644
index 0000000..4f03be4
--- /dev/null
+++ b/LOCALIZE_Configs/01-Changed_and_added_model/dvc.yaml
@@ -0,0 +1,77 @@
+# 01-Changed_and_added_model
+vars:
+  - path:
+      # Paths are relative to the location of this dvc.yaml file
+      scripts: ../../src/umu
+      common: ../../src
+      data: ../../artifacts/01-Changed_and_added_model/data
+      models: ../../artifacts/01-Changed_and_added_model/models
+      reports: ../../artifacts/01-Changed_and_added_model/reports
+
+
+stages:
+  prepare:
+    desc: Prepare UMU dataset for the MLOps pipeline
+    cmd: |
+      unzip ${path.data}/raw/umu.zip -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/umu/tcp_nokia_20240325.xlsx --output ${path.data}/interim/umu.pkl
+      rm ${path.data}/raw/umu/*.xlsx
+      rm ${path.data}/raw/__MACOSX/umu/.*.xlsx
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/umu.zip
+    outs:
+      - ${path.data}/interim/umu.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/umu.pkl
+      --output ${path.data}/prepared/umu.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/umu.pkl
+    outs:
+      - ${path.data}/prepared/umu.pkl
+
+
+  split:
+    desc: Split dataset and store indices
+    matrix:
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/umu.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.split}Split.pkl
+    params:
+      - split
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/umu.pkl
+    outs:
+      - ${path.data}/splits/${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/umu.pkl
+      --split-indices ${path.data}/splits/${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.split}-${item.model}/${item.model}-${item.split}Split-results.pkl
+    params:
+      - gridsearch.${item.model}
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/umu.pkl
+      - ${path.data}/splits/${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.split}-${item.model}/${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/01-Changed_and_added_model/params.yaml b/LOCALIZE_Configs/01-Changed_and_added_model/params.yaml
new file mode 100644
index 0000000..01f1bba
--- /dev/null
+++ b/LOCALIZE_Configs/01-Changed_and_added_model/params.yaml
@@ -0,0 +1,31 @@
+# 01-Changed_and_added_model
+evaluation:
+  metrics: {'rmse': False}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  RandomForestRegressor:
+    module: sklearn.ensemble
+    class: RandomForestRegressor
+    parameters:
+      random_state: 42
+    hyperparameters:
+      n_estimators: [10, 50, 100, 250, 400]
+      max_depth: [5, 10, 30, 50, 150, 200, null]
+
+  KNeighborsRegressor:
+    module: sklearn.neighbors
+    class: KNeighborsRegressor
+    hyperparameters:
+      n_neighbors: [3, 5, 10]
+      weights: [uniform, distance]
+      p: [1, 2]
+      leaf_size: [10, 15, 30]
+      metric: [minkowski, euclidean]
diff --git a/LOCALIZE_Configs/02-Changed_dataset_to_logatec/dvc.yaml b/LOCALIZE_Configs/02-Changed_dataset_to_logatec/dvc.yaml
new file mode 100644
index 0000000..dbd64e6
--- /dev/null
+++ b/LOCALIZE_Configs/02-Changed_dataset_to_logatec/dvc.yaml
@@ -0,0 +1,79 @@
+# 02-Changed_dataset_to_logatec
+vars:
+  - path:
+      # Paths are relative to the location of this dvc.yaml file
+      scripts: ../../src/logatec
+      common: ../../src
+      data: ../../artifacts/02-Changed_dataset_to_logatec/data
+      models: ../../artifacts/02-Changed_dataset_to_logatec/models
+      reports: ../../artifacts/02-Changed_dataset_to_logatec/reports
+
+  - subsets: [winter, spring]
+
+stages:
+  prepare:
+    desc: Download, unzip, and convert dataset to pickle format
+    matrix:
+      subset: ${subsets}
+    cmd: |
+      unzip -j ${path.data}/raw/logatec.zip ${item.subset}_data.json -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/${item.subset}_data.json --output ${path.data}/interim/${item.subset}.pkl
+      rm ${path.data}/raw/${item.subset}_data.json
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/logatec.zip
+    outs:
+      - ${path.data}/interim/${item.subset}.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    matrix:
+      subset: ${subsets}
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/${item.subset}.pkl
+      --output ${path.data}/prepared/${item.subset}.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/${item.subset}.pkl
+    outs:
+      - ${path.data}/prepared/${item.subset}.pkl
+
+
+  split:
+    desc: Prepare train-test split indices for model training and evaluation
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/${item.subset}.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/${item.subset}.pkl
+    outs:
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/02-Changed_dataset_to_logatec/params.yaml b/LOCALIZE_Configs/02-Changed_dataset_to_logatec/params.yaml
new file mode 100644
index 0000000..42c2c97
--- /dev/null
+++ b/LOCALIZE_Configs/02-Changed_dataset_to_logatec/params.yaml
@@ -0,0 +1,31 @@
+# 02-Changed_dataset_to_logatec
+evaluation:
+  metrics: {'rmse': False}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  RandomForestRegressor:
+    module: sklearn.ensemble
+    class: RandomForestRegressor
+    parameters:
+      random_state: 42
+    hyperparameters:
+      n_estimators: [10, 50, 100, 250, 400]
+      max_depth: [5, 10, 30, 50, 150, 200, null]
+
+  KNeighborsRegressor:
+    module: sklearn.neighbors
+    class: KNeighborsRegressor
+    hyperparameters:
+      n_neighbors: [3, 5, 10]
+      weights: [uniform, distance]
+      p: [1, 2]
+      leaf_size: [10, 15, 30]
+      metric: [minkowski, euclidean]
diff --git a/LOCALIZE_Configs/03-Added_split_and_metric/dvc.yaml b/LOCALIZE_Configs/03-Added_split_and_metric/dvc.yaml
new file mode 100644
index 0000000..15db467
--- /dev/null
+++ b/LOCALIZE_Configs/03-Added_split_and_metric/dvc.yaml
@@ -0,0 +1,79 @@
+# 03-Added_split_and_metric
+vars:
+  - path:
+      # Paths are relative to the location of this dvc.yaml file
+      scripts: ../../src/logatec
+      common: ../../src
+      data: ../../artifacts/03-Added_split_and_metric/data
+      models: ../../artifacts/03-Added_split_and_metric/models
+      reports: ../../artifacts/03-Added_split_and_metric/reports
+
+  - subsets: [winter, spring]
+
+stages:
+  prepare:
+    desc: Download, unzip, and convert dataset to pickle format
+    matrix:
+      subset: ${subsets}
+    cmd: |
+      unzip -j ${path.data}/raw/logatec.zip ${item.subset}_data.json -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/${item.subset}_data.json --output ${path.data}/interim/${item.subset}.pkl
+      rm ${path.data}/raw/${item.subset}_data.json
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/logatec.zip
+    outs:
+      - ${path.data}/interim/${item.subset}.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    matrix:
+      subset: ${subsets}
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/${item.subset}.pkl
+      --output ${path.data}/prepared/${item.subset}.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/${item.subset}.pkl
+    outs:
+      - ${path.data}/prepared/${item.subset}.pkl
+
+
+  split:
+    desc: Prepare train-test split indices for model training and evaluation
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/${item.subset}.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/${item.subset}.pkl
+    outs:
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/03-Added_split_and_metric/params.yaml b/LOCALIZE_Configs/03-Added_split_and_metric/params.yaml
new file mode 100644
index 0000000..1848da6
--- /dev/null
+++ b/LOCALIZE_Configs/03-Added_split_and_metric/params.yaml
@@ -0,0 +1,31 @@
+# 03-Added_split_and_metric
+evaluation:
+  metrics: {'rmse': False, 'r_squared': True}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold, Random]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  RandomForestRegressor:
+    module: sklearn.ensemble
+    class: RandomForestRegressor
+    parameters:
+      random_state: 42
+    hyperparameters:
+      n_estimators: [10, 50, 100, 250, 400]
+      max_depth: [5, 10, 30, 50, 150, 200, null]
+
+  KNeighborsRegressor:
+    module: sklearn.neighbors
+    class: KNeighborsRegressor
+    hyperparameters:
+      n_neighbors: [3, 5, 10]
+      weights: [uniform, distance]
+      p: [1, 2]
+      leaf_size: [10, 15, 30]
+      metric: [minkowski, euclidean]
diff --git a/LOCALIZE_Configs/04-Added_automl_model/dvc.yaml b/LOCALIZE_Configs/04-Added_automl_model/dvc.yaml
new file mode 100644
index 0000000..1a881af
--- /dev/null
+++ b/LOCALIZE_Configs/04-Added_automl_model/dvc.yaml
@@ -0,0 +1,101 @@
+# 04-Added_automl_model
+vars:
+  - path:
+      # Paths are relative to the location of this dvc.yaml file
+      scripts: ../../src/logatec
+      common: ../../src
+      data: ../../artifacts/04-Added_automl_model/data
+      models: ../../artifacts/04-Added_automl_model/models
+      reports: ../../artifacts/04-Added_automl_model/reports
+
+  - subsets: [winter, spring]
+
+stages:
+  prepare:
+    desc: Download, unzip, and convert dataset to pickle format
+    matrix:
+      subset: ${subsets}
+    cmd: |
+      unzip -j ${path.data}/raw/logatec.zip ${item.subset}_data.json -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/${item.subset}_data.json --output ${path.data}/interim/${item.subset}.pkl
+      rm ${path.data}/raw/${item.subset}_data.json
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/logatec.zip
+    outs:
+      - ${path.data}/interim/${item.subset}.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    matrix:
+      subset: ${subsets}
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/${item.subset}.pkl
+      --output ${path.data}/prepared/${item.subset}.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/${item.subset}.pkl
+    outs:
+      - ${path.data}/prepared/${item.subset}.pkl
+
+
+  split:
+    desc: Prepare train-test split indices for model training and evaluation
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/${item.subset}.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/${item.subset}.pkl
+    outs:
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
+  automl:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${automl}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer automl
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/automl-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    params:
+      - automl.${item.model}
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/automl-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/04-Added_automl_model/params.yaml b/LOCALIZE_Configs/04-Added_automl_model/params.yaml
new file mode 100644
index 0000000..ec84d3d
--- /dev/null
+++ b/LOCALIZE_Configs/04-Added_automl_model/params.yaml
@@ -0,0 +1,51 @@
+# 04-Added_automl_model
+evaluation:
+  metrics: {'rmse': False, 'r_squared': True}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold, Random]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  RandomForestRegressor:
+    module: sklearn.ensemble
+    class: RandomForestRegressor
+    parameters:
+      random_state: 42
+      n_jobs: -1
+    hyperparameters:
+      n_estimators: [10, 50, 100, 250, 400]
+      max_depth: [5, 10, 30, 50, 150, 200, null]
+
+  KNeighborsRegressor:
+    module: sklearn.neighbors
+    class: KNeighborsRegressor
+    parameters:
+      n_jobs: -1
+    hyperparameters:
+      n_neighbors: [3, 5, 10]
+      weights: [uniform, distance]
+      p: [1, 2]
+      leaf_size: [10, 15, 30]
+      metric: [minkowski, euclidean]
+
+automl:
+  ExampleModel:
+    inputs:
+      - name: data_input
+
+    outputs:
+      - name: x_out
+      - name: y_out
+
+    settings:
+      seed: 42
+      max_trials: 10
+      overwrite: True
+
+    fit_settings:
+      verbose: 2
\ No newline at end of file
diff --git a/LOCALIZE_Configs/Benchmarking/README.txt b/LOCALIZE_Configs/Benchmarking/README.txt
new file mode 100644
index 0000000..c70af5e
--- /dev/null
+++ b/LOCALIZE_Configs/Benchmarking/README.txt
@@ -0,0 +1 @@
+For benchmarking, the code of each stage was modified so that it calls `start_resource_monitor` at the start and `stop_resource_monitor` at the end.
\ No newline at end of file
diff --git a/LOCALIZE_Configs/Benchmarking/dvc.yaml b/LOCALIZE_Configs/Benchmarking/dvc.yaml
new file mode 100644
index 0000000..8eecd97
--- /dev/null
+++ b/LOCALIZE_Configs/Benchmarking/dvc.yaml
@@ -0,0 +1,101 @@
+# Benchmarking
+vars:
+  - path:
+      # Paths are relative to the location of this dvc.yaml file
+      scripts: ../../src/logatec
+      common: ../../src
+      data: ../../artifacts/Benchmarking/data
+      models: ../../artifacts/Benchmarking/models
+      reports: ../../artifacts/Benchmarking/reports
+
+  - subsets: [winter, spring]
+
+stages:
+  prepare:
+    desc: Download, unzip, and convert dataset to pickle format
+    matrix:
+      subset: ${subsets}
+    cmd: |
+      unzip -j ${path.data}/raw/logatec.zip ${item.subset}_data.json -d ${path.data}/raw/
+      python ${path.scripts}/prepare.py --method average --input ${path.data}/raw/${item.subset}_data.json --output ${path.data}/interim/${item.subset}.pkl
+      rm ${path.data}/raw/${item.subset}_data.json
+    deps:
+      - ${path.scripts}/prepare.py
+      - ${path.data}/raw/logatec.zip
+    outs:
+      - ${path.data}/interim/${item.subset}.pkl
+
+  featurize:
+    desc: Enrich dataset with additional features
+    matrix:
+      subset: ${subsets}
+    cmd: >
+      python ${path.scripts}/featurize.py
+      --input ${path.data}/interim/${item.subset}.pkl
+      --output ${path.data}/prepared/${item.subset}.pkl
+    deps:
+      - ${path.scripts}/featurize.py
+      - ${path.data}/interim/${item.subset}.pkl
+    outs:
+      - ${path.data}/prepared/${item.subset}.pkl
+
+
+  split:
+    desc: Prepare train-test split indices for model training and evaluation
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+    cmd: >
+      python ${path.scripts}/split.py
+      --input ${path.data}/prepared/${item.subset}.pkl
+      --split ${item.split}
+      --output-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    deps:
+      - ${path.scripts}/split.py
+      - ${path.data}/prepared/${item.subset}.pkl
+    outs:
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+
+
+  gridsearch:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${gridsearch}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer gridsearch
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/gridsearch-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
+  automl:
+    desc: Determine best hyper-parameters for algorithm on several CVs
+    matrix:
+      subset: ${subsets}
+      split: ${split.types}
+      model: ${automl}
+    cmd: >
+      python ${path.common}/benchmark.py
+      --use ${item.model}
+      --optimizer automl
+      --data ${path.data}/prepared/${item.subset}.pkl
+      --split-indices ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+      --output-results ${path.models}/automl-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+    params:
+      - automl.${item.model}
+    deps:
+      - ${path.common}/benchmark.py
+      - ${path.data}/prepared/${item.subset}.pkl
+      - ${path.data}/splits/${item.subset}-${item.split}Split.pkl
+    outs:
+      - ${path.models}/automl-${item.subset}-${item.split}-${item.model}/${item.subset}-${item.model}-${item.split}Split-results.pkl
+
diff --git a/LOCALIZE_Configs/Benchmarking/params.yaml b/LOCALIZE_Configs/Benchmarking/params.yaml
new file mode 100644
index 0000000..ec84d3d
--- /dev/null
+++ b/LOCALIZE_Configs/Benchmarking/params.yaml
@@ -0,0 +1,51 @@
+# Benchmarking
+evaluation:
+  metrics: {'rmse': False, 'r_squared': True}
+  save_top_models: 0.1
+  score_with: 'rmse'
+
+split:
+  types: [KFold, Random]
+  seed: 42
+  n_splits: 5
+  test_size: 0.20
+
+gridsearch:
+  RandomForestRegressor:
+    module: sklearn.ensemble
+    class: RandomForestRegressor
+    parameters:
+      random_state: 42
+      n_jobs: -1
+    hyperparameters:
+      n_estimators: [10, 50, 100, 250, 400]
+      max_depth: [5, 10, 30, 50, 150, 200, null]
+
+  KNeighborsRegressor:
+    module: sklearn.neighbors
+    class: KNeighborsRegressor
+    parameters:
+      n_jobs: -1
+    hyperparameters:
+      n_neighbors: [3, 5, 10]
+      weights: [uniform, distance]
+      p: [1, 2]
+      leaf_size: [10, 15, 30]
+      metric: [minkowski, euclidean]
+
+automl:
+  ExampleModel:
+    inputs:
+      - name: data_input
+
+    outputs:
+      - name: x_out
+      - name: y_out
+
+    settings:
+      seed: 42
+      max_trials: 10
+      overwrite: True
+
+    fit_settings:
+      verbose: 2
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a047bc9
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Evaluation code/configs for the LOCALIZE framework
diff --git a/performance.py b/performance.py
new file mode 100644
index 0000000..12ad690
--- /dev/null
+++ b/performance.py
@@ -0,0 +1,157 @@
+INITIAL_PROFILING_DURATION = 0.25  # default psutil sampling window (seconds) used by get_cores_by_usage()
+DEFAULT_PROFILING_INTERVAL = 0.1   # default target sampling interval (seconds) for start_resource_monitor()
+NUM_CORES_TO_ALLOCATE      = 5     # how many cores from the usage ranking are assigned to task execution
+
+DEBUG = False  # when True, the monitor worker prints its accumulated per-sample log before exiting
+
+
+import psutil, os
+from pathlib import Path
+
+def get_cores_by_usage(interval: float = INITIAL_PROFILING_DURATION):
+    per_core_load = psutil.cpu_percent(interval=interval, percpu=True)   # one busy-% reading per core
+    return sorted(range(len(per_core_load)), key=per_core_load.__getitem__)  # core indices, least busy first
+
+cores_by_usage = get_cores_by_usage()  # core indices ordered from least to most busy
+logging_core_id = cores_by_usage[0]    # the least-busy core is reserved for the resource logger
+task_execution_core_ids = cores_by_usage[1:1+NUM_CORES_TO_ALLOCATE] or cores_by_usage[:1]
+# Fallback above: on a single-core host the slice is empty and sched_setaffinity rejects an empty set.
+os.sched_setaffinity(0, {*task_execution_core_ids})
+
+print("Executing on core:", task_execution_core_ids)
+print("Logging on core:", logging_core_id)
+
+core = task_execution_core_ids
+proc   = psutil.Process(os.getpid())
+# Pin threads and child processes that already exist at import time to the same cores.
+for t in proc.threads():
+    os.sched_setaffinity(t.id, {*core})      # psutil thread tuples expose the thread id as .id
+
+for p in proc.children(recursive=True):
+    os.sched_setaffinity(p.pid, {*core})     # psutil.Process exposes .pid; it has no .id attribute
+
+
+from multiprocessing import Event, Process, Queue
+import pandas as pd
+import joblib, time
+
+_monitor_proc = None  # Process running _resource_worker while a monitor is active, else None
+_stop_evt     = None  # Event that stop_resource_monitor() sets to end the worker's sampling loop
+_path_q       = None  # Queue(maxsize=1) that carries the output file path to the worker
+
+def _safe_value(func, default=0.0):  # call func(); return `default` if the process is gone or a zombie
+    try:
+        return func()
+    except (psutil.NoSuchProcess, psutil.ZombieProcess):  # other exceptions propagate to the caller
+        return default
+
+def _sample_once(root, exclude_pids: set[int]=frozenset()):
+    """Sum CPU and RSS over `root` and its descendants; return (CPU in cores, RAM in MB, per-process CPU %)."""
+    if not hasattr(_sample_once, "cache"):   # persistent cache so we always talk to the *same* Process objects
+        _sample_once.cache = {}          # pid ➜ Process
+
+    cache = _sample_once.cache
+    procs = [root] + _safe_value(lambda: root.children(recursive=True), [])  # root itself may already be gone
+
+    # make sure every pid we see has a cached Process object
+    for p in procs:
+        if p.pid in exclude_pids:
+            continue
+        if p.pid not in cache:
+            cache[p.pid] = p
+            _safe_value(lambda: p.cpu_percent(None))  # prime – first call always 0.0
+
+    total_cpu = 0.0
+    total_mem = 0
+    c = []                               # per-process CPU percentages of this sample
+    for pid, p in list(cache.items()):
+        if not p.is_running():
+            cache.pop(pid, None)         # clean up dead workers
+            continue
+        cpu = _safe_value(lambda: p.cpu_percent(None))        # non-blocking, since last call
+        c.append(cpu)
+        total_cpu += cpu                 # add this process’s %
+        total_mem += _safe_value(lambda: p.memory_info().rss, 0)  # a process exiting mid-sample counts as 0
+
+    return total_cpu / 100.0, total_mem / (1024 ** 2), c   # CPU cores, RAM MB
+
+def _resource_worker(interval: float, logging_core_id: int,  stop_evt, path_q: Queue):
+    # Monitor-process entry point: sample the parent's process tree until stop_evt is set, then dump the log.
+    os.sched_setaffinity(0, {logging_core_id})  # use a separate core for logging to not affect performance
+
+    parent_pid = os.getppid()
+    logger_pid = os.getpid()             # excluded from every sample so the logger does not measure itself
+    proc       = psutil.Process(parent_pid)
+
+    # Prime the logger
+    _sample_once(proc, exclude_pids={logger_pid})
+    time.sleep(interval)
+
+    resource_log = []                    # one dict per sample: t (seconds), cpu_cores, ram_mb
+    last_sample  = 0
+    sleep_time   = 0
+    print_log    = ""                    # human-readable per-sample lines, printed only when DEBUG is set
+    start_time   = time.perf_counter()
+
+    # Get starting datapoint
+    cpu, ram, c = _sample_once(proc, exclude_pids={logger_pid})
+    resource_log.append({"t": 0, "cpu_cores": cpu, "ram_mb": ram})
+
+    while not stop_evt.is_set():
+        # Sample
+        now      = time.perf_counter() - start_time
+        cpu, ram, c = _sample_once(proc, exclude_pids={logger_pid})
+        resource_log.append({"t": now, "cpu_cores": cpu, "ram_mb": ram})
+
+        # Compensate drift: shorten the next sleep by the time spent outside sleep, clamped to [0.05, interval]
+        elapsed     = now - last_sample
+        sleep_time  = min(max(0.05, interval - (elapsed - sleep_time)), interval)
+        print_log  += f"{now:>6.2f} s| {(interval-sleep_time)*1000.0:4.2f} ms| {cpu*100:>5.1f}% [" + "|".join([f"{process_cpu:>3.0f}%" for process_cpu in c]) +f"] - {len(c)}\n"
+        last_sample = now
+        time.sleep(sleep_time)
+
+    end_time = time.perf_counter() - start_time
+    cpu, ram, c = _sample_once(proc, exclude_pids={logger_pid})  # final datapoint after the stop request
+    resource_log.append({"t": end_time, "cpu_cores": cpu, "ram_mb": ram})
+    if DEBUG:
+        print(print_log)
+
+    df      = pd.DataFrame(resource_log)
+    outfile = Path(path_q.get())         # blocks until stop_resource_monitor() has supplied the output path
+    joblib.dump(df, outfile)
+
+def start_resource_monitor(interval: float = DEFAULT_PROFILING_INTERVAL):  # spawn the sampling process
+    global _monitor_proc, _stop_evt, _path_q, logging_core_id
+    if _monitor_proc is not None and _monitor_proc.is_alive():
+        raise RuntimeError("Resource monitor already running.")  # only one monitor at a time
+
+    _stop_evt = Event()          # set by stop_resource_monitor() to end the worker's sampling loop
+    _path_q = Queue(maxsize=1)   # carries the output file path to the worker
+    _monitor_proc = Process(
+        target = _resource_worker,
+        args   = (
+            interval,            # target sampling interval in seconds
+            logging_core_id,     # core the worker pins itself to
+            _stop_evt,
+            _path_q
+        ),
+        daemon =True
+    )
+    _monitor_proc.start()
+
+def stop_resource_monitor(outfile: Path):  # stop the monitor and have it write its log to `outfile`
+    global _monitor_proc, _stop_evt, _path_q
+    if _monitor_proc is None or not _monitor_proc.is_alive():
+        raise RuntimeError("Resource monitor is not running.")
+
+    _path_q.put(outfile)   # the worker reads this path once its sampling loop has ended
+    _stop_evt.set()        # ends the worker's sampling loop
+    _monitor_proc.join()   # wait until the worker has dumped its DataFrame and exited
+
+    _monitor_proc = None   # reset module state so a new monitor can be started
+    _stop_evt     = None
+    _path_q       = None
+
+def get_directory(path: Path) -> Path:
+    resolved = path.resolve()  # absolute form of the input path
+    return resolved.parent if not resolved.is_dir() else resolved  # a directory is returned as-is
\ No newline at end of file