From a2ae8d3c3c833ad304fb14b6dd1138058236d0cf Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 11:41:38 +0000
Subject: [PATCH 01/17] chore: add hydra to project dependencies

---
 pyproject.toml | 1 +
 uv.lock        | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index e431f9d..43e02d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ dependencies = [
     "typer>=0.15.2",
     "instanovo>=1.1.4",
     "huggingface-hub>=0.35.3",
+    "hydra-core>=1.3.2",
 ]
 
 [project.urls]
diff --git a/uv.lock b/uv.lock
index cf896ba..3596ffb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -5257,6 +5257,7 @@ version = "1.0.3"
 source = { editable = "." }
 dependencies = [
     { name = "huggingface-hub" },
+    { name = "hydra-core" },
     { name = "instanovo" },
     { name = "jax", version = "0.6.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "jax", version = "0.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -5288,6 +5289,7 @@ notebook = [
 [package.metadata]
 requires-dist = [
     { name = "huggingface-hub", specifier = ">=0.35.3" },
+    { name = "hydra-core", specifier = ">=1.3.2" },
     { name = "instanovo", specifier = ">=1.1.4" },
     { name = "jax", specifier = ">=0.5.2" },
     { name = "koinapy", specifier = ">=0.0.10" },

From d51a2644a362f320a6fe72fbcd5ed226841ff9c6 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 11:45:45 +0000
Subject: [PATCH 02/17] feat: use hydra to configure winnow runs

---
 config/calibrator.yaml                     |  48 ++
 config/data_loader/instanovo.yaml          |  23 +
 config/data_loader/mztab.yaml              |  20 +
 config/data_loader/pointnovo.yaml          |   5 +
 config/data_loader/winnow.yaml             |   7 +
 config/fdr_method/database_grounded.yaml   |   8 +
 config/fdr_method/nonparametric.yaml       |   3 +
 config/predict.yaml                        |  38 ++
 config/residues.yaml                       |  64 +++
 config/train.yaml                          |  21 +
 winnow/calibration/calibration_features.py |  74 ++-
 winnow/calibration/calibrator.py           |  57 ++-
 winnow/constants.py                        | 103 ----
 winnow/datasets/calibration_dataset.py     |  14 +-
 winnow/datasets/data_loaders.py            | 438 +++++++++-------
 winnow/datasets/interfaces.py              |  17 +-
 winnow/fdr/database_grounded.py            |  40 +-
 winnow/scripts/main.py                     | 553 ++++++++-------------
 18 files changed, 849 insertions(+), 684 deletions(-)
 create mode 100644 config/calibrator.yaml
 create mode 100644 config/data_loader/instanovo.yaml
 create mode 100644 config/data_loader/mztab.yaml
 create mode 100644 config/data_loader/pointnovo.yaml
 create mode 100644 config/data_loader/winnow.yaml
 create mode 100644 config/fdr_method/database_grounded.yaml
 create mode 100644 config/fdr_method/nonparametric.yaml
 create mode 100644 config/predict.yaml
 create mode 100644 config/residues.yaml
 create mode 100644 config/train.yaml
 delete mode 100644 winnow/constants.py

diff --git a/config/calibrator.yaml b/config/calibrator.yaml
new file mode 100644
index 0000000..0816814
--- /dev/null
+++ b/config/calibrator.yaml
@@ -0,0 +1,48 @@
+# --- Calibrator configuration ---
+
+calibrator:
+  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
+
+  seed: 42
+  hidden_layer_sizes: [50, 50]  # The number of neurons in each hidden layer of the MLP classifier.
+  learning_rate_init: 0.001  # The initial learning rate for the MLP classifier.
+  alpha: 0.0001  # L2 regularisation parameter for the MLP classifier.
+  max_iter: 1000  # Maximum number of training iterations for the MLP classifier.
+  early_stopping: true  # Whether to use early stopping to terminate training.
+  validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
+
+  features:
+    mass_error:
+      _target_: winnow.calibration.calibration_features.MassErrorFeature
+      residue_masses: ${residue_masses}  # The residue masses to use for the mass error feature.
+
+    prosit_features:
+      _target_: winnow.calibration.calibration_features.PrositFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true  # Whether to learn from missing Prosit features. If False, errors will be raised when invalid spectra are encountered.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit features.
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
+
+    retention_time_feature:
+      _target_: winnow.calibration.calibration_features.RetentionTimeFeature
+      hidden_dim: 10  # The hidden dimension size for the MLP regressor used to predict iRT from observed retention times.
+      train_fraction: 0.1  # The fraction of the data to use for training the iRT predictor.
+      learn_from_missing: true  # Whether to learn from missing retention time features. If False, errors will be raised when invalid spectra are encountered.
+      seed: 42  # Random seed for the MLP regressor.
+      learning_rate_init: 0.001  # The initial learning rate for the MLP regressor.
+      alpha: 0.0001  # L2 regularisation parameter for the MLP regressor.
+      max_iter: 200  # Maximum number of training iterations for the MLP regressor.
+      early_stopping: false  # Whether to use early stopping for the MLP regressor.
+      validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit iRT features.
+      prosit_irt_model_name: Prosit_2019_irt  # The name of the Prosit iRT model to use.
+
+    chimeric_features:
+      _target_: winnow.calibration.calibration_features.ChimericFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true  # Whether to learn from missing chimeric features. If False, errors will be raised when invalid spectra are encountered.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit chimeric intensity features.
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
+
+    beam_features:
+      _target_: winnow.calibration.calibration_features.BeamFeatures
diff --git a/config/data_loader/instanovo.yaml b/config/data_loader/instanovo.yaml
new file mode 100644
index 0000000..caf142e
--- /dev/null
+++ b/config/data_loader/instanovo.yaml
@@ -0,0 +1,23 @@
+# --- InstaNovo data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.InstaNovoDatasetLoader
+
+residue_masses: ${residue_masses}
+residue_remapping:  # Used to map InstaNovo legacy notations to UNIMOD tokens.
+  "M(ox)": "M[UNIMOD:35]"  # Oxidation
+  "M(+15.99)": "M[UNIMOD:35]"  # Oxidation
+  "S(p)": "S[UNIMOD:21]"  # Phosphorylation
+  "T(p)": "T[UNIMOD:21]"  # Phosphorylation
+  "Y(p)": "Y[UNIMOD:21]"  # Phosphorylation
+  "S(+79.97)": "S[UNIMOD:21]"  # Phosphorylation
+  "T(+79.97)": "T[UNIMOD:21]"  # Phosphorylation
+  "Y(+79.97)": "Y[UNIMOD:21]"  # Phosphorylation
+  "Q(+0.98)": "Q[UNIMOD:7]"  # Deamidation
+  "N(+0.98)": "N[UNIMOD:7]"  # Deamidation
+  "Q(+.98)": "Q[UNIMOD:7]"  # Deamidation
+  "N(+.98)": "N[UNIMOD:7]"  # Deamidation
+  "C(+57.02)": "C[UNIMOD:4]"  # Carbamidomethylation
+  # N-terminal modifications.
+  "(+42.01)": "[UNIMOD:1]"  # Acetylation
+  "(+43.01)": "[UNIMOD:5]"  # Carbamylation
+  "(-17.03)": "[UNIMOD:385]"  # Ammonia loss
diff --git a/config/data_loader/mztab.yaml b/config/data_loader/mztab.yaml
new file mode 100644
index 0000000..dc184ea
--- /dev/null
+++ b/config/data_loader/mztab.yaml
@@ -0,0 +1,20 @@
+# --- MZTab data loading configuration ---
+_target_: winnow.datasets.data_loaders.MZTabDatasetLoader
+
+residue_masses: ${residue_masses}
+residue_remapping:  # Used to map Casanovo-specific notations to UNIMOD tokens.
+  "M+15.995": "M[UNIMOD:35]"  # Oxidation
+  "Q+0.984": "Q[UNIMOD:7]"  # Deamidation
+  "N+0.984": "N[UNIMOD:7]"  # Deamidation
+  "+42.011": "[UNIMOD:1]"  # Acetylation
+  "+43.006": "[UNIMOD:5]"  # Carbamylation
+  "-17.027": "[UNIMOD:385]"  # Ammonia loss
+  "C+57.021": "C[UNIMOD:4]"  # Carbamidomethylation
+  "C[Carbamidomethyl]": "C[UNIMOD:4]"  # Carbamidomethylation
+  "M[Oxidation]": "M[UNIMOD:35]"  # Oxidation
+  "N[Deamidated]": "N[UNIMOD:7]"  # Deamidation
+  "Q[Deamidated]": "Q[UNIMOD:7]"  # Deamidation
+  # N-terminal modifications.
+  "[Acetyl]-": "[UNIMOD:1]"  # Acetylation
+  "[Carbamyl]-": "[UNIMOD:5]"  # Carbamylation
+  "[Ammonia-loss]-": "[UNIMOD:385]"  # Ammonia loss
diff --git a/config/data_loader/pointnovo.yaml b/config/data_loader/pointnovo.yaml
new file mode 100644
index 0000000..022691a
--- /dev/null
+++ b/config/data_loader/pointnovo.yaml
@@ -0,0 +1,5 @@
+# --- PointNovo data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.PointNovoDatasetLoader
+
+residue_masses: ${residue_masses}
diff --git a/config/data_loader/winnow.yaml b/config/data_loader/winnow.yaml
new file mode 100644
index 0000000..dbfb632
--- /dev/null
+++ b/config/data_loader/winnow.yaml
@@ -0,0 +1,7 @@
+# --- Winnow data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.WinnowDatasetLoader
+
+residue_masses: ${residue_masses}
+# The internal Winnow dataset loader does not need a residue remapping
+# since it uses the UNIMOD tokens directly.
diff --git a/config/fdr_method/database_grounded.yaml b/config/fdr_method/database_grounded.yaml
new file mode 100644
index 0000000..41a6cc3
--- /dev/null
+++ b/config/fdr_method/database_grounded.yaml
@@ -0,0 +1,8 @@
+# --- Database-grounded FDR control configuration ---
+
+_target_: winnow.fdr.database_grounded.DatabaseGroundedFDRControl
+
+confidence_feature: ${fdr_control.confidence_column}  # Name of the column with confidence scores to use for FDR estimation.
+residue_masses: ${residue_masses}  # The residue masses from global `residues` config
+isotope_error_range: [0, 1]  # The isotope error range for matching peptides
+drop: 10  # The number of top predictions to drop for stability
diff --git a/config/fdr_method/nonparametric.yaml b/config/fdr_method/nonparametric.yaml
new file mode 100644
index 0000000..2d8c5a3
--- /dev/null
+++ b/config/fdr_method/nonparametric.yaml
@@ -0,0 +1,3 @@
+# --- Non-parametric FDR control configuration ---
+
+_target_: winnow.fdr.nonparametric.NonParametricFDRControl
diff --git a/config/predict.yaml b/config/predict.yaml
new file mode 100644
index 0000000..743c4d2
--- /dev/null
+++ b/config/predict.yaml
@@ -0,0 +1,38 @@
+# --- Predicting scores and applying FDR control ---
+defaults:
+  - _self_
+  - residues
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+  - fdr_method: nonparametric  # Options: nonparametric, database_grounded
+
+# --- Pipeline Execution Configuration ---
+
+dataset:
+  # Dataset paths:
+  # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
+  spectrum_path_or_directory: data/spectra.ipc
+  # Path to the beam predictions file.
+  # Leave as `null` if data source is `winnow`, or loading will fail.
+  predictions_path: data/predictions.csv
+  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
+
+calibrator:
+  # Model loading:
+  # Path to the local calibrator directory or the HuggingFace model identifier.
+  # If the path is a local directory path, it will be used directly. If it is a HuggingFace repository identifier, it will be downloaded from HuggingFace.
+  pretrained_model_name_or_path: InstaDeepAI/winnow-general-model
+  # Directory to cache the HuggingFace model.
+  cache_dir: null  # Set to `null` when using a local model, or to use the default HuggingFace cache directory.
+
+fdr_control:
+  # FDR settings:
+  # Target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.).
+  fdr_threshold: 0.05
+  # Name of the column with confidence scores to use for FDR estimation.
+  confidence_column: calibrated_confidence
+
+# Folder path to write the outputs to.
+# This will create two CSV files in the output folder:
+# - metadata.csv: Contains all metadata and feature columns from the input dataset.
+# - preds_and_fdr_metrics.csv: Contains predictions and FDR metrics.
+output_folder: results/predictions
diff --git a/config/residues.yaml b/config/residues.yaml
new file mode 100644
index 0000000..d76d4a3
--- /dev/null
+++ b/config/residues.yaml
@@ -0,0 +1,64 @@
+# --- Residues configuration ---
+
+# This is Winnow's internal residue representation.
+# We use this to calculate the mass error feature and during database-grounded FDR control.
+# We also use this to initialise the residue set for the Metrics class.
+residue_masses:
+  "G": 57.021464
+  "A": 71.037114
+  "S": 87.032028
+  "P": 97.052764
+  "V": 99.068414
+  "T": 101.047670
+  "C": 103.009185
+  "L": 113.084064
+  "I": 113.084064
+  "N": 114.042927
+  "D": 115.026943
+  "Q": 128.058578
+  "K": 128.094963
+  "E": 129.042593
+  "M": 131.040485
+  "H": 137.058912
+  "F": 147.068414
+  "R": 156.101111
+  "Y": 163.063329
+  "W": 186.079313
+  # Modifications
+  "M[UNIMOD:35]": 147.035400 # Oxidation
+  "C[UNIMOD:4]": 160.030649 # Carbamidomethylation
+  "N[UNIMOD:7]": 115.026943 # Deamidation
+  "Q[UNIMOD:7]": 129.042594 # Deamidation
+  "R[UNIMOD:7]": 157.085127 # Arginine citrullination
+  "P[UNIMOD:35]": 113.047679 # Proline hydroxylation
+  "S[UNIMOD:21]": 166.998028 # Phosphorylation + 79.966
+  "T[UNIMOD:21]": 181.01367 # Phosphorylation + 79.966
+  "Y[UNIMOD:21]": 243.029329 # Phosphorylation + 79.966
+  "C[UNIMOD:312]": 222.013284  # Cysteinylation
+  "E[UNIMOD:27]": 111.032028  # Glu -> pyro-Glu
+  "Q[UNIMOD:28]": 111.032029  # Gln -> pyro-Glu
+  # Terminal modifications
+  "[UNIMOD:1]": 42.010565 # Acetylation
+  "[UNIMOD:5]": 43.005814 # Carbamylation
+  "[UNIMOD:385]": -17.026549 # NH3 loss
+  "(+25.98)": 25.980265  # Carbamylation & NH3 loss (legacy notation)
+
+# The tokens to consider as invalid for Prosit features.
+# We also filter out non-carbamidomethylated cysteine in a separate step.
+invalid_prosit_tokens:
+  # InstaNovo
+  - "[UNIMOD:7]"
+  - "[UNIMOD:21]"
+  - "[UNIMOD:1]"
+  - "[UNIMOD:5]"
+  - "[UNIMOD:385]"
+  - "(+25.98)"  # (legacy notation)
+  # Casanovo
+  - "+0.984"
+  - "+42.011"
+  - "+43.006"
+  - "-17.027"
+  - "[Ammonia-loss]-"
+  - "[Carbamyl]-"
+  - "[Acetyl]-"
+  - "[Deamidated]"
diff --git a/config/train.yaml b/config/train.yaml
new file mode 100644
index 0000000..76ab931
--- /dev/null
+++ b/config/train.yaml
@@ -0,0 +1,21 @@
+# --- Training a calibrator ---
+defaults:
+  - _self_
+  - residues
+  - calibrator
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+
+# --- Pipeline Execution Configuration ---
+
+dataset:
+  # Dataset paths:
+  # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
+  spectrum_path_or_directory: data/spectra.ipc
+  # Path to the beam predictions file.
+  # Leave as `null` if data source is `winnow`, or loading will fail.
+  predictions_path: data/predictions.csv
+  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
+
+# Output paths:
+model_output_dir: models/new_model
+dataset_output_path: results/calibrated_dataset.csv
diff --git a/winnow/calibration/calibration_features.py b/winnow/calibration/calibration_features.py
index b5a820e..3fd17df 100644
--- a/winnow/calibration/calibration_features.py
+++ b/winnow/calibration/calibration_features.py
@@ -13,7 +13,6 @@
 import koinapy
 
 from winnow.datasets.calibration_dataset import CalibrationDataset
-from winnow.constants import INVALID_PROSIT_TOKENS
 
 
 def map_modification(peptide: List[str]) -> List[str]:
@@ -197,18 +196,28 @@ def compute_ion_identifications(
 class PrositFeatures(CalibrationFeatures):
     """A class for extracting features related to Prosit: a machine learning-based intensity prediction tool for peptide fragmentation."""
 
-    def __init__(self, mz_tolerance: float, learn_from_missing: bool = True) -> None:
+    def __init__(
+        self,
+        mz_tolerance: float,
+        invalid_prosit_tokens: List[str],
+        learn_from_missing: bool = True,
+        prosit_intensity_model_name: str = "Prosit_2020_intensity_HCD",
+    ) -> None:
         """Initialize PrositFeatures.
 
         Args:
             mz_tolerance (float): The mass-to-charge tolerance for ion matching.
+            invalid_prosit_tokens (List[str]): The tokens to consider as invalid for Prosit intensity prediction.
             learn_from_missing (bool): Whether to learn from missing data by including a missingness indicator column.
                 If False, an error will be raised when invalid spectra are encountered.
                 Defaults to True.
+            prosit_intensity_model_name (str): The name of the Prosit intensity model to use.
+                Defaults to "Prosit_2020_intensity_HCD".
         """
         self.mz_tolerance = mz_tolerance
+        self.invalid_prosit_tokens = invalid_prosit_tokens
         self.learn_from_missing = learn_from_missing
-        self.prosit_intensity_model_name = "Prosit_2020_intensity_HCD"
+        self.prosit_intensity_model_name = prosit_intensity_model_name
 
     @property
     def dependencies(self) -> List[FeatureDependency]:
@@ -266,7 +275,7 @@ def check_valid_prosit_prediction(self, dataset: CalibrationDataset) -> pd.Serie
                 metadata_predicate=lambda row: (
                     any(
                         token in row["prediction_untokenised"]
-                        for token in INVALID_PROSIT_TOKENS
+                        for token in self.invalid_prosit_tokens
                     )
                 )
             )
@@ -322,7 +331,7 @@ def compute(self, dataset: CalibrationDataset) -> None:
                     f"Please filter your dataset to remove:\n"
                     f"  - Peptides longer than 30 amino acids\n"
                     f"  - Precursor charges greater than 6\n"
-                    f"  - Peptides with unsupported modifications (e.g., {', '.join(INVALID_PROSIT_TOKENS[:3])}...)\n"
+                    f"  - Peptides with unsupported modifications (e.g., {', '.join(self.invalid_prosit_tokens[:3])}...)\n"
                     f"Or set learn_from_missing=True to handle missing data automatically."
                 )
 
@@ -413,18 +422,28 @@ class ChimericFeatures(CalibrationFeatures):
     are stored in the dataset metadata.
     """
 
-    def __init__(self, mz_tolerance: float, learn_from_missing: bool = True) -> None:
+    def __init__(
+        self,
+        mz_tolerance: float,
+        invalid_prosit_tokens: List[str],
+        learn_from_missing: bool = True,
+        prosit_intensity_model_name: str = "Prosit_2020_intensity_HCD",
+    ) -> None:
         """Initialize ChimericFeatures.
 
         Args:
             mz_tolerance (float): The mass-to-charge tolerance for ion matching.
+            invalid_prosit_tokens (List[str]): The tokens to consider as invalid for Prosit intensity prediction.
             learn_from_missing (bool): Whether to learn from missing data by including a missingness indicator column.
                 If False, an error will be raised when invalid spectra are encountered.
                 Defaults to True.
+            prosit_intensity_model_name (str): The name of the Prosit intensity model to use.
+                Defaults to "Prosit_2020_intensity_HCD".
         """
         self.mz_tolerance = mz_tolerance
         self.learn_from_missing = learn_from_missing
-        self.prosit_intensity_model_name = "Prosit_2020_intensity_HCD"
+        self.invalid_prosit_tokens = invalid_prosit_tokens
+        self.prosit_intensity_model_name = prosit_intensity_model_name
 
     @property
     def dependencies(self) -> List[FeatureDependency]:
@@ -488,7 +507,7 @@ def check_valid_chimeric_prosit_prediction(
                     len(beam) > 1
                     and any(
                         token in "".join(beam[1].sequence)
-                        for token in INVALID_PROSIT_TOKENS
+                        for token in self.invalid_prosit_tokens
                     )
                 )
             )
@@ -552,7 +571,7 @@ def compute(self, dataset: CalibrationDataset) -> None:
                     f"  - Spectra without runner-up sequences (beam search required)\n"
                     f"  - Runner-up peptides longer than 30 amino acids\n"
                     f"  - Runner-up peptides with precursor charges greater than 6\n"
-                    f"  - Runner-up peptides with unsupported modifications (e.g., {', '.join(INVALID_PROSIT_TOKENS[:3])}...)\n"
+                    f"  - Runner-up peptides with unsupported modifications (e.g., {', '.join(self.invalid_prosit_tokens[:3])}...)\n"
                     f"Or set learn_from_missing=True to handle missing data automatically."
                 )
 
@@ -834,23 +853,50 @@ class RetentionTimeFeature(CalibrationFeatures):
     irt_predictor: MLPRegressor
 
     def __init__(
-        self, hidden_dim: int, train_fraction: float, learn_from_missing: bool = True
+        self,
+        hidden_dim: int,
+        train_fraction: float,
+        invalid_prosit_tokens: List[str],
+        learn_from_missing: bool = True,
+        seed: int = 42,
+        learning_rate_init: float = 0.001,
+        alpha: float = 0.0001,
+        max_iter: int = 200,
+        early_stopping: bool = False,
+        validation_fraction: float = 0.1,
+        prosit_irt_model_name: str = "Prosit_2019_irt",
     ) -> None:
         """Initialize RetentionTimeFeature.
 
         Args:
             hidden_dim (int): Hidden dimension size for the MLP regressor.
             train_fraction (float): Fraction of data to use for training the iRT calibrator.
+            invalid_prosit_tokens (List[str]): The tokens to consider as invalid for Prosit iRT prediction.
             learn_from_missing (bool): Whether to learn from missing data by including a missingness indicator column.
                 If False, an error will be raised when invalid spectra are encountered.
                 Defaults to True.
+            seed (int): Random seed for the regressor. Defaults to 42.
+            learning_rate_init (float): The initial learning rate. Defaults to 0.001.
+            alpha (float): L2 regularisation parameter. Defaults to 0.0001.
+            max_iter (int): Maximum number of training iterations. Defaults to 200.
+            early_stopping (bool): Whether to use early stopping to terminate training. Defaults to False.
+            validation_fraction (float): Proportion of training data to use for early stopping validation. Defaults to 0.1.
+            prosit_irt_model_name (str): The name of the Prosit iRT model to use.
+                Defaults to "Prosit_2019_irt".
         """
         self.train_fraction = train_fraction
         self.hidden_dim = hidden_dim
         self.learn_from_missing = learn_from_missing
-        self.prosit_irt_model_name = "Prosit_2019_irt"
+        self.invalid_prosit_tokens = invalid_prosit_tokens
+        self.prosit_irt_model_name = prosit_irt_model_name
         self.irt_predictor = MLPRegressor(
-            hidden_layer_sizes=[hidden_dim], random_state=42
+            hidden_layer_sizes=[hidden_dim],
+            random_state=seed,
+            learning_rate_init=learning_rate_init,
+            alpha=alpha,
+            max_iter=max_iter,
+            early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
         )
 
     @property
@@ -906,7 +952,7 @@ def check_valid_irt_prediction(self, dataset: CalibrationDataset) -> pd.Series:
                 metadata_predicate=lambda row: (
                     any(
                         token in row["prediction_untokenised"]
-                        for token in INVALID_PROSIT_TOKENS
+                        for token in self.invalid_prosit_tokens
                     )
                 )
             )
@@ -1003,7 +1049,7 @@ def compute(self, dataset: CalibrationDataset) -> None:
                     f"  - Spectra without retention time data\n"
                     f"  - Peptides longer than 30 amino acids\n"
                     f"  - Precursor charges greater than 6\n"
-                    f"  - Peptides with unsupported modifications (e.g., {', '.join(INVALID_PROSIT_TOKENS[:3])}...)\n"
+                    f"  - Peptides with unsupported modifications (e.g., {', '.join(self.invalid_prosit_tokens[:3])}...)\n"
                     f"Or set learn_from_missing=True to handle missing data automatically."
                 )
 
diff --git a/winnow/calibration/calibrator.py b/winnow/calibration/calibrator.py
index 97ae44d..8c5edfe 100644
--- a/winnow/calibration/calibrator.py
+++ b/winnow/calibration/calibrator.py
@@ -8,6 +8,8 @@
 from sklearn.preprocessing import StandardScaler
 from jaxtyping import Float
 from huggingface_hub import snapshot_download
+from omegaconf import DictConfig
+
 from winnow.calibration.calibration_features import (
     CalibrationFeatures,
     FeatureDependency,
@@ -21,21 +23,54 @@ class ProbabilityCalibrator:
     This class provides functionality to recalibrate predicted probabilities by fitting a logistic regression model using various features computed from a calibration dataset.
     """
 
-    def __init__(self, seed: int = 42) -> None:
+    def __init__(
+        self,
+        seed: int = 42,
+        features: Optional[
+            Union[List[CalibrationFeatures], Dict[str, CalibrationFeatures], DictConfig]
+        ] = None,
+        hidden_layer_sizes: Tuple[int, ...] = (50, 50),
+        learning_rate_init: float = 0.001,
+        alpha: float = 0.0001,
+        max_iter: int = 1000,
+        early_stopping: bool = True,
+        validation_fraction: float = 0.1,
+    ) -> None:
+        """Initialise the probability calibrator.
+
+        Args:
+            seed (int): Random seed for the classifier. Defaults to 42.
+            features (Optional[Union[List[CalibrationFeatures], Dict[str, CalibrationFeatures], DictConfig]]):
+                Features to add to the calibrator. Can be a list or dict of CalibrationFeatures objects.
+                If None, no features are added. Defaults to None.
+            hidden_layer_sizes (Tuple[int, ...]): The number of neurons in each hidden layer. Defaults to (50, 50).
+            learning_rate_init (float): The initial learning rate. Defaults to 0.001.
+            alpha (float): L2 regularisation parameter. Defaults to 0.0001.
+            max_iter (int): Maximum number of training iterations. Defaults to 1000.
+            early_stopping (bool): Whether to use early stopping to terminate training. Defaults to True.
+            validation_fraction (float): Proportion of training data to use for early stopping validation. Defaults to 0.1.
+        """
         self.feature_dict: Dict[str, CalibrationFeatures] = {}
         self.dependencies: Dict[str, FeatureDependency] = {}
         self.dependency_reference_counter: Dict[str, int] = {}
         self.classifier = MLPClassifier(
             random_state=seed,
-            hidden_layer_sizes=(50, 50),
-            learning_rate_init=0.001,
-            alpha=0.0001,
-            max_iter=1000,
-            early_stopping=True,
-            validation_fraction=0.1,
+            hidden_layer_sizes=hidden_layer_sizes,
+            learning_rate_init=learning_rate_init,
+            alpha=alpha,
+            max_iter=max_iter,
+            early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
         )
         self.scaler = StandardScaler()
 
+        # Add features if provided
+        if features is not None:
+            if isinstance(features, (dict, DictConfig)):
+                self.add_features(list(features.values()))
+            else:
+                self.add_features(list(features))
+
     @property
     def columns(self) -> List[str]:
         """Returns the list of column names corresponding to the features added to the calibrator.
@@ -59,14 +94,18 @@ def features(self) -> List[str]:
         return list(self.feature_dict.keys())
 
     @classmethod
-    def save(cls, calibrator: "ProbabilityCalibrator", dir_path: Path) -> None:
+    def save(
+        cls, calibrator: "ProbabilityCalibrator", dir_path: Union[Path, str]
+    ) -> None:
         """Save the calibrator to a file.
 
         Args:
             calibrator (ProbabilityCalibrator): The calibrator to save.
             dir_path (Path): The path to the directory where the calibrator checkpoint will be saved.
         """
-        dir_path.mkdir(parents=True)
+        if isinstance(dir_path, str):
+            dir_path = Path(dir_path)
+        dir_path.mkdir(parents=True, exist_ok=True)
         pickle.dump(calibrator, open(dir_path / "calibrator.pkl", "wb"))
 
     @classmethod
diff --git a/winnow/constants.py b/winnow/constants.py
deleted file mode 100644
index a4eeb31..0000000
--- a/winnow/constants.py
+++ /dev/null
@@ -1,103 +0,0 @@
-from instanovo.utils.residues import ResidueSet
-from instanovo.utils.metrics import Metrics
-
-RESIDUE_MASSES: dict[str, float] = {
-    "G": 57.021464,
-    "A": 71.037114,
-    "S": 87.032028,
-    "P": 97.052764,
-    "V": 99.068414,
-    "T": 101.047670,
-    "C": 103.009185,
-    "L": 113.084064,
-    "I": 113.084064,
-    "N": 114.042927,
-    "D": 115.026943,
-    "Q": 128.058578,
-    "K": 128.094963,
-    "E": 129.042593,
-    "M": 131.040485,
-    "H": 137.058912,
-    "F": 147.068414,
-    "R": 156.101111,
-    "Y": 163.063329,
-    "W": 186.079313,
-    # Modifications
-    "M[UNIMOD:35]": 147.035400,  # Oxidation
-    "N[UNIMOD:7]": 115.026943,  # Deamidation
-    "Q[UNIMOD:7]": 129.042594,  # Deamidation
-    "C[UNIMOD:4]": 160.030649,  # Carboxyamidomethylation
-    "S[UNIMOD:21]": 166.998028,  # Phosphorylation
-    "T[UNIMOD:21]": 181.01367,  # Phosphorylation
-    "Y[UNIMOD:21]": 243.029329,  # Phosphorylation
-    "[UNIMOD:385]": -17.026549,  # Ammonia Loss
-    "[UNIMOD:5]": 43.005814,  # Carbamylation
-    "[UNIMOD:1]": 42.010565,  # Acetylation
-    "C[UNIMOD:312]": 222.013284,  # Cysteinylation
-    "E[UNIMOD:27]": 111.032028,  # Glu -> pyro-Glu
-    "Q[UNIMOD:28]": 111.032029,  # Gln -> pyro-Gln
-    "(+25.98)": 25.980265,  # Carbamylation & NH3 loss
-}
-
-RESIDUE_REMAPPING: dict[str, str] = {
-    "M(ox)": "M[UNIMOD:35]",  # Oxidation
-    "M(+15.99)": "M[UNIMOD:35]",
-    "S(p)": "S[UNIMOD:21]",  # Phosphorylation
-    "T(p)": "T[UNIMOD:21]",
-    "Y(p)": "Y[UNIMOD:21]",
-    "S(+79.97)": "S[UNIMOD:21]",
-    "T(+79.97)": "T[UNIMOD:21]",
-    "Y(+79.97)": "Y[UNIMOD:21]",
-    "Q(+0.98)": "Q[UNIMOD:7]",  # Deamidation
-    "N(+0.98)": "N[UNIMOD:7]",
-    "Q(+.98)": "Q[UNIMOD:7]",
-    "N(+.98)": "N[UNIMOD:7]",
-    "C(+57.02)": "C[UNIMOD:4]",  # Carbamidomethylation
-    "(+42.01)": "[UNIMOD:1]",  # Acetylation
-    "(+43.01)": "[UNIMOD:5]",  # Carbamylation
-    "(-17.03)": "[UNIMOD:385]",  # Loss of ammonia
-}
-
-CASANOVO_RESIDUE_REMAPPING: dict[str, str] = {
-    "M+15.995": "M[UNIMOD:35]",  # Oxidation
-    "Q+0.984": "Q[UNIMOD:7]",  # Deamidation
-    "N+0.984": "N[UNIMOD:7]",  # Deamidation
-    "+42.011": "[UNIMOD:1]",  # Acetylation
-    "+43.006": "[UNIMOD:5]",  # Carbamylation
-    "-17.027": "[UNIMOD:385]",  # Loss of ammonia
-    "C+57.021": "C[UNIMOD:4]",  # Carbamidomethylation
-    # "+43.006-17.027": "[UNIMOD:5][UNIMOD:385]",  # Carbamylation and Loss of ammonia
-    "C[Carbamidomethyl]": "C[UNIMOD:4]",  # Carbamidomethylation
-    "M[Oxidation]": "M[UNIMOD:35]",  # Met oxidation:   131.040485 + 15.994915
-    "N[Deamidated]": "N[UNIMOD:7]",  # Asn deamidation: 114.042927 +  0.984016
-    "Q[Deamidated]": "Q[UNIMOD:7]",  # Gln deamidation: 128.058578 +  0.984016
-    # N-terminal modifications.
-    "[Acetyl]-": "[UNIMOD:1]",  # Acetylation
-    "[Carbamyl]-": "[UNIMOD:5]",  # Carbamylation
-    "[Ammonia-loss]-": "[UNIMOD:385]",  # Ammonia loss
-    # "[+25.980265]-": 25.980265     # Carbamylation and ammonia loss
-}
-
-# Each C is also treated as Cysteine with carbamidomethylation in Prosit.
-INVALID_PROSIT_TOKENS: list = [
-    "(+25.98)",
-    "[UNIMOD:7]",
-    "[UNIMOD:21]",
-    "[UNIMOD:1]",
-    "[UNIMOD:5]",
-    "[UNIMOD:385]",
-    "+0.984",
-    "+42.011",
-    "+43.006",
-    "-17.027",
-    "[Ammonia-loss]-",
-    "[Carbamyl]-",
-    "[Acetyl]-",
-    "[Deamidated]",
-]
-
-
-residue_set = ResidueSet(
-    residue_masses=RESIDUE_MASSES, residue_remapping=RESIDUE_REMAPPING
-)
-metrics = Metrics(residue_set=residue_set, isotope_error_range=[0, 1])
diff --git a/winnow/datasets/calibration_dataset.py b/winnow/datasets/calibration_dataset.py
index 118387a..50df89b 100644
--- a/winnow/datasets/calibration_dataset.py
+++ b/winnow/datasets/calibration_dataset.py
@@ -9,7 +9,7 @@
 
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, List, Optional, Tuple, Union
 import pickle
 
 import numpy as np
@@ -48,7 +48,7 @@ def save(self, data_dir: Path) -> None:
             data_dir (Path): Directory to save the dataset. This will contain `metadata.csv` and
                             optionally, `predictions.pkl` for serialized beam search results.
         """
-        data_dir.mkdir(parents=True)
+        data_dir.mkdir(parents=True, exist_ok=True)
         with (data_dir / "metadata.csv").open(mode="w") as metadata_file:
             output_metadata = self.metadata.copy(deep=True)
             if "sequence" in output_metadata.columns:
@@ -127,20 +127,26 @@ def filter_entries(
 
         return CalibrationDataset(predictions=predictions, metadata=metadata)
 
-    def to_csv(self, path: Path) -> None:
+    def to_csv(self, path: Union[Path, str]) -> None:
         """Saves the dataset metadata to a CSV file.
 
         Args:
             path (str): Path to the output CSV file.
         """
+        if isinstance(path, str):
+            path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
         self.metadata.to_csv(path)
 
-    def to_parquet(self, path: str) -> None:
+    def to_parquet(self, path: Union[Path, str]) -> None:
         """Saves the dataset metadata to a parquet file.
 
         Args:
             path (str): Path to the output parquet file.
         """
+        if isinstance(path, str):
+            path = Path(path)
+        path.parent.mkdir(parents=True, exist_ok=True)
         self.metadata.to_parquet(path)
 
     def _create_predicate_error_message(
diff --git a/winnow/datasets/data_loaders.py b/winnow/datasets/data_loaders.py
index 34c3d4c..9203752 100644
--- a/winnow/datasets/data_loaders.py
+++ b/winnow/datasets/data_loaders.py
@@ -9,24 +9,44 @@
 import re
 from pathlib import Path
 from typing import Any, List, Optional, Tuple
-
 import numpy as np
 import pandas as pd
 import polars as pl
 import polars.selectors as cs
 from pyteomics import mztab
+from instanovo.utils.residues import ResidueSet
+from instanovo.utils.metrics import Metrics
 
 from winnow.datasets.interfaces import DatasetLoader
 from winnow.datasets.calibration_dataset import (
     CalibrationDataset,
     ScoredSequence,
 )
-from winnow.constants import metrics, CASANOVO_RESIDUE_REMAPPING
 
 
 class InstaNovoDatasetLoader(DatasetLoader):
     """Loader for InstaNovo predictions in CSV format."""
 
+    def __init__(
+        self,
+        residue_masses: dict[str, float],
+        residue_remapping: dict[str, str],
+        isotope_error_range: Tuple[int, int] = (0, 1),
+    ) -> None:
+        """Initialise the InstaNovoDatasetLoader.
+
+        Args:
+            residue_masses: The mapping of residue tokens to their masses.
+            residue_remapping: The mapping of residue notations to UNIMOD tokens.
+            isotope_error_range: The range of isotope errors to consider when matching peptides.
+        """
+        self.metrics = Metrics(
+            residue_set=ResidueSet(
+                residue_masses=residue_masses, residue_remapping=residue_remapping
+            ),
+            isotope_error_range=isotope_error_range,
+        )
+
     @staticmethod
     def _load_beam_preds(
         predictions_path: Path,
@@ -39,6 +59,10 @@ def _load_beam_preds(
         Returns:
             Tuple[pl.DataFrame, pl.DataFrame]: A tuple containing the predictions and beams dataframes.
         """
+        if predictions_path.suffix != ".csv":
+            raise ValueError(
+                f"Unsupported file format for InstaNovo beam predictions: {predictions_path.suffix}. Supported format is .csv."
+            )
         df = pl.read_csv(predictions_path)
         # Use polars column selectors to split dataframe
         beam_df = df.select(cs.contains("_beam_"))
@@ -46,7 +70,103 @@ def _load_beam_preds(
         return preds_df, beam_df
 
     @staticmethod
-    def _process_beams(beam_df: pl.DataFrame) -> List[Optional[List[ScoredSequence]]]:
+    def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
+        """Loads spectrum data from either a Parquet or IPC file.
+
+        Args:
+            spectrum_path (Path | str): The path to the spectrum data file.
+
+        Returns:
+            Tuple[pl.DataFrame, bool]: A tuple containing the spectrum data and a boolean indicating whether the dataset has ground truth labels.
+        """
+        spectrum_path = Path(spectrum_path)
+
+        if spectrum_path.suffix == ".parquet":
+            df = pl.read_parquet(spectrum_path)
+        elif spectrum_path.suffix == ".ipc":
+            df = pl.read_ipc(spectrum_path)
+        else:
+            raise ValueError(
+                f"Unsupported file format for spectrum data: {spectrum_path.suffix}. Supported formats are .parquet and .ipc."
+            )
+
+        if "sequence" in df.columns:
+            has_labels = True
+        else:
+            has_labels = False
+
+        return df, has_labels
+
+    @staticmethod
+    def _merge_spectrum_data(
+        beam_dataset: pd.DataFrame, spectrum_dataset: pd.DataFrame
+    ) -> pd.DataFrame:
+        """Merge the input and output data from the de novo sequencing model.
+
+        Args:
+            beam_dataset (pd.DataFrame): The dataframe containing the beam predictions.
+            spectrum_dataset (pd.DataFrame): The dataframe containing the spectrum data.
+
+        Returns:
+            pd.DataFrame: The merged dataframe.
+        """
+        merged_df = pd.merge(
+            beam_dataset,
+            spectrum_dataset,
+            on=["spectrum_id"],
+            suffixes=("_from_beams", ""),
+        )
+        merged_df = merged_df.drop(
+            columns=[
+                col + "_from_beams"
+                for col in beam_dataset.columns
+                if col in spectrum_dataset.columns and col != "spectrum_id"
+            ],
+            axis=1,
+        )
+
+        if len(merged_df) != len(beam_dataset):
+            raise ValueError(
+                f"Merge conflict: Expected {len(beam_dataset)} rows, but got {len(merged_df)}."
+            )
+
+        return merged_df
+
+    def load(
+        self, *, data_path: Path, predictions_path: Optional[Path] = None, **kwargs: Any
+    ) -> CalibrationDataset:
+        """Load a CalibrationDataset from InstaNovo CSV predictions.
+
+        Args:
+            data_path: Path to the spectrum data file
+            predictions_path: Path to the CSV beam predictions file
+            **kwargs: Not used
+
+        Returns:
+            CalibrationDataset: An instance of the CalibrationDataset class containing metadata and predictions.
+
+        Raises:
+            ValueError: If predictions_path is None
+        """
+        if predictions_path is None:
+            raise ValueError("predictions_path is required for InstaNovoDatasetLoader")
+
+        beam_predictions_path = predictions_path
+        inputs, has_labels = self._load_spectrum_data(data_path)
+        inputs = self._process_spectrum_data(inputs, has_labels)
+
+        predictions, beams = self._load_beam_preds(beam_predictions_path)
+        beams = self._process_beams(beams)
+        predictions = self._process_predictions(predictions.to_pandas(), has_labels)
+
+        predictions = self._merge_spectrum_data(predictions, inputs)
+        predictions = self._evaluate_predictions(predictions, has_labels)
+
+        return CalibrationDataset(metadata=predictions, predictions=beams)
+
+    def _process_beams(
+        self, beam_df: pl.DataFrame
+    ) -> List[Optional[List[ScoredSequence]]]:
         """Processes beam predictions into scored sequences.
 
         Args:
@@ -77,7 +197,7 @@ def convert_row_to_scored_sequences(
                 if sequence and log_prob > float("-inf"):
                     scored_sequences.append(
                         ScoredSequence(
-                            sequence=metrics._split_peptide(sequence),
+                            sequence=self.metrics._split_peptide(sequence),
                             mass_error=None,
                             sequence_log_probability=log_prob,
                             token_log_probabilities=token_log_prob,
@@ -102,8 +222,35 @@ def convert_row_to_scored_sequences(
             for row in beam_df.iter_rows(named=True)
         ]
 
-    @staticmethod
-    def _process_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFrame:
+    def _process_spectrum_data(
+        self, df: pl.DataFrame, has_labels: bool
+    ) -> pd.DataFrame:
+        """Processes the input data from the de novo sequencing model.
+
+        Args:
+            df (pl.DataFrame): The dataframe containing the spectrum data.
+            has_labels (bool): Whether the dataset has ground truth labels.
+
+        Returns:
+            pd.DataFrame: The processed dataframe.
+        """
+        # Convert to pandas for downstream compatibility
+        df = df.to_pandas()
+        if has_labels:
+            df["sequence"] = (
+                df["sequence"]
+                .apply(
+                    lambda peptide: peptide.replace("L", "I")
+                    if isinstance(peptide, str)
+                    else peptide
+                )
+                .apply(self.metrics._split_peptide)
+            )
+        return df
+
+    def _process_predictions(
+        self, dataset: pd.DataFrame, has_labels: bool
+    ) -> pd.DataFrame:
         """Processes the predictions obtained from saved beams.
 
         Args:
@@ -136,7 +283,7 @@ def _process_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFram
                 else peptide
             )
             dataset["sequence"] = dataset["sequence_untokenised"].apply(
-                metrics._split_peptide
+                self.metrics._split_peptide
             )
         dataset["prediction"] = dataset["prediction"].apply(
             lambda peptide: [
@@ -153,96 +300,9 @@ def _process_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFram
 
         return dataset
 
-    @staticmethod
-    def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
-        """Loads spectrum data from either a Parquet or IPC file.
-
-        Args:
-            spectrum_path (Path | str): The path to the spectrum data file.
-
-        Returns:
-            Tuple[pl.DataFrame, bool]: A tuple containing the spectrum data and a boolean indicating whether the dataset has ground truth labels.
-        """
-        spectrum_path = Path(spectrum_path)
-
-        if spectrum_path.suffix == ".parquet":
-            df = pl.read_parquet(spectrum_path)
-        elif spectrum_path.suffix == ".ipc":
-            df = pl.read_ipc(spectrum_path)
-        else:
-            raise ValueError(
-                f"Unsupported file format: {spectrum_path.suffix}. Supported formats are .parquet and .ipc."
-            )
-
-        if "sequence" in df.columns:
-            has_labels = True
-        else:
-            has_labels = False
-
-        return df, has_labels
-
-    @staticmethod
-    def _process_spectrum_data(df: pl.DataFrame, has_labels: bool) -> pd.DataFrame:
-        """Processes the input data from the de novo sequencing model.
-
-        Args:
-            df (pl.DataFrame): The dataframe containing the spectrum data.
-            has_labels (bool): Whether the dataset has ground truth labels.
-
-        Returns:
-            pd.DataFrame: The processed dataframe.
-        """
-        # Convert to pandas for downstream compatibility
-        df = df.to_pandas()
-        if has_labels:
-            df["sequence"] = (
-                df["sequence"]
-                .apply(
-                    lambda peptide: peptide.replace("L", "I")
-                    if isinstance(peptide, str)
-                    else peptide
-                )
-                .apply(metrics._split_peptide)
-            )
-        return df
-
-    @staticmethod
-    def _merge_spectrum_data(
-        beam_dataset: pd.DataFrame, spectrum_dataset: pd.DataFrame
+    def _evaluate_predictions(
+        self, dataset: pd.DataFrame, has_labels: bool
     ) -> pd.DataFrame:
-        """Merge the input and output data from the de novo sequencing model.
-
-        Args:
-            beam_dataset (pd.DataFrame): The dataframe containing the beam predictions.
-            spectrum_dataset (pd.DataFrame): The dataframe containing the spectrum data.
-
-        Returns:
-            pd.DataFrame: The merged dataframe.
-        """
-        merged_df = pd.merge(
-            beam_dataset,
-            spectrum_dataset,
-            on=["spectrum_id"],
-            suffixes=("_from_beams", ""),
-        )
-        merged_df = merged_df.drop(
-            columns=[
-                col + "_from_beams"
-                for col in beam_dataset.columns
-                if col in spectrum_dataset.columns and col != "spectrum_id"
-            ],
-            axis=1,
-        )
-
-        if len(merged_df) != len(beam_dataset):
-            raise ValueError(
-                f"Merge conflict: Expected {len(beam_dataset)} rows, but got {len(merged_df)}."
-            )
-
-        return merged_df
-
-    @staticmethod
-    def _evaluate_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFrame:
         """Evaluates predictions in a dataset by checking validity and accuracy.
 
         Args:
@@ -261,7 +321,9 @@ def _evaluate_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFra
         )
         if has_labels:
             dataset["num_matches"] = dataset.apply(
-                lambda row: metrics._novor_match(row["sequence"], row["prediction"])
+                lambda row: self.metrics._novor_match(
+                    row["sequence"], row["prediction"]
+                )
                 if isinstance(row["sequence"], list)
                 and isinstance(row["prediction"], list)
                 else 0,
@@ -278,38 +340,6 @@ def _evaluate_predictions(dataset: pd.DataFrame, has_labels: bool) -> pd.DataFra
             )
         return dataset
 
-    def load(
-        self, *, data_path: Path, predictions_path: Optional[Path] = None, **kwargs: Any
-    ) -> CalibrationDataset:
-        """Load a CalibrationDataset from InstaNovo CSV predictions.
-
-        Args:
-            data_path: Path to the spectrum data file
-            predictions_path: Path to the IPC or parquet beam predictions file
-            **kwargs: Not used
-
-        Returns:
-            CalibrationDataset: An instance of the CalibrationDataset class containing metadata and predictions.
-
-        Raises:
-            ValueError: If predictions_path is None
-        """
-        if predictions_path is None:
-            raise ValueError("predictions_path is required for InstaNovoDatasetLoader")
-
-        beam_predictions_path = predictions_path
-        inputs, has_labels = self._load_spectrum_data(data_path)
-        inputs = self._process_spectrum_data(inputs, has_labels)
-
-        predictions, beams = self._load_beam_preds(beam_predictions_path)
-        beams = self._process_beams(beams)
-        predictions = self._process_predictions(predictions.to_pandas(), has_labels)
-
-        predictions = self._merge_spectrum_data(predictions, inputs)
-        predictions = self._evaluate_predictions(predictions, has_labels)
-
-        return CalibrationDataset(metadata=predictions, predictions=beams)
-
 
 class MZTabDatasetLoader(DatasetLoader):
     """Loader for MZTab predictions from both traditional search engines and Casanovo outputs.
@@ -336,36 +366,25 @@ class MZTabDatasetLoader(DatasetLoader):
     """
 
     def __init__(
-        self, residue_remapping: dict[str, str] | None = None, *args: Any, **kwargs: Any
+        self,
+        residue_masses: dict[str, float],
+        residue_remapping: dict[str, str],
+        isotope_error_range: Tuple[int, int] = (0, 1),
     ) -> None:
         """Initialise the MZTabDatasetLoader.
 
         Args:
-            residue_remapping: Optional dictionary mapping modification strings to UNIMOD format.
-                If None, uses the default CASANOVO_RESIDUE_REMAPPING.
-            *args: Additional positional arguments for parent class
-            **kwargs: Additional keyword arguments for parent class
+            residue_masses: The mapping of residue tokens to their masses.
+            residue_remapping: The mapping of residue notations to UNIMOD tokens.
+            isotope_error_range: The range of isotope errors to consider when matching peptides.
         """
-        super().__init__(*args, **kwargs)
-        self.residue_remapping = (
-            residue_remapping
-            if residue_remapping is not None
-            else CASANOVO_RESIDUE_REMAPPING
+        self.metrics = Metrics(
+            residue_set=ResidueSet(
+                residue_masses=residue_masses, residue_remapping=residue_remapping
+            ),
+            isotope_error_range=isotope_error_range,
         )
 
-    @staticmethod
-    def _load_dataset(predictions_path: Path) -> pl.DataFrame:
-        """Load predictions from mzTab file.
-
-        Args:
-            predictions_path: Path to mzTab file containing predictions
-
-        Returns:
-            DataFrame containing predictions
-        """
-        predictions = mztab.MzTab(str(predictions_path)).spectrum_match_table
-        return pl.DataFrame(predictions)
-
     @staticmethod
     def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
         """Load spectrum data from either a Parquet or IPC file.
@@ -385,7 +404,7 @@ def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
             df = pl.read_ipc(spectrum_path)
         else:
             raise ValueError(
-                f"Unsupported file format: {spectrum_path.suffix}. Supported formats are .parquet and .ipc."
+                f"Unsupported file format for spectrum data: {spectrum_path.suffix}. Supported formats are .parquet and .ipc."
             )
 
         if "sequence" in df.columns:
@@ -393,6 +412,23 @@ def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
 
         return df, has_labels
 
+    @staticmethod
+    def _load_dataset(predictions_path: Path) -> pl.DataFrame:
+        """Load predictions from mzTab file.
+
+        Args:
+            predictions_path: Path to mzTab file containing predictions
+
+        Returns:
+            DataFrame containing predictions
+        """
+        if predictions_path.suffix != ".mztab":
+            raise ValueError(
+                f"Unsupported file format for MZTab predictions: {predictions_path.suffix}. Supported format is .mztab."
+            )
+        predictions = mztab.MzTab(str(predictions_path)).spectrum_match_table
+        return pl.DataFrame(predictions)
+
     def load(
         self, *, data_path: Path, predictions_path: Optional[Path] = None, **kwargs: Any
     ) -> CalibrationDataset:
@@ -520,7 +556,7 @@ def _tokenize(
         ).with_columns(
             # Split sequence string into list of amino acid tokens
             pl.col(tokenised_column)
-            .map_elements(metrics._split_peptide, return_dtype=pl.List(pl.Utf8))
+            .map_elements(self.metrics._split_peptide, return_dtype=pl.List(pl.Utf8))
             .alias(tokenised_column)
         )
 
@@ -561,7 +597,7 @@ def _create_beam_predictions(
 
     def _map_modifications(self, sequence: str) -> str:
         """Map modifications to UNIMOD."""
-        for mod, unimod in self.residue_remapping.items():
+        for mod, unimod in self.metrics.residue_remapping.items():
             sequence = sequence.replace(mod, unimod)
         return sequence
 
@@ -659,7 +695,9 @@ def _evaluate_predictions(
                 # Count matching amino acids between prediction and ground truth
                 pl.struct(["sequence", "prediction"])
                 .map_elements(
-                    lambda row: metrics._novor_match(row["sequence"], row["prediction"])
+                    lambda row: self.metrics._novor_match(
+                        row["sequence"], row["prediction"]
+                    )
                     if isinstance(row["sequence"], list)
                     and isinstance(row["prediction"], list)
                     else 0,
@@ -716,6 +754,26 @@ def load(
 class WinnowDatasetLoader(DatasetLoader):
     """Loader for previously saved CalibrationDataset instances."""
 
+    def __init__(
+        self,
+        residue_masses: dict[str, float],
+        residue_remapping: dict[str, str],
+        isotope_error_range: Tuple[int, int] = (0, 1),
+    ) -> None:
+        """Initialise the WinnowDatasetLoader.
+
+        Args:
+            residue_masses: The mapping of residue tokens to their masses.
+            residue_remapping: The mapping of residue notations to UNIMOD tokens.
+            isotope_error_range: The range of isotope errors to consider when matching peptides.
+        """
+        self.metrics = Metrics(
+            residue_set=ResidueSet(
+                residue_masses=residue_masses, residue_remapping=residue_remapping
+            ),
+            isotope_error_range=isotope_error_range,
+        )
+
     def load(
         self, *, data_path: Path, predictions_path: Optional[Path] = None, **kwargs: Any
     ) -> CalibrationDataset:
@@ -732,34 +790,52 @@ def load(
         if predictions_path is not None:
             raise ValueError("predictions_path is not used for WinnowDatasetLoader")
 
-        with (data_path / "metadata.csv").open(mode="r") as metadata_file:
-            metadata = pd.read_csv(metadata_file)
-            if "sequence" in metadata.columns:
-                metadata["sequence"] = metadata["sequence"].apply(
-                    metrics._split_peptide
-                )
-            metadata["prediction"] = metadata["prediction"].apply(
-                metrics._split_peptide
+        metadata_csv_path = data_path / "metadata.csv"
+        if not metadata_csv_path.exists():
+            raise FileNotFoundError(
+                f"Winnow dataset loader expects a CSV file containing metadata at {metadata_csv_path}. "
+                f"The specified directory {data_path} should contain a 'metadata.csv' file "
+                f"with PSM metadata from a previously saved Winnow dataset."
             )
-            metadata["mz_array"] = metadata["mz_array"].apply(
-                lambda s: ast.literal_eval(s)
-                if "," in s
-                else ast.literal_eval(
-                    re.sub(r"(\n?)(\s+)", ", ", re.sub(r"\[\s+", "[", s))
-                )
-            )
-            metadata["intensity_array"] = metadata["intensity_array"].apply(
-                lambda s: ast.literal_eval(s)
-                if "," in s
-                else ast.literal_eval(
-                    re.sub(r"(\n?)(\s+)", ", ", re.sub(r"\[\s+", "[", s))
-                )
+
+        try:
+            with metadata_csv_path.open(mode="r") as metadata_file:
+                metadata = pd.read_csv(metadata_file)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to read metadata.csv from Winnow dataset directory {data_path}. "
+                f"The file should be a valid CSV containing PSM metadata. Error: {e}"
+            ) from e
+
+        if "sequence" in metadata.columns:
+            metadata["sequence"] = metadata["sequence"].apply(
+                self.metrics._split_peptide
             )
+        metadata["prediction"] = metadata["prediction"].apply(
+            self.metrics._split_peptide
+        )
+        metadata["mz_array"] = metadata["mz_array"].apply(
+            lambda s: ast.literal_eval(s)
+            if "," in s
+            else ast.literal_eval(re.sub(r"(\n?)(\s+)", ", ", re.sub(r"\[\s+", "[", s)))
+        )
+        metadata["intensity_array"] = metadata["intensity_array"].apply(
+            lambda s: ast.literal_eval(s)
+            if "," in s
+            else ast.literal_eval(re.sub(r"(\n?)(\s+)", ", ", re.sub(r"\[\s+", "[", s)))
+        )
 
-        predictions_path = data_path / "predictions.pkl"
-        if predictions_path.exists():
-            with predictions_path.open(mode="rb") as predictions_file:
-                predictions = pickle.load(predictions_file)
+        predictions_pkl_path = data_path / "predictions.pkl"
+        if predictions_pkl_path.exists():
+            try:
+                with predictions_pkl_path.open(mode="rb") as predictions_file:
+                    predictions = pickle.load(predictions_file)
+            except Exception as e:
+                raise ValueError(
+                    f"Failed to load predictions.pkl from Winnow dataset directory {data_path}. "
+                    f"The file should be a pickled beam predictions object from a previously saved Winnow dataset. "
+                    f"Error: {e}"
+                ) from e
         else:
             predictions = None
         return CalibrationDataset(metadata=metadata, predictions=predictions)
diff --git a/winnow/datasets/interfaces.py b/winnow/datasets/interfaces.py
index 91260be..7abfd7a 100644
--- a/winnow/datasets/interfaces.py
+++ b/winnow/datasets/interfaces.py
@@ -3,7 +3,7 @@
 This module provides abstract interfaces that define the contract for dataset loaders.
 """
 
-from typing import Protocol, Optional
+from typing import Protocol, Optional, Tuple
 from pathlib import Path
 from winnow.datasets.calibration_dataset import CalibrationDataset
 
@@ -14,6 +14,21 @@ class DatasetLoader(Protocol):
     Any class implementing this protocol must provide a load method that returns a CalibrationDataset.
     """
 
+    def __init__(
+        self,
+        residue_masses: dict[str, float],
+        residue_remapping: dict[str, str] | None = None,
+        isotope_error_range: Tuple[int, int] = (0, 1),
+    ) -> None:
+        """Initialise the DatasetLoader.
+
+        Args:
+            residue_masses: The mapping of residue tokens to their masses.
+            residue_remapping: Optional mapping of residue notations to UNIMOD tokens. Defaults to None.
+            isotope_error_range: The range of isotope errors to consider when matching peptides. Defaults to (0, 1).
+        """
+        ...
+
     def load(
         self, *, data_path: Path, predictions_path: Optional[Path] = None, **kwargs
     ) -> CalibrationDataset:
diff --git a/winnow/fdr/database_grounded.py b/winnow/fdr/database_grounded.py
index 141405c..90ba323 100644
--- a/winnow/fdr/database_grounded.py
+++ b/winnow/fdr/database_grounded.py
@@ -2,9 +2,9 @@
 import pandas as pd
 import numpy as np
 from instanovo.utils.metrics import Metrics
+from instanovo.utils.residues import ResidueSet
 
 from winnow.fdr.base import FDRControl
-from winnow.constants import residue_set
 
 
 class DatabaseGroundedFDRControl(FDRControl):
@@ -13,16 +13,27 @@ class DatabaseGroundedFDRControl(FDRControl):
     This method estimates FDR thresholds by comparing model-predicted peptides to ground-truth peptides from a database.
     """
 
-    def __init__(self, confidence_feature: str) -> None:
+    def __init__(
+        self,
+        confidence_feature: str,
+        residue_masses: dict[str, float],
+        isotope_error_range: Tuple[int, int] = (0, 1),
+        drop: int = 10,
+    ) -> None:
         super().__init__()
         self.confidence_feature = confidence_feature
+        self.residue_masses = residue_masses
+        self.isotope_error_range = isotope_error_range
+        self.drop = drop
+
+        self.metrics = Metrics(
+            residue_set=ResidueSet(residue_masses=residue_masses),
+            isotope_error_range=isotope_error_range,
+        )
 
     def fit(  # type: ignore
         self,
         dataset: pd.DataFrame,
-        residue_masses: dict[str, float],
-        isotope_error_range: Tuple[int, int] = (0, 1),
-        drop: int = 10,
     ) -> None:
         """Computes the precision-recall curve by comparing model predictions to database-grounded peptide sequences.
 
@@ -32,25 +43,14 @@ def fit(  # type: ignore
                 - 'peptide': Ground-truth peptide sequences.
                 - 'prediction': Model-predicted peptide sequences.
                 - 'confidence': Confidence scores associated with predictions.
-
-            residue_masses (dict[str, float]): A dictionary mapping amino acid residues to their respective masses.
-
-            isotope_error_range (Tuple[int, int], optional): Range of isotope errors to consider when matching peptides. Defaults to (0, 1).
-
-            drop (int): Number of top-scoring predictions to exclude when computing FDR thresholds. Defaults to 10.
         """
         assert len(dataset) > 0, "Fit method requires non-empty data"
 
-        metrics = Metrics(
-            residue_set=residue_set, isotope_error_range=isotope_error_range
-        )
-
-        dataset["sequence"] = dataset["sequence"].apply(metrics._split_peptide)
-        # dataset["prediction"] = dataset["prediction"].apply(metrics._split_peptide)
+        dataset["sequence"] = dataset["sequence"].apply(self.metrics._split_peptide)
 
         dataset["num_matches"] = dataset.apply(
             lambda row: (
-                metrics._novor_match(row["sequence"], row["prediction"])
+                self.metrics._novor_match(row["sequence"], row["prediction"])
                 if isinstance(row["prediction"], list)
                 else 0
             ),
@@ -70,5 +70,5 @@ def fit(  # type: ignore
         precision = np.cumsum(dataset["correct"]) / np.arange(1, len(dataset) + 1)
         confidence = np.array(dataset[self.confidence_feature])
 
-        self._fdr_values = np.array(1 - precision[drop:])
-        self._confidence_scores = confidence[drop:]
+        self._fdr_values = np.array(1 - precision[self.drop :])
+        self._confidence_scores = confidence[self.drop :]
diff --git a/winnow/scripts/main.py b/winnow/scripts/main.py
index 58b6ec5..8726c35 100644
--- a/winnow/scripts/main.py
+++ b/winnow/scripts/main.py
@@ -1,90 +1,18 @@
-# -- Import
-from winnow.calibration.calibration_features import (
-    PrositFeatures,
-    MassErrorFeature,
-    RetentionTimeFeature,
-    ChimericFeatures,
-    BeamFeatures,
-)
-from winnow.calibration.calibrator import ProbabilityCalibrator
-from winnow.datasets.calibration_dataset import CalibrationDataset
-from winnow.datasets.data_loaders import (
-    InstaNovoDatasetLoader,
-    MZTabDatasetLoader,
-    PointNovoDatasetLoader,
-    WinnowDatasetLoader,
-)
-from winnow.fdr.nonparametric import NonParametricFDRControl
-from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
-from winnow.constants import RESIDUE_MASSES
-
-from dataclasses import dataclass
-from enum import Enum
+from typing import Union, Tuple, Optional, List
 import typer
-from typing_extensions import Annotated
-from typing import Union, Optional
 import logging
 from rich.logging import RichHandler
-from pathlib import Path
-import yaml
 import pandas as pd
+from hydra import initialize, compose
+from pathlib import Path
+from hydra.utils import instantiate
 
+from winnow.calibration.calibrator import ProbabilityCalibrator
+from winnow.datasets.calibration_dataset import CalibrationDataset
+from winnow.fdr.nonparametric import NonParametricFDRControl
+from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
 
-# --- Configuration ---
-SEED = 42
-MZ_TOLERANCE = 0.02
-HIDDEN_DIM = 10
-TRAIN_FRACTION = 0.1
-
-
-class DataSource(Enum):
-    """Source of a dataset to be used for calibration."""
-
-    winnow = "winnow"
-    instanovo = "instanovo"
-    pointnovo = "pointnovo"
-    mztab = "mztab"
-
-
-@dataclass
-class WinnowDatasetConfig:
-    """Config for calibration datasets saved through `winnow`."""
-
-    data_dir: Path
-
-
-@dataclass
-class InstaNovoDatasetConfig:
-    """Config for calibration datasets generated by InstaNovo."""
-
-    beam_predictions_path: Path
-    spectrum_path: Path
-
-
-@dataclass
-class MZTabDatasetConfig:
-    """Config for calibration datasets saved in MZTab format."""
-
-    spectrum_path: Path
-    predictions_path: Path
-
-
-@dataclass
-class PointNovoDatasetConfig:
-    """Config for calibration datasets generated by PointNovo."""
-
-    mgf_path: Path
-    predictions_path: Path
-
-
-class FDRMethod(Enum):
-    """FDR estimation method."""
-
-    database = "database-ground"
-    winnow = "winnow"
-
-
-# --- Logging Setup ---
+# Logging setup
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 # Prevent duplicate messages by disabling propagation and using only RichHandler
@@ -92,70 +20,15 @@ class FDRMethod(Enum):
 if not logger.handlers:
     logger.addHandler(RichHandler())
 
+
+# Typer CLI setup
 app = typer.Typer(
     name="winnow",
-    help="""
-    Confidence calibration and FDR estimation for de novo peptide sequencing.
-    """,
+    help="""Confidence calibration and FDR estimation for de novo peptide sequencing.""",
+    rich_markup_mode="rich",
 )
 
 
-def load_dataset(
-    data_source: DataSource, dataset_config_path: Path
-) -> CalibrationDataset:
-    """Load PSM dataset into a `CalibrationDataset` object.
-
-    Args:
-        data_source (DataSource): The source of the dataset
-        dataset_config_path (Path): Path to a `.yaml` file containing arguments
-                                    for the load method for the data source.
-
-    Raises:
-        TypeError: If `data_source` is not one of the supported data sources
-
-    Returns:
-        CalibrationDataset: A calibration dataset
-    """
-    logger.info(f"Loading dataset from {data_source}.")
-    with open(dataset_config_path) as dataset_config_file:
-        if data_source is DataSource.winnow:
-            winnow_dataset_config = WinnowDatasetConfig(
-                **yaml.safe_load(dataset_config_file)
-            )
-            dataset = WinnowDatasetLoader().load(
-                data_path=Path(winnow_dataset_config.data_dir)
-            )
-        elif data_source is DataSource.instanovo:
-            instanovo_dataset_config = InstaNovoDatasetConfig(
-                **yaml.safe_load(dataset_config_file)
-            )
-            dataset = InstaNovoDatasetLoader().load(
-                data_path=Path(instanovo_dataset_config.spectrum_path),
-                predictions_path=Path(instanovo_dataset_config.beam_predictions_path),
-            )
-        elif data_source is DataSource.mztab:
-            mztab_dataset_config = MZTabDatasetConfig(
-                **yaml.safe_load(dataset_config_file)
-            )
-            dataset = MZTabDatasetLoader().load(
-                data_path=Path(mztab_dataset_config.spectrum_path),
-                predictions_path=Path(mztab_dataset_config.predictions_path),
-            )
-        elif data_source is DataSource.pointnovo:
-            pointnovo_dataset_config = PointNovoDatasetConfig(
-                **yaml.safe_load(dataset_config_file)
-            )
-            dataset = PointNovoDatasetLoader().load(
-                data_path=Path(pointnovo_dataset_config.mgf_path),
-                predictions_path=Path(pointnovo_dataset_config.predictions_path),
-            )
-        else:
-            raise TypeError(
-                f"Data source was {data_source}. Only 'instanovo', 'mztab' and 'pointnovo' are supported."
-            )
-    return dataset
-
-
 def filter_dataset(dataset: CalibrationDataset) -> CalibrationDataset:
     """Filter out rows whose predictions are empty or contain unsupported PSMs.
 
@@ -165,7 +38,6 @@ def filter_dataset(dataset: CalibrationDataset) -> CalibrationDataset:
     Returns:
         CalibrationDataset: The filtered dataset
     """
-    logger.info("Filtering dataset.")
     filtered_dataset = (
         dataset.filter_entries(
             # Filter out non-list predictions
@@ -177,47 +49,6 @@ def filter_dataset(dataset: CalibrationDataset) -> CalibrationDataset:
     return filtered_dataset
 
 
-def initialise_calibrator(
-    learn_prosit_missing: bool = True,
-    learn_chimeric_missing: bool = True,
-    learn_retention_missing: bool = True,
-) -> ProbabilityCalibrator:
-    """Set up the probability calibrator with features.
-
-    Args:
-        learn_prosit_missing: Whether to learn from missing Prosit features. If False,
-            errors will be raised when invalid spectra are encountered.
-        learn_chimeric_missing: Whether to learn from missing chimeric features. If False,
-            errors will be raised when invalid spectra are encountered.
-        learn_retention_missing: Whether to learn from missing retention time features. If False,
-            errors will be raised when invalid spectra are encountered.
-
-    Returns:
-        ProbabilityCalibrator: Configured calibrator with specified features.
-    """
-    calibrator = ProbabilityCalibrator(SEED)
-    calibrator.add_feature(MassErrorFeature(residue_masses=RESIDUE_MASSES))
-    calibrator.add_feature(
-        PrositFeatures(
-            mz_tolerance=MZ_TOLERANCE, learn_from_missing=learn_prosit_missing
-        )
-    )
-    calibrator.add_feature(
-        RetentionTimeFeature(
-            hidden_dim=HIDDEN_DIM,
-            train_fraction=TRAIN_FRACTION,
-            learn_from_missing=learn_retention_missing,
-        )
-    )
-    calibrator.add_feature(
-        ChimericFeatures(
-            mz_tolerance=MZ_TOLERANCE, learn_from_missing=learn_chimeric_missing
-        )
-    )
-    calibrator.add_feature(BeamFeatures())
-    return calibrator
-
-
 def apply_fdr_control(
     fdr_control: Union[NonParametricFDRControl, DatabaseGroundedFDRControl],
     dataset: CalibrationDataset,
@@ -229,10 +60,8 @@ def apply_fdr_control(
         fdr_control.fit(dataset=dataset.metadata[confidence_column])
         dataset.metadata = fdr_control.add_psm_pep(dataset.metadata, confidence_column)
     else:
-        fdr_control.fit(
-            dataset=dataset.metadata[confidence_column],
-            residue_masses=RESIDUE_MASSES,
-        )
+        fdr_control.fit(dataset=dataset.metadata[confidence_column])
+
     dataset.metadata = fdr_control.add_psm_fdr(dataset.metadata, confidence_column)
     dataset.metadata = fdr_control.add_psm_q_value(dataset.metadata, confidence_column)
     confidence_cutoff = fdr_control.get_confidence_cutoff(threshold=fdr_threshold)
@@ -249,192 +78,212 @@ def check_if_labelled(dataset: CalibrationDataset) -> None:
         )
 
 
-@app.command(name="train", help="Fit a calibration model.")
-def train(
-    data_source: Annotated[
-        DataSource, typer.Option(help="The type of PSM dataset to be calibrated.")
-    ],
-    dataset_config_path: Annotated[
-        Path,
-        typer.Option(
-            help="The path to the config with the specification of the calibration dataset."
-        ),
-    ],
-    model_output_dir: Annotated[
-        Path,
-        typer.Option(
-            help="The path to the directory where the fitted model checkpoint will be saved."
-        ),
-    ],
-    dataset_output_path: Annotated[
-        Path, typer.Option(help="The path to write the output to.")
-    ],
-    learn_prosit_missing: Annotated[
-        bool,
-        typer.Option(
-            help="Whether to learn from missing Prosit features. If False, training will fail if any spectra have invalid Prosit predictions."
-        ),
-    ] = True,
-    learn_chimeric_missing: Annotated[
-        bool,
-        typer.Option(
-            help="Whether to learn from missing chimeric features. If False, training will fail if any spectra have invalid predictions for chimeric feature computation."
-        ),
-    ] = True,
-    learn_retention_missing: Annotated[
-        bool,
-        typer.Option(
-            help="Whether to learn from missing retention time features. If False, training will fail if any spectra have invalid retention time predictions."
-        ),
-    ] = True,
-):
-    """Fit the calibration model.
+def separate_metadata_and_predictions(
+    dataset_metadata: pd.DataFrame,
+    fdr_control: Union[NonParametricFDRControl, DatabaseGroundedFDRControl],
+    confidence_column: str,
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Separate out metadata from prediction and FDR metrics.
 
     Args:
-        data_source (Annotated[ DataSource, typer.Option, optional): The type of PSM dataset to be calibrated.
-        dataset_config_path (Annotated[ Path, typer.Option, optional): The path to the config with the specification of the calibration dataset.
-        model_output_dir (Annotated[Path, typer.Option, optional]): The path to the directory where the fitted model checkpoint will be saved.
-        dataset_output_path (Annotated[Path, typer.Option, optional): The path to write the output to.
+        dataset_metadata: The dataframe holding metadata, computed features, predictions and FDR metrics, to be split in two.
+        fdr_control: The FDR control object used (to determine which columns were added).
+        confidence_column: The name of the confidence column.
+
+    Returns:
+        Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the metadata dataframe and the prediction and FDR metrics dataframe.
     """
-    # -- Load dataset
-    logger.info("Loading datasets.")
-    annotated_dataset = load_dataset(
-        data_source=data_source,
-        dataset_config_path=dataset_config_path,
-    )
+    # Columns moved to the predictions frame; note spectrum_id is moved too, so it is absent from the returned metadata
+    preds_and_fdr_metrics_cols = [
+        "spectrum_id",
+        confidence_column,
+        "prediction",
+        "psm_fdr",
+        "psm_q_value",
+    ]
+    if "sequence" in dataset_metadata.columns:
+        preds_and_fdr_metrics_cols.append("sequence")
+    # NonParametricFDRControl adds psm_pep column
+    if isinstance(fdr_control, NonParametricFDRControl):
+        preds_and_fdr_metrics_cols.append("psm_pep")
+    dataset_preds_and_fdr_metrics = dataset_metadata[preds_and_fdr_metrics_cols]
+    dataset_metadata = dataset_metadata.drop(columns=preds_and_fdr_metrics_cols)
+    return dataset_metadata, dataset_preds_and_fdr_metrics
+
+
+def train_entry_point(overrides: Optional[List[str]] = None) -> None:
+    """The main training pipeline entry point.
+
+    Args:
+        overrides: Optional list of config overrides.
+    """
+    with initialize(
+        config_path="../../config", version_base="1.3", job_name="winnow_train"
+    ):
+        cfg = compose(config_name="train", overrides=overrides)
+
+    logger.info("Starting training pipeline.")
+    logger.info(f"Training configuration: {cfg}")
 
+    # Load dataset - Hydra creates the DatasetLoader object
+    logger.info("Loading dataset.")
+    data_loader = instantiate(cfg.data_loader)
+
+    # Extract dataset loading parameters and convert to dict for flexible kwargs
+    dataset_params = dict(cfg.dataset)
+    # Rename config keys to match the Protocol interface
+    dataset_params["data_path"] = dataset_params.pop("spectrum_path_or_directory")
+    dataset_params["predictions_path"] = dataset_params.pop("predictions_path", None)
+
+    annotated_dataset = data_loader.load(**dataset_params)
+
+    logger.info("Filtering dataset.")
     annotated_dataset = filter_dataset(annotated_dataset)
 
-    # Train
-    logger.info("Training calibrator.")
-    calibrator = initialise_calibrator(
-        learn_prosit_missing=learn_prosit_missing,
-        learn_chimeric_missing=learn_chimeric_missing,
-        learn_retention_missing=learn_retention_missing,
-    )
+    # Instantiate the calibrator from the config
+    logger.info("Instantiating calibrator from config.")
+    calibrator = instantiate(cfg.calibrator)
+
+    # Fit the calibrator to the dataset
+    logger.info("Fitting calibrator to dataset.")
     calibrator.fit(annotated_dataset)
 
-    # -- Write model checkpoints
-    logger.info(f"Saving model to {model_output_dir}")
-    ProbabilityCalibrator.save(calibrator, model_output_dir)
+    # Save the model
+    logger.info(f"Saving model to {cfg.model_output_dir}")
+    ProbabilityCalibrator.save(calibrator, cfg.model_output_dir)
 
-    # -- Write output
-    logger.info("Writing output.")
-    annotated_dataset.to_csv(dataset_output_path)
-    logger.info(f"Training dataset results saved: {dataset_output_path}")
+    # Save the training dataset results
+    logger.info(f"Saving training dataset results to {cfg.dataset_output_path}")
+    annotated_dataset.to_csv(cfg.dataset_output_path)
 
+    logger.info("Training pipeline completed successfully.")
 
-@app.command(
-    name="predict",
-    help="Calibrate scores and optionally filter results to a target FDR.",
-)
-def predict(
-    data_source: Annotated[
-        DataSource, typer.Option(help="The type of PSM dataset to be calibrated.")
-    ],
-    dataset_config_path: Annotated[
-        Path,
-        typer.Option(
-            help="The path to the config with the specification of the calibration dataset."
-        ),
-    ],
-    method: Annotated[
-        FDRMethod, typer.Option(help="Method to use for FDR estimation.")
-    ],
-    fdr_threshold: Annotated[
-        float,
-        typer.Option(
-            help="The target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.)"
-        ),
-    ],
-    confidence_column: Annotated[
-        str, typer.Option(help="Name of the column with confidence scores.")
-    ],
-    output_folder: Annotated[
-        Path, typer.Option(help="The folder path to write the outputs to.")
-    ],
-    huggingface_model_name: Annotated[
-        str,
-        typer.Option(
-            help="HuggingFace model identifier. If neither this nor `--local-model-folder` are provided, loads default model from HuggingFace.",
-        ),
-    ] = "InstaDeepAI/winnow-general-model",
-    local_model_folder: Annotated[
-        Optional[Path],
-        typer.Option(
-            help="Path to local calibrator directory. If neither this nor `--huggingface-model-name` are provided, loads default pretrained model from HuggingFace.",
-        ),
-    ] = None,
-):
-    """Calibrate model scores, estimate FDR and filter for a threshold.
+
+def predict_entry_point(overrides: Optional[List[str]] = None) -> None:
+    """The main prediction pipeline entry point.
 
     Args:
-        data_source (Annotated[ DataSource, typer.Option, optional): The type of PSM dataset to be calibrated.
-        dataset_config_path (Annotated[ Path, typer.Option, optional): The path to the config with the specification of the dataset.
-        method (Annotated[ FDRMethod, typer.Option, optional): Method to use for FDR estimation.
-        fdr_threshold (Annotated[ float, typer.Option, optional): The target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.).
-        confidence_column (Annotated[ str, typer.Option, optional): Name of the column with confidence scores.
-        output_folder (Annotated[ Path, typer.Option, optional): The folder path to write the outputs to: `metadata.csv` and `preds_and_fdr_metrics.csv`.
-        huggingface_model_name (Annotated[str, typer.Option, optional): HuggingFace model identifier.
-        local_model_folder (Annotated[Path, typer.Option, optional): Path to local calibrator directory (e.g., Path("./my-model-directory")).
-
-    Note that either `local_model_folder` or `huggingface-model-name` may be overwritten, but not both.
-    If neither `local_model_folder` nor `huggingface-model-name` are provided, the general model from HuggingFace will be loaded by default (i.e., `InstaDeepAI/winnow-general-model`).
+        overrides: Optional list of config overrides.
     """
-    # -- Load dataset
-    logger.info("Loading datasets.")
-    dataset = load_dataset(
-        data_source=data_source,
-        dataset_config_path=dataset_config_path,
-    )
+    with initialize(
+        config_path="../../config", version_base="1.3", job_name="winnow_predict"
+    ):
+        cfg = compose(config_name="predict", overrides=overrides)
+
+    logger.info("Starting prediction pipeline.")
+    logger.info(f"Prediction configuration: {cfg}")
+
+    # Load dataset - Hydra creates the DatasetLoader object
+    logger.info("Loading dataset.")
+    data_loader = instantiate(cfg.data_loader)
 
+    # Extract dataset loading parameters and convert to dict for flexible kwargs
+    dataset_params = dict(cfg.dataset)
+    # Rename config keys to match the Protocol interface
+    dataset_params["data_path"] = dataset_params.pop("spectrum_path_or_directory")
+    dataset_params["predictions_path"] = dataset_params.pop("predictions_path", None)
+
+    dataset = data_loader.load(**dataset_params)
+
+    logger.info("Filtering dataset.")
     dataset = filter_dataset(dataset)
 
-    # Predict
-    # If local_model_folder is an empty string, load the HuggingFace model
-    if local_model_folder is None:
-        logger.info(f"Loading HuggingFace model: {huggingface_model_name}")
-        calibrator = ProbabilityCalibrator.load(huggingface_model_name)
-    # Otherwise, load the model from the local folder path
-    else:
-        logger.info(f"Loading local model from: {local_model_folder}")
-        calibrator = ProbabilityCalibrator.load(local_model_folder)
+    # Load trained calibrator
+    logger.info("Loading trained calibrator.")
+    calibrator = ProbabilityCalibrator.load(
+        pretrained_model_name_or_path=cfg.calibrator.pretrained_model_name_or_path,
+        cache_dir=cfg.calibrator.cache_dir,
+    )
 
+    # Calibrate scores
     logger.info("Calibrating scores.")
     calibrator.predict(dataset)
 
-    if method is FDRMethod.winnow:
-        logger.info("Applying FDR control.")
-        dataset_metadata = apply_fdr_control(
-            NonParametricFDRControl(), dataset, fdr_threshold, confidence_column
-        )
-    elif method is FDRMethod.database:
-        logger.info("Applying FDR control.")
+    # Instantiate FDR control from config - Hydra handles which FDR method to use
+    logger.info("Instantiating FDR control from config.")
+    fdr_control = instantiate(cfg.fdr_method)
+
+    # Check if dataset is labelled for database-grounded FDR
+    if isinstance(fdr_control, DatabaseGroundedFDRControl):
         check_if_labelled(dataset)
-        dataset_metadata = apply_fdr_control(
-            DatabaseGroundedFDRControl(confidence_feature=confidence_column),
-            dataset,
-            fdr_threshold,
-            confidence_column,
-        )
 
-    # -- Write output
-    logger.info("Writing output.")
-    # Separate out metadata from prediction and FDR metrics
-    preds_and_fdr_metrics_cols = [
-        "spectrum_id",
-        confidence_column,
-        "prediction",
-        "psm_fdr",
-        "psm_q_value",
-    ]
-    if "sequence" in dataset_metadata.columns:
-        preds_and_fdr_metrics_cols.append("sequence")
-    if method is FDRMethod.winnow:
-        preds_and_fdr_metrics_cols.append("psm_pep")
-    dataset_preds_and_fdr_metrics = dataset_metadata[preds_and_fdr_metrics_cols]
-    dataset_metadata = dataset_metadata.drop(columns=preds_and_fdr_metrics_cols)
-    # Write outputs
-    dataset_metadata.to_csv(output_folder / "metadata.csv")
-    dataset_preds_and_fdr_metrics.to_csv(output_folder / "preds_and_fdr_metrics.csv")
-    logger.info(f"Outputs saved: {output_folder}")
+    # Apply FDR control
+    logger.info(f"Applying {fdr_control.__class__.__name__} FDR control.")
+    dataset_metadata = apply_fdr_control(
+        fdr_control,
+        dataset,
+        cfg.fdr_control.fdr_threshold,
+        cfg.fdr_control.confidence_column,
+    )
+
+    # Write output
+    logger.info(f"Writing output to {cfg.output_folder}")
+    dataset_metadata, dataset_preds_and_fdr_metrics = separate_metadata_and_predictions(
+        dataset_metadata, fdr_control, cfg.fdr_control.confidence_column
+    )
+    output_folder = Path(cfg.output_folder)
+    output_folder.mkdir(parents=True, exist_ok=True)
+    dataset_metadata.to_csv(output_folder.joinpath("metadata.csv"))
+    dataset_preds_and_fdr_metrics.to_csv(
+        output_folder.joinpath("preds_and_fdr_metrics.csv")
+    )
+
+    logger.info("Prediction pipeline completed successfully.")
+
+
+@app.command(
+    name="train",
+    help=(
+        "Train a probability calibration model on annotated peptide sequencing data.\n\n"
+        "This command loads your dataset, trains calibration features, and saves the trained model.\n\n"
+        "[bold cyan]Quick start:[/bold cyan]\n"
+        "  [dim]winnow train[/dim]  # Uses default config from config/train.yaml\n\n"
+        "[bold cyan]Override parameters:[/bold cyan]\n"
+        "  [dim]winnow train data_loader=mztab[/dim]  # Use MZTab format instead of InstaNovo\n"
+        "  [dim]winnow train model_output_dir=models/my_model[/dim]  # Custom output location\n"
+        "  [dim]winnow train calibrator.seed=42[/dim]  # Set random seed\n\n"
+        "[bold cyan]Configuration files to customise:[/bold cyan]\n"
+        "  • config/train.yaml - Main config (data paths, output locations)\n"
+        "  • config/calibrator.yaml - Model architecture and features\n"
+        "  • config/data_loader/ - Dataset format loaders\n"
+        "  • config/residues.yaml - Amino acid masses and modifications"
+    ),
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def train(ctx: typer.Context) -> None:
+    """Passes control directly to the Hydra training pipeline."""
+    # Capture extra arguments as Hydra overrides
+    overrides = ctx.args if ctx.args else None
+    train_entry_point(overrides)
+
+
+@app.command(
+    name="predict",
+    help=(
+        "Calibrate confidence scores and filter peptide predictions by false discovery rate (FDR).\n\n"
+        "This command loads your dataset, applies a trained calibrator to improve confidence scores, "
+        "estimates FDR using your chosen method, and outputs filtered predictions at your target FDR threshold.\n\n"
+        "[bold cyan]Quick start:[/bold cyan]\n"
+        "  [dim]winnow predict[/dim]  # Uses default config from config/predict.yaml\n\n"
+        "[bold cyan]Override parameters:[/bold cyan]\n"
+        "  [dim]winnow predict data_loader=mztab[/dim]  # Use MZTab format instead of InstaNovo\n"
+        "  [dim]winnow predict fdr_method=database_grounded[/dim]  # Use database-grounded FDR\n"
+        "  [dim]winnow predict fdr_control.fdr_threshold=0.01[/dim]  # Target 1% FDR instead of 5%\n"
+        "  [dim]winnow predict output_folder=results/my_run[/dim]  # Custom output location\n\n"
+        "[bold cyan]Configuration files to customise:[/bold cyan]\n"
+        "  • config/predict.yaml - Main config (data paths, FDR settings, output)\n"
+        "  • config/fdr_method/ - FDR methods (nonparametric, database_grounded)\n"
+        "  • config/data_loader/ - Dataset format loaders\n"
+        "  • config/residues.yaml - Amino acid masses and modifications"
+    ),
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def predict(ctx: typer.Context) -> None:
+    """Passes control directly to the Hydra predict pipeline."""
+    # Capture extra arguments as Hydra overrides
+    overrides = ctx.args if ctx.args else None
+    predict_entry_point(overrides)
+
+
+if __name__ == "__main__":
+    app()

From 8d3a02a2379e1629ab53cddea3e7ae210289473c Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 11:46:54 +0000
Subject: [PATCH 03/17] test: update tests to use extra init arguments

---
 .../calibration/test_calibration_features.py  | 41 ++++++++--
 tests/fdr/test_database_grounded.py           | 75 +++++++++----------
 2 files changed, 69 insertions(+), 47 deletions(-)

diff --git a/tests/calibration/test_calibration_features.py b/tests/calibration/test_calibration_features.py
index 90aba49..d785d26 100644
--- a/tests/calibration/test_calibration_features.py
+++ b/tests/calibration/test_calibration_features.py
@@ -16,7 +16,6 @@
     _raise_value_error,
 )
 from winnow.datasets.calibration_dataset import CalibrationDataset
-from winnow.constants import RESIDUE_MASSES
 
 
 class TestUtilityFunctions:
@@ -105,7 +104,23 @@ class TestMassErrorFeature:
     @pytest.fixture()
     def mass_error_feature(self):
         """Create a MassErrorFeature instance for testing."""
-        return MassErrorFeature(residue_masses=RESIDUE_MASSES)
+        residue_masses = {
+            "G": 57.021464,
+            "A": 71.037114,
+            "P": 97.052764,
+            "E": 129.042593,
+            "T": 101.047670,
+            "I": 113.084064,
+            "D": 115.026943,
+            "R": 156.101111,
+            "O": 237.147727,
+            "N": 114.042927,
+            "S": 87.032028,
+            "M": 131.040485,
+            "L": 113.084064,
+            "V": 99.068414,
+        }
+        return MassErrorFeature(residue_masses=residue_masses)
 
     @pytest.fixture()
     def sample_dataset(self):
@@ -469,7 +484,9 @@ class TestRetentionTimeFeature:
     @pytest.fixture()
     def retention_time_feature(self):
         """Create a RetentionTimeFeature instance for testing."""
-        return RetentionTimeFeature(hidden_dim=10, train_fraction=0.8)
+        return RetentionTimeFeature(
+            hidden_dim=10, train_fraction=0.8, invalid_prosit_tokens=["U", "O", "X"]
+        )
 
     @pytest.fixture()
     def sample_dataset_with_rt(self):
@@ -494,7 +511,9 @@ def test_properties(self, retention_time_feature):
 
     def test_initialization_parameters(self):
         """Test initialization with custom parameters."""
-        feature = RetentionTimeFeature(hidden_dim=10, train_fraction=0.8)
+        feature = RetentionTimeFeature(
+            hidden_dim=10, train_fraction=0.8, invalid_prosit_tokens=["U", "O", "X"]
+        )
         assert feature.hidden_dim == 10
         assert feature.train_fraction == 0.8
         assert feature.prosit_irt_model_name == "Prosit_2019_irt"
@@ -779,7 +798,7 @@ class TestPrositFeatures:
     @pytest.fixture()
     def prosit_features(self):
         """Create a PrositFeatures instance for testing."""
-        return PrositFeatures(mz_tolerance=0.02)
+        return PrositFeatures(mz_tolerance=0.02, invalid_prosit_tokens=["U", "O", "X"])
 
     @pytest.fixture()
     def sample_dataset_with_spectra(self):
@@ -818,7 +837,9 @@ def test_properties(self, prosit_features):
 
     def test_initialization_with_tolerance(self):
         """Test initialization with custom tolerance."""
-        feature = PrositFeatures(mz_tolerance=0.01)
+        feature = PrositFeatures(
+            mz_tolerance=0.01, invalid_prosit_tokens=["U", "O", "X"]
+        )
         assert feature.mz_tolerance == 0.01
         assert feature.prosit_intensity_model_name == "Prosit_2020_intensity_HCD"
 
@@ -1152,7 +1173,9 @@ class TestChimericFeatures:
     @pytest.fixture()
     def chimeric_features(self):
         """Create a ChimericFeatures instance for testing."""
-        return ChimericFeatures(mz_tolerance=0.02)
+        return ChimericFeatures(
+            mz_tolerance=0.02, invalid_prosit_tokens=["U", "O", "X"]
+        )
 
     @pytest.fixture()
     def sample_dataset_with_beam_predictions(self):
@@ -1205,7 +1228,9 @@ def test_properties(self, chimeric_features):
 
     def test_initialization_with_tolerance(self):
         """Test initialization with custom tolerance."""
-        feature = ChimericFeatures(mz_tolerance=0.01)
+        feature = ChimericFeatures(
+            mz_tolerance=0.01, invalid_prosit_tokens=["U", "O", "X"]
+        )
         assert feature.mz_tolerance == 0.01
 
     def test_prepare_does_nothing(
diff --git a/tests/fdr/test_database_grounded.py b/tests/fdr/test_database_grounded.py
index b5d9a2b..8e5f051 100644
--- a/tests/fdr/test_database_grounded.py
+++ b/tests/fdr/test_database_grounded.py
@@ -1,7 +1,6 @@
 """Unit tests for winnow DatabaseGroundedFDRControl."""
 
 import pytest
-from unittest.mock import patch, Mock
 import pandas as pd
 from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
 
@@ -12,7 +11,24 @@ class TestDatabaseGroundedFDRControl:
     @pytest.fixture()
     def db_fdr_control(self):
         """Create a DatabaseGroundedFDRControl instance for testing."""
-        return DatabaseGroundedFDRControl(confidence_feature="confidence")
+        residue_masses = {
+            "G": 57.021464,
+            "A": 71.037114,
+            "P": 97.052764,
+            "E": 129.042593,
+            "T": 101.047670,
+            "I": 113.084064,
+            "D": 115.026943,
+            "R": 156.101111,
+            "O": 237.147727,
+            "N": 114.042927,
+            "S": 87.032028,
+            "M": 131.040485,
+            "L": 113.084064,
+        }
+        return DatabaseGroundedFDRControl(
+            confidence_feature="confidence", residue_masses=residue_masses
+        )
 
     @pytest.fixture()
     def sample_dataset_df(self):
@@ -32,59 +48,40 @@ def test_initialization(self, db_fdr_control):
         assert db_fdr_control._fdr_values is None
         assert db_fdr_control._confidence_scores is None
 
-    @patch("winnow.fdr.database_grounded.Metrics")
-    def test_fit_basic(self, mock_metrics, db_fdr_control, sample_dataset_df):
+    def test_fit_basic(self, db_fdr_control, sample_dataset_df):
         """Test basic fitting functionality."""
-        # Mock the Metrics class and its methods
-        mock_metrics_instance = Mock()
-        mock_metrics.return_value = mock_metrics_instance
-        mock_metrics_instance._split_peptide = lambda x: list(x)
-
-        residue_masses = {
-            "P": 100.0,
-            "E": 110.0,
-            "T": 120.0,
-            "I": 130.0,
-            "D": 140.0,
-            "R": 150.0,
-            "O": 160.0,
-            "N": 170.0,
-            "S": 180.0,
-            "A": 190.0,
-            "M": 200.0,
-            "L": 210.0,
-        }
+        # Convert sequences to list format as expected by the implementation
+        sample_dataset_df = sample_dataset_df.copy()
+        sample_dataset_df["prediction"] = sample_dataset_df["prediction"].apply(list)
 
         # Should not raise an exception
-        db_fdr_control.fit(sample_dataset_df, residue_masses)
+        db_fdr_control.fit(sample_dataset_df)
 
-        # Check that metrics was called
-        mock_metrics.assert_called_once()
+        # Check that fit created the required attributes
+        assert hasattr(db_fdr_control, "preds")
+        assert hasattr(db_fdr_control, "_fdr_values")
+        assert hasattr(db_fdr_control, "_confidence_scores")
+        assert db_fdr_control._fdr_values is not None
+        assert db_fdr_control._confidence_scores is not None
 
     def test_fit_with_parameters(self, db_fdr_control):
         """Test fit with custom parameters."""
         sample_df = pd.DataFrame(
-            {"sequence": ["TEST"], "prediction": ["TEST"], "confidence": [0.9]}
+            {"sequence": ["TEST"], "prediction": [list("TEST")], "confidence": [0.9]}
         )
-        residue_masses = {"T": 100.0, "E": 110.0, "S": 120.0}
-
-        with patch("winnow.fdr.database_grounded.Metrics") as mock_metrics:
-            mock_metrics_instance = Mock()
-            mock_metrics.return_value = mock_metrics_instance
-            mock_metrics_instance._split_peptide = lambda x: list(x)
 
-            db_fdr_control.fit(
-                sample_df, residue_masses, isotope_error_range=(0, 2), drop=5
-            )
+        db_fdr_control.fit(sample_df)
 
-            # Check that Metrics was initialized with correct parameters
-            mock_metrics.assert_called_once()
+        # Check that fit created the required attributes
+        assert hasattr(db_fdr_control, "preds")
+        assert len(db_fdr_control.preds) == 1
+        assert db_fdr_control.preds.iloc[0]["confidence"] == 0.9
 
     def test_fit_with_empty_data(self, db_fdr_control):
         """Test that fit method handles empty data."""
         empty_data = pd.DataFrame()
         with pytest.raises(AssertionError, match="Fit method requires non-empty data"):
-            db_fdr_control.fit(empty_data, residue_masses={"A": 71.03})
+            db_fdr_control.fit(empty_data)
 
     def test_get_confidence_cutoff_requires_fitting(self, db_fdr_control):
         """Test that get_confidence_cutoff requires fitting first."""

From 5e730c7260dfed8dbf1c5206a1e45209d3f70e1f Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 12:40:12 +0000
Subject: [PATCH 04/17] feat: add winnow config command to view resolved
 configuration

---
 winnow/scripts/config_formatter.py | 160 +++++++++++++++++++++++++++++
 winnow/scripts/main.py             |  75 +++++++++++++-
 2 files changed, 233 insertions(+), 2 deletions(-)
 create mode 100644 winnow/scripts/config_formatter.py

diff --git a/winnow/scripts/config_formatter.py b/winnow/scripts/config_formatter.py
new file mode 100644
index 0000000..1bb88e4
--- /dev/null
+++ b/winnow/scripts/config_formatter.py
@@ -0,0 +1,160 @@
+"""Configuration output formatter with hierarchical colour-coding."""
+
+from rich.console import Console
+from rich.text import Text
+from omegaconf import DictConfig, OmegaConf
+
+
+class ConfigFormatter:
+    """Format Hydra configuration with hierarchical colour-coding based on nesting depth.
+
+    Keys are coloured according to their indentation level to help visualise
+    the configuration structure.
+    """
+
+    # Colour palette for different indentation levels (similar to Typer's style)
+    INDENT_COLOURS = [
+        "bright_cyan",  # Level 0 (root keys)
+        "bright_green",  # Level 1
+        "bright_yellow",  # Level 2
+        "bright_magenta",  # Level 3
+        "bright_blue",  # Level 4
+        "cyan",  # Level 5
+        "green",  # Level 6
+        "yellow",  # Level 7+
+    ]
+
+    def __init__(self):
+        """Initialise the formatter."""
+        self.console = Console()
+
+    def print_config(self, cfg: DictConfig) -> None:
+        """Print configuration with hierarchical colour-coding.
+
+        Args:
+            cfg: OmegaConf configuration object to format and print
+        """
+        yaml_str = OmegaConf.to_yaml(cfg)
+        output = Text()
+
+        for line in yaml_str.split("\n"):
+            formatted_line = self._format_line(line)
+            output.append(formatted_line)
+
+        self.console.print(output, end="")
+
+    def _format_line(self, line: str) -> Text:
+        """Format a single line of YAML with appropriate colouring.
+
+        Args:
+            line: A single line from the YAML output
+
+        Returns:
+            Rich Text object with formatted content
+        """
+        output = Text()
+
+        # Handle empty lines
+        if not line.strip():
+            output.append("\n")
+            return output
+
+        indent_level = self._get_indent_level(line)
+        colour = self._get_colour_for_level(indent_level)
+
+        # Handle list items specially (they contain '- ' prefix)
+        if self._is_list_item(line):
+            output.append(line)
+            output.append("\n")
+            return output
+
+        # Handle key-value pairs
+        separator_idx = self._find_key_value_separator(line)
+        if separator_idx != -1:
+            self._append_key_value_pair(output, line, separator_idx, colour)
+        else:
+            # Lines without key-value separator
+            output.append(line)
+            output.append("\n")
+
+        return output
+
+    def _get_indent_level(self, line: str) -> int:
+        """Calculate the indentation level of a line.
+
+        Args:
+            line: Line to analyse
+
+        Returns:
+            Indentation level (0 for root, 1 for first nested level, etc.)
+        """
+        return (len(line) - len(line.lstrip())) // 2
+
+    def _get_colour_for_level(self, indent_level: int) -> str:
+        """Get the colour for a given indentation level.
+
+        Args:
+            indent_level: The indentation level
+
+        Returns:
+            Colour name for Rich
+        """
+        return self.INDENT_COLOURS[min(indent_level, len(self.INDENT_COLOURS) - 1)]
+
+    def _is_list_item(self, line: str) -> bool:
+        """Check if a line is a YAML list item.
+
+        Args:
+            line: Line to check
+
+        Returns:
+            True if line is a list item (starts with '- ')
+        """
+        return line.lstrip().startswith("- ")
+
+    def _find_key_value_separator(self, line: str) -> int:
+        """Find the position of the YAML key-value separator.
+
+        This finds colons that are followed by a space or end of line,
+        avoiding colons inside keys like M[UNIMOD:35].
+
+        Args:
+            line: Line to search
+
+        Returns:
+            Index of the separator colon, or -1 if not found
+        """
+        for i, char in enumerate(line):
+            if char == ":":
+                # A separator colon is either the last character or followed by a space
+                if i + 1 >= len(line) or line[i + 1] == " ":
+                    return i
+        return -1
+
+    def _append_key_value_pair(
+        self, output: Text, line: str, separator_idx: int, colour: str
+    ) -> None:
+        """Append a formatted key-value pair to the output.
+
+        Args:
+            output: Text object to append to
+            line: Original line
+            separator_idx: Index of the separator colon
+            colour: Colour to use for the key
+        """
+        key_part = line[:separator_idx]
+        value_part = line[separator_idx + 1 :]
+        indent = " " * (len(line) - len(line.lstrip()))
+
+        # Add indentation
+        output.append(indent)
+
+        # Add coloured key
+        output.append(key_part.lstrip(), style=f"bold {colour}")
+        output.append(":")
+
+        # Add value without formatting (plain text)
+        if value_part:
+            output.append(value_part)
+
+        output.append("\n")
diff --git a/winnow/scripts/main.py b/winnow/scripts/main.py
index 8726c35..ad3c7a7 100644
--- a/winnow/scripts/main.py
+++ b/winnow/scripts/main.py
@@ -11,6 +11,7 @@
 from winnow.datasets.calibration_dataset import CalibrationDataset
 from winnow.fdr.nonparametric import NonParametricFDRControl
 from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
+from winnow.scripts.config_formatter import ConfigFormatter
 
 # Logging setup
 logger = logging.getLogger(__name__)
@@ -28,6 +29,24 @@
     rich_markup_mode="rich",
 )
 
+# Config command group
+config_app = typer.Typer(
+    name="config",
+    help="Configuration utilities for inspecting resolved settings.",
+    rich_markup_mode="rich",
+)
+app.add_typer(config_app)
+
+
+def print_config(cfg) -> None:
+    """Print configuration with hierarchical colour-coding based on nesting depth.
+
+    Args:
+        cfg: OmegaConf configuration object to print
+    """
+    formatter = ConfigFormatter()
+    formatter.print_config(cfg)
+
 
 def filter_dataset(dataset: CalibrationDataset) -> CalibrationDataset:
     """Filter out rows whose predictions are empty or contain unsupported PSMs.
@@ -111,17 +130,24 @@ def separate_metadata_and_predictions(
     return dataset_metadata, dataset_preds_and_fdr_metrics
 
 
-def train_entry_point(overrides: Optional[List[str]] = None) -> None:
+def train_entry_point(
+    overrides: Optional[List[str]] = None, execute: bool = True
+) -> None:
     """The main training pipeline entry point.
 
     Args:
         overrides: Optional list of config overrides.
+        execute: If False, only print the configuration and return without executing the pipeline.
     """
     with initialize(
         config_path="../../config", version_base="1.3", job_name="winnow_train"
     ):
         cfg = compose(config_name="train", overrides=overrides)
 
+    if not execute:
+        print_config(cfg)
+        return
+
     logger.info("Starting training pipeline.")
     logger.info(f"Training configuration: {cfg}")
 
@@ -159,17 +185,24 @@ def train_entry_point(overrides: Optional[List[str]] = None) -> None:
     logger.info("Training pipeline completed successfully.")
 
 
-def predict_entry_point(overrides: Optional[List[str]] = None) -> None:
+def predict_entry_point(
+    overrides: Optional[List[str]] = None, execute: bool = True
+) -> None:
     """The main prediction pipeline entry point.
 
     Args:
         overrides: Optional list of config overrides.
+        execute: If False, only print the configuration and return without executing the pipeline.
     """
     with initialize(
         config_path="../../config", version_base="1.3", job_name="winnow_predict"
     ):
         cfg = compose(config_name="predict", overrides=overrides)
 
+    if not execute:
+        print_config(cfg)
+        return
+
     logger.info("Starting prediction pipeline.")
     logger.info(f"Prediction configuration: {cfg}")
 
@@ -285,5 +318,43 @@ def predict(ctx: typer.Context) -> None:
     predict_entry_point(overrides)
 
 
+@config_app.command(
+    name="train",
+    help=(
+        "Display the resolved training configuration without running the pipeline.\n\n"
+        "This is useful for inspecting the final configuration after all defaults "
+        "and overrides have been applied.\n\n"
+        "[bold cyan]Usage:[/bold cyan]\n"
+        "  [dim]winnow config train[/dim]  # Show default config\n"
+        "  [dim]winnow config train data_loader=mztab[/dim]  # Show config with overrides\n"
+        "  [dim]winnow config train calibrator.seed=42[/dim]  # Check override application"
+    ),
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def config_train(ctx: typer.Context) -> None:
+    """Display the resolved training configuration."""
+    overrides = ctx.args if ctx.args else None
+    train_entry_point(overrides, execute=False)
+
+
+@config_app.command(
+    name="predict",
+    help=(
+        "Display the resolved prediction configuration without running the pipeline.\n\n"
+        "This is useful for inspecting the final configuration after all defaults "
+        "and overrides have been applied.\n\n"
+        "[bold cyan]Usage:[/bold cyan]\n"
+        "  [dim]winnow config predict[/dim]  # Show default config\n"
+        "  [dim]winnow config predict fdr_method=database_grounded[/dim]  # Show config with overrides\n"
+        "  [dim]winnow config predict fdr_control.fdr_threshold=0.01[/dim]  # Check override application"
+    ),
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
+def config_predict(ctx: typer.Context) -> None:
+    """Display the resolved prediction configuration."""
+    overrides = ctx.args if ctx.args else None
+    predict_entry_point(overrides, execute=False)
+
+
 if __name__ == "__main__":
     app()

From 20ee8b3118553689cc6a790dd1f2c61636444d68 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 17:31:40 +0000
Subject: [PATCH 05/17] docs: document hydra config usage with winnow cli

---
 README.md             |  39 ++--
 docs/cli.md           | 388 +++++++++++++++++--------------
 docs/configuration.md | 527 ++++++++++++++++++++++++++++++++++++++++++
 mkdocs.yml            |   8 +-
 4 files changed, 766 insertions(+), 196 deletions(-)
 create mode 100644 docs/configuration.md

diff --git a/README.md b/README.md
index 4956c50..dcf9b3b 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,9 @@
     <a href="https://instadeepai.github.io/winnow/"><strong>Explore the docs »</strong></a>
     <br />
     <br />
-    <a href="https://github.com/instadeepai/winnow/issues/new?labels=bug&template=bug_report.md">Report Bug</a>
+    <a href="https://github.com/instadeepai/winnow/issues/new?labels=bug&template=bug_report.md">Report bug</a>
     &middot;
-    <a href="https://github.com/instadeepai/winnow/issues/new?labels=enhancement&template=feature_request.md">Request Feature</a>
+    <a href="https://github.com/instadeepai/winnow/issues/new?labels=enhancement&template=feature_request.md">Request feature</a>
   </p>
 </div>
 
@@ -48,16 +48,13 @@
   <summary>Table of Contents</summary>
   <ol>
     <li>
-      <a href="#about-the-project">About The Project</a>
+      <a href="#about-the-project">About the project</a>
     </li>
     <li>
       <a href="#installation">Installation</a>
     </li>
-    <li><a href="#usage">Usage</a>
-      <ul>
-        <li><a href="#CLI">CLI</a></li>
-        <li><a href="#Package">Package</a></li>
-      </ul>
+    <li>
+      <a href="#usage">Usage</a>
     </li>
     <li><a href="#contributing">Contributing</a></li>
   </ol>
@@ -70,7 +67,7 @@
 </div>
 
 <!-- ABOUT THE PROJECT -->
-## About The Project
+## About the project
 
 <!-- [![Product Name Screen Shot][product-screenshot]](https://example.com) -->
 In bottom-up proteomics workflows, peptide sequencing—matching an MS2 spectrum to a peptide—is just the first step. The resulting peptide-spectrum matches (PSMs) often contain many incorrect identifications, which can negatively impact downstream tasks like protein assembly.
@@ -80,7 +77,7 @@ To mitigate this, intermediate steps are introduced to:
 1. Assign confidence scores to PSMs that better correlate with correctness.
 2. Estimate and control the false discovery rate (FDR) by filtering identifications based on confidence scores.
 
-For database search-based peptide sequencing, PSM rescoring and target-decoy competition (TDC) are standard approaches, supported by an extensive ecosystem of tools. However, *de novo* peptide sequencing lacks standardized methods for these tasks.
+For database search-based peptide sequencing, PSM rescoring and target-decoy competition (TDC) are standard approaches, supported by an extensive ecosystem of tools. However, *de novo* peptide sequencing lacks standardised methods for these tasks.
 
 `winnow` aims to fill this gap by implementing the calibrate-estimate framework for FDR estimation. Unlike TDC, this approach is directly applicable to *de novo* sequencing models. Additionally, its calibration step naturally incorporates common confidence rescoring workflows as part of FDR estimation.
 
@@ -121,11 +118,24 @@ Installing `winnow` provides the `winnow` command with two sub-commands:
 
 By default, `winnow predict` uses a pretrained general model (`InstaDeepAI/winnow-general-model`) hosted on HuggingFace Hub, allowing you to get started immediately without training. You can also specify custom HuggingFace models or use locally trained models.
 
-Refer to the documentation for details on command-line arguments and usage examples.
+Winnow uses [Hydra](https://hydra.cc/) for flexible, hierarchical configuration management. All parameters can be configured via YAML files or overridden on the command line:
+
+```bash
+# Quick start with defaults
+winnow predict
+
+# Override specific parameters
+winnow predict fdr_control.fdr_threshold=0.01
+
+# Specify different data source and dataset paths
+winnow predict data_loader=mztab dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/preds.mztab
+```
+
+Refer to the [CLI Guide](cli.md) and [Configuration Guide](configuration.md) for details on usage and configuration options.
 
 ### Package
 
-The `winnow` package is organized into three sub-modules:
+The `winnow` package is organised into three sub-modules:
 
 1. `winnow.datasets` – Handles data loading and saving, including the `CalibrationDataset` class for mapping peptide sequencing output formats.
 2. `winnow.calibration` – Implements confidence calibration. Key components include:
@@ -145,10 +155,11 @@ For an example, check out the [example notebook](https://github.com/instadeepai/
 Contributions are what make the open-source community such an amazing place to learn, inspire and create, and we welcome your support! Any contributions you make are **greatly appreciated**.
 
 If you have ideas for enhancements, you can:
+
 - Fork the repository and submit a pull request.
 - Open an issue and tag it with "enhancement".
 
-### Contribution Process
+### Contribution process
 
 1. Fork the repository.
 2. Create a feature branch (`git checkout -b feature/AmazingFeature`).
@@ -159,7 +170,7 @@ Don't forget to give the project a star! Thanks again! :star:
 
 <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
-### BibTeX entry and citation info
+### BibTeX entry and citation information
 
 If you use `winnow` in your research, please cite the following preprint:
 
diff --git a/docs/cli.md b/docs/cli.md
index 131c3cb..2c119e3 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -1,6 +1,8 @@
-# Command Line Interface
+# Command line interface
 
-The winnow CLI provides a simple interface for confidence calibration and FDR control workflows. It supports both training calibration models and applying them for prediction and FDR filtering.
+This guide provides practical examples and workflows for using the `winnow` command-line interface.
+
+**Looking for configuration details?** See the **[Configuration guide](configuration.md)** for comprehensive documentation of the configuration system, YAML structure and advanced patterns.
 
 ## Installation
 
@@ -14,146 +16,118 @@ uv pip install winnow-fdr
 
 ## Commands
 
-### `winnow train`
+### `winnow config`
 
-Train a confidence calibration model on labelled data.
+Display the resolved configuration for any command without executing it. This is useful for inspecting how defaults and overrides are composed.
 
 ```bash
-winnow train \
-    --data-source instanovo \
-    --dataset-config-path train_config.yaml \
-    --model-output-folder ./calibrator_model \
-    --dataset-output-path ./training_results.csv
-```
+# Show training configuration
+winnow config train
+
+# Show prediction configuration
+winnow config predict
 
-**Arguments:**
+# Check configuration with overrides
+winnow config train data_loader=mztab model_output_dir=models/my_model
+winnow config predict fdr_method=database_grounded fdr_control.fdr_threshold=0.01
+```
 
-- `--data-source`: Type of dataset (`instanovo`, `mztab`, `winnow`)
-- `--dataset-config-path`: Path to YAML configuration file
-- `--model-output-folder`: Directory to save trained calibrator
-- `--dataset-output-path`: Path to save training results CSV
-- `--learn-prosit-missing` / `--no-learn-prosit-missing`: Whether to learn from missing Prosit features (default: True)
-- `--learn-chimeric-missing` / `--no-learn-chimeric-missing`: Whether to learn from missing chimeric features (default: True)
-- `--learn-retention-missing` / `--no-learn-retention-missing`: Whether to learn from missing retention time features (default: True)
+This command prints the final YAML configuration with colour-coded hierarchical formatting, making it easy to read and verify your settings. Keys are coloured by nesting depth to help visualise the configuration structure. The output shows all defaults, composition and overrides after they have been applied.
 
-**Missingness Handling:**
+**Note:** Some keys appear with quotes (e.g. `'N'`, `'Y'`) because they are reserved words in YAML that would otherwise be interpreted as boolean values. The quotes ensure they are treated as strings.
 
-By default, the calibrator learns from missing data by including missingness indicators as features.
+### `winnow train`
 
-If you wish to train only on complete data (training will fail if invalid spectra are found):
+Train a confidence calibration model on labelled data.
 
 ```bash
-winnow train \
-    --data-source instanovo \
-    --dataset-config-path train_config.yaml \
-    --model-output-folder ./calibrator_model \
-    --dataset-output-path ./training_results.csv \
-    --no-learn-prosit-missing \
-    --no-learn-chimeric-missing \
-    --no-learn-retention-missing
+# Use defaults (configured in config/train.yaml)
+winnow train
+
+# Override specific parameters
+winnow train data_loader=mztab model_output_dir=models/my_model
+
+# Specify dataset paths
+winnow train dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/preds.csv
 ```
 
-See the [Handling Missing Features](api/calibration.md#handling-missing-features) section for more details.
+**Common parameters:**
 
-### `winnow predict`
+- `data_loader`: Type of dataset loader (`instanovo`, `mztab`, `pointnovo`, `winnow`)
+- `dataset.spectrum_path_or_directory`: Path to spectrum/metadata file (or directory for winnow format)
+- `dataset.predictions_path`: Path to predictions file (set to `null` for winnow format)
+- `model_output_dir`: Directory to save trained calibrator
+- `dataset_output_path`: Path to save training results CSV
 
-Apply calibration and FDR control to new data using a trained model. By default, uses a pretrained general model from HuggingFace Hub.
+**Advanced calibrator configuration:**
 
-```bash
-# Use default pretrained model from HuggingFace (recommended for getting started)
-winnow predict \
-    --data-source instanovo \
-    --dataset-config-path test_config.yaml \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column confidence \
-    --output-folder ./predictions
+You can customise the calibrator architecture and features using nested parameters:
 
-# Use a custom HuggingFace model
-winnow predict \
-    --data-source instanovo \
-    --dataset-config-path test_config.yaml \
-    --huggingface-model-name my-org/my-custom-model \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column confidence \
-    --output-folder ./predictions
+```bash
+# Change MLP architecture
+winnow train calibrator.hidden_layer_sizes=[100,50,25]
 
-# Use a local model
-winnow predict \
-    --data-source instanovo \
-    --dataset-config-path test_config.yaml \
-    --local-model-folder ./calibrator_model \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column confidence \
-    --output-folder ./predictions
+# Configure individual features
+winnow train calibrator.features.prosit_features.mz_tolerance=0.01
 ```
 
-**Arguments:**
+For comprehensive calibrator configuration options, see:
+- [Configuration guide](configuration.md) - Complete parameter reference
+- [Calibration API](api/calibration.md#handling-missing-features) - Feature implementation details
 
-- `--data-source`: Type of dataset (`instanovo`, `winnow`, `mztab`)
-- `--dataset-config-path`: Path to YAML configuration file
-- `--huggingface-model-name`: HuggingFace model identifier (defaults to `InstaDeepAI/winnow-general-model`). Use this to load models from HuggingFace Hub.
-- `--local-model-folder`: Directory containing trained calibrator. Use this to load local models instead of HuggingFace models.
-- `--method`: FDR estimation method (`winnow` or `database-ground`)
-- `--fdr-threshold`: Target FDR threshold (e.g., 0.01 for 1%)
-- `--confidence-column`: Name of confidence score column
-- `--output-folder`: Folder path to write output files to (creates `metadata.csv` and `preds_and_fdr_metrics.csv`)
+### `winnow predict`
 
-**Note:** If neither `--local-model-folder` nor `--huggingface-model-name` are provided, the default pretrained general model from HuggingFace (`InstaDeepAI/winnow-general-model`) will be loaded automatically.
+Apply calibration and FDR control to new data using a trained model. By default, this uses a pretrained general model from HuggingFace Hub.
 
-## Configuration Files
+```bash
+# Use defaults (pretrained model from HuggingFace)
+winnow predict
 
-The CLI uses YAML configuration files to specify dataset locations and parameters. The format depends on the data source.
+# Override specific parameters
+winnow predict data_loader=mztab fdr_control.fdr_threshold=0.01 fdr_method=database_grounded
 
-### InstaNovo Configuration
+# Specify dataset paths
+winnow predict dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/preds.csv
 
-For InstaNovo/CSV datasets:
+# Use a custom HuggingFace model
+winnow predict calibrator.pretrained_model_name_or_path=my-org/my-custom-model
 
-```yaml
-# instanovo_config.yaml
-beam_predictions_path: "/path/to/predictions.csv"
-spectrum_path: "/path/to/spectra.csv"
+# Use a local model
+winnow predict calibrator.pretrained_model_name_or_path=models/my_model
 ```
 
-**Required files:**
-
-- **Spectrum file**: Parquet/IPC file containing spectral metadata and features
-- **Predictions CSV**: Contains beam search results with columns like `preds`, `preds_beam_1`, confidence scores
-
-### MZTab Configuration
+**Common parameters:**
 
-For MZTab format datasets (traditional search engines and Casanovo outputs):
+- `data_loader`: Type of dataset loader (`instanovo`, `mztab`, `pointnovo`, `winnow`)
+- `dataset.spectrum_path_or_directory`: Path to spectrum/metadata file (or directory for winnow format)
+- `dataset.predictions_path`: Path to predictions file
+- `fdr_method`: FDR estimation method (`nonparametric` or `database_grounded`)
+- `fdr_control.fdr_threshold`: Target FDR threshold (e.g. 0.01 for 1%)
+- `output_folder`: Folder path to write output files
 
-```yaml
-# mztab_config.yaml
-spectrum_path: "/path/to/spectra.parquet"
-predictions_path: "/path/to/predictions.mztab"
-```
+By default, `winnow predict` uses the pretrained model `InstaDeepAI/winnow-general-model` from HuggingFace Hub. To use a different model, override the calibrator settings (see [Configuration guide](configuration.md#using-a-custom-model) for details).
 
-**Required files:**
+## Configuration system
 
-- **Spectrum file**: Parquet/IPC file with spectrum metadata and row indices matching MZTab spectra_ref
-- **MZTab file**: Standard MZTab format containing predictions
+Winnow uses [Hydra](https://hydra.cc/) for configuration management. All parameters can be configured via:
 
-### Winnow Internal Configuration
+- **YAML config files** in the `config/` directory (defines defaults)
+- **Command-line overrides** using `key=value` syntax
+- **Nested parameters** using dot notation (e.g. `calibrator.seed=42`)
 
-For winnow's internal format:
 
-```yaml
-# winnow_config.yaml
-data_dir: "/path/to/winnow_dataset_directory"
-```
+For comprehensive configuration documentation, including:
 
-**Required structure:**
+- Full configuration file structure and composition
+- Config interpolation and variable references
+- Creating custom configurations
+- Advanced patterns and debugging
 
-- Directory containing `metadata.csv` and optionally `predictions.pkl`
-- Created by `CalibrationDataset.save()`
+See the **[Configuration guide](configuration.md)**.
 
-## Data Requirements
+## Data requirements
 
-### Training Data
+### Training data
 
 For training (`winnow train`), you need:
 
@@ -162,34 +136,41 @@ For training (`winnow train`), you need:
 - **Spectral data**: MS/MS spectra and metadata
 - **Unique identifiers**: Each PSM must have a unique `spectrum_id` in both input files
 
-### Prediction Data
+### Prediction data
 
 For prediction (`winnow predict`), you need:
 
-- **Unlabelled dataset**: Predictions and spectra (no ground truth required)
-- **Trained model**: Output from `winnow train`
+- **Unlabelled dataset**: Predictions and spectra (no ground truth required for non-parametric FDR)
+- **Trained model**: Pretrained model from HuggingFace or output from `winnow train`
 - **Confidence scores**: Raw confidence values to calibrate
 - **Unique identifiers**: Each PSM must have a unique `spectrum_id` in both input files
 
-## FDR Methods
+### Data formats
+
+Winnow supports multiple input formats:
+
+- **InstaNovo**: Parquet spectra + CSV predictions (beam search format)
+- **MZTab**: MGF/Parquet spectra + MZTab predictions
+- **PointNovo**: Similar to InstaNovo format
+- **Winnow**: Internal format (directory with metadata.csv and predictions.pkl)
 
-### Winnow Method (`--method winnow`)
+Specify the format using the `data_loader=<format>` parameter.
+
+## FDR methods
+
+### Non-parametric method (`fdr_method=nonparametric`)
 
 Uses non-parametric FDR estimation procedure:
 
 - **No ground truth required**: Works with confidence scores alone
 - **No correct/incorrect distribution modelling**: Process directly estimates FDR using calibrated confidence scores
-- **Multiple metrics**: Provides FDR, PEP and q-values.
+- **Multiple metrics**: Provides FDR, PEP and q-values
 
 ```bash
-winnow predict \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column calibrated_confidence \
-    # ... other args
+winnow predict fdr_method=nonparametric fdr_control.fdr_threshold=0.01
 ```
 
-### Database-Grounded Method (`--method database-ground`)
+### Database-grounded method (`fdr_method=database_grounded`)
 
 Uses database search results for validation:
 
@@ -198,16 +179,12 @@ Uses database search results for validation:
 - **Direct estimates**: FDR calculated from actual correct/incorrect labels
 
 ```bash
-winnow predict \
-    --method database-ground \
-    --fdr-threshold 0.05 \
-    --confidence-column confidence \
-    # ... other args
+winnow predict fdr_method=database_grounded fdr_control.fdr_threshold=0.05
 ```
 
-## Output Files
+## Output files
 
-### Training Output
+### Training output
 
 Training produces:
 
@@ -217,7 +194,7 @@ Training produces:
 2. **Training results** (`--dataset-output-path`):
    - CSV with calibrated scores and evaluation metrics
 
-### Prediction Output
+### Prediction output
 
 Prediction produces two CSV files in the `--output-folder` directory:
 
@@ -238,94 +215,147 @@ Prediction produces two CSV files in the `--output-folder` directory:
 
 This separation allows users to work with metadata and features separately from predictions and error metrics, making downstream analysis more convenient.
 
-## Example Workflows
+## Example workflows
+
+### Quick start with defaults
+
+```bash
+# Predict using pretrained model, InstaNovo predictions and default settings
+winnow predict \
+    dataset.spectrum_path_or_directory=data/test_spectra.parquet \
+    dataset.predictions_path=data/test_predictions.csv
+```
 
-### Complete Training and Prediction Pipeline
+### Complete training and prediction pipeline
 
 ```bash
 # Step 1: Train calibrator on labelled data
 winnow train \
-    --data-source instanovo \
-    --dataset-config-path configs/train_data.yaml \
-    --model-output-folder models/my_calibrator \
-    --dataset-output-folder results/training_output.csv
+    data_loader=instanovo \
+    dataset.spectrum_path_or_directory=data/train_spectra.parquet \
+    dataset.predictions_path=data/train_predictions.csv \
+    model_output_dir=models/my_calibrator \
+    dataset_output_path=results/training_output.csv
 
-# Step 2: Apply to new data with FDR control (using default pretrained model)
-winnow predict \
-    --data-source instanovo \
-    --dataset-config-path configs/test_data.yaml \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column confidence \
-    --output-folder results/predictions
-
-# Alternative: Use the locally trained model
+# Step 2: Apply to new data with FDR control
 winnow predict \
-    --data-source instanovo \
-    --dataset-config-path configs/test_data.yaml \
-    --local-model-folder models/my_calibrator \
-    --method winnow \
-    --fdr-threshold 0.01 \
-    --confidence-column confidence \
-    --output-folder results/predictions
+    data_loader=instanovo \
+    dataset.spectrum_path_or_directory=data/test_spectra.parquet \
+    dataset.predictions_path=data/test_predictions.csv \
+    calibrator.pretrained_model_name_or_path=models/my_calibrator \
+    fdr_method=nonparametric \
+    fdr_control.fdr_threshold=0.01 \
+    output_folder=results/predictions
 ```
 
-### Configuration File Examples
+### MZTab format
 
-**Training configuration** (`configs/train_data.yaml`):
-```yaml
-beam_predictions_path: "data/train_predictions.csv"
-spectrum_path: "data/train_spectra.csv"
-```
+```bash
+# Train with MZTab format
+winnow train \
+    data_loader=mztab \
+    dataset.spectrum_path_or_directory=data/spectra.parquet \
+    dataset.predictions_path=data/casanovo_results.mztab \
+    model_output_dir=models/mztab_model
 
-**Test configuration** (`configs/test_data.yaml`):
-```yaml
-beam_predictions_path: "data/test_predictions.csv"
-spectrum_path: "data/test_spectra.csv"
+# Predict with MZTab format
+winnow predict \
+    data_loader=mztab \
+    dataset.spectrum_path_or_directory=data/test_spectra.parquet \
+    dataset.predictions_path=data/test_results.mztab \
+    calibrator.pretrained_model_name_or_path=models/mztab_model \
+    fdr_control.fdr_threshold=0.05
 ```
 
-## Built-in Features
+### Advanced configuration
 
-The CLI automatically includes these calibration features:
+```bash
+# Train with custom calibrator settings
+winnow train \
+    dataset.spectrum_path_or_directory=data/spectra.parquet \
+    dataset.predictions_path=data/predictions.csv \
+    calibrator.hidden_layer_sizes=[100,50,25] \
+    calibrator.learning_rate_init=0.01 \
+    calibrator.max_iter=500 \
+    calibrator.features.prosit_features.mz_tolerance=0.01
+
+# Predict with database-grounded FDR
+winnow predict \
+    dataset.spectrum_path_or_directory=data/test_spectra.parquet \
+    dataset.predictions_path=data/test_predictions.csv \
+    fdr_method=database_grounded \
+    fdr_control.fdr_threshold=0.01 \
+    calibrator.pretrained_model_name_or_path=models/custom_model
+```
 
-- **Mass Error**: Difference between observed and theoretical mass
-- **Prosit Features**: ML-based intensity predictions (`mz_tolerance=0.02`)
-- **Retention Time**: iRT predictions (`hidden_dim=10`, `train_fraction=0.1`)
-- **Chimeric Features**: Chimeric spectrum detection (`mz_tolerance=0.02`)
-- **Beam Features**: Beam search diversity metrics
+## Default configuration
 
-## Default Parameters
+Winnow comes with sensible default settings for all parameters:
 
-The CLI uses these default parameters:
+- **Calibrator**: 2-layer MLP with 50 hidden units per layer
+- **Features**: Mass error, Prosit features, retention time, chimeric features, beam features
+- **FDR**: Non-parametric method with 5% threshold
+- **Model**: Pretrained general model from HuggingFace
 
-- **Seed**: 42 (for reproducibility)
-- **MZ Tolerance**: 0.02 Da
-- **FDR Learning Rate**: 0.005
-- **FDR Training Steps**: 5000
-- **Retention Time Hidden Dim**: 10
-- **Retention Time Train Fraction**: 0.1
+All defaults are defined in YAML files under `config/` and can be overridden via command line. For a complete reference of all default parameters and configuration options, see the **[Configuration guide](configuration.md)**.
 
 ## Troubleshooting
 
-### Common Issues
+### Common issues
 
-**Missing columns**: Ensure your CSV files contain expected columns:
+**Missing columns**: Ensure your data files contain expected columns:
 
 - `preds`: Main prediction
 - `confidence`: Confidence scores
-- `sequence`: Ground truth (for training/database method)
+- `sequence`: Ground truth (for training/database-grounded FDR)
+
+**File paths**: Use absolute paths in dataset path overrides to avoid path resolution issues:
+```bash
+winnow predict dataset.spectrum_path_or_directory=/absolute/path/to/spectra.parquet
+```
 
-**File paths**: Use absolute paths in configuration files to avoid path resolution issues.
+**Configuration errors**: If Hydra reports a missing config file, check the option name against the available options listed in the error message:
+```bash
+winnow predict fdr_method=typo
+# Error: Could not find 'fdr_method/typo'
+# Available options in 'fdr_method': nonparametric, database_grounded
+```
 
-**Memory issues**: Large datasets may require more memory. Consider filtering data or using smaller batch sizes.
+**Memory issues**: Large datasets may require more memory. Consider:
+- Filtering data before processing
+- Using a machine with more RAM
+- Processing in batches (requires custom Python script)
 
-### Dataset Filtering
+### Dataset filtering
 
 The CLI automatically filters out:
 
 - Empty predictions
 - Peptides longer than 30 amino acids (Prosit limitation)
 - Precursor charges above 6 (Prosit limitation)
-- Invalid modifications and tokens
+- Invalid modifications and tokens (defined in `config/residues.yaml`)
+
+### Getting help
+
+View available options:
+
+```bash
+winnow --help           # List all commands
+winnow train --help     # Command-specific help
+winnow predict --help
+winnow config --help    # Config command help
+
+winnow config train     # View resolved training configuration
+winnow config predict   # View resolved prediction configuration
+```
+
+## Where to find information
+
+This CLI guide focuses on **practical command-line usage**. For other information, see:
 
-For more advanced usage and customisation, refer to the [Python API documentation](api/calibration.md) and [examples notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb).
+| Topic | Documentation |
+|-------|---------------|
+| Configuration system, YAML structure, advanced patterns | [Configuration guide](configuration.md) |
+| Python API, feature implementation, programmatic usage | [API reference](api/calibration.md) |
+| Interactive tutorials and examples | [Examples notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb) |
+| Contributing, development setup | [Contributing guide](contributing.md) |
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 0000000..59470e7
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,527 @@
+# Configuration guide
+
+This guide provides comprehensive documentation of Winnow's configuration system, including YAML file structure, parameter reference, advanced patterns and customisation.
+
+**Looking for practical CLI usage?** See the **[CLI reference](cli.md)** for command examples and workflows.
+
+## Overview
+
+Winnow uses [Hydra](https://hydra.cc/) for flexible, hierarchical configuration management. This enables:
+
+- **Composable configs**: Build configurations from multiple YAML files
+- **Flexibility**: Override any parameter via command line or config files
+- **Reproducibility**: Full configuration is automatically logged
+
+## Quick start
+
+Winnow works out of the box with sensible defaults:
+
+```bash
+# Train with default settings
+winnow train
+
+# Predict with default settings
+winnow predict
+```
+
+## Configuration files
+
+Winnow's configuration files are organised in the `config/` directory:
+
+```
+config/
+├── residues.yaml              # Amino acid masses, modifications
+├── data_loader/               # Dataset format loaders
+│   ├── instanovo.yaml
+│   ├── mztab.yaml
+│   ├── pointnovo.yaml
+│   └── winnow.yaml
+├── fdr_method/                # FDR control methods
+│   ├── nonparametric.yaml
+│   └── database_grounded.yaml
+├── train.yaml                 # Main training config
+├── calibrator.yaml            # Model architecture and features
+└── predict.yaml               # Main prediction config
+```
+
+## Overriding configuration
+
+All configuration parameters have default values defined in YAML files. You can override any parameter from the command line.
+
+### Command-line overrides
+
+Pass `key=value` pairs after the command name; multiple overrides can be combined in a single invocation:
+
+```bash
+# Override dataset paths
+winnow train dataset.spectrum_path_or_directory=data/my_spectra.parquet dataset.predictions_path=data/my_preds.csv
+
+# Change data loader
+winnow train data_loader=mztab
+
+# Change output directory
+winnow train model_output_dir=models/my_model
+
+# Change multiple parameters
+winnow predict data_loader=mztab fdr_control.fdr_threshold=0.01 fdr_method=database_grounded
+```
+
+### Nested parameters
+
+Access nested configuration values using dot notation:
+
+```bash
+# Change calibrator seed
+winnow train calibrator.seed=123
+
+# Change MLP hidden layer sizes
+winnow train calibrator.hidden_layer_sizes=[100,50,25]
+
+# Change feature parameters
+winnow train calibrator.features.prosit_features.mz_tolerance=0.01
+```
+
+### Dataset configuration
+
+Specify dataset paths using nested notation:
+
+```bash
+# For InstaNovo format
+winnow train dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/preds.csv
+
+# For MZTab format
+winnow train data_loader=mztab dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/results.mztab
+```
+
+## Training configuration
+
+### Main training config (`config/train.yaml`)
+
+Controls dataset loading, output paths and composition:
+
+```yaml
+defaults:
+  - _self_
+  - residues
+  - calibrator
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+
+dataset:
+  # Path to the spectrum data file or to folder containing saved internal Winnow dataset
+  spectrum_path_or_directory: data/spectra.ipc
+  # Path to the beam predictions file
+  # Leave as null if data source is winnow, or loading will fail
+  predictions_path: data/predictions.csv
+
+# Output paths
+model_output_dir: models/new_model
+dataset_output_path: results/calibrated_dataset.csv
+```
+
+**Key parameters:**
+
+- `data_loader`: Format of input data loader to use (via defaults: `instanovo`, `mztab`, `pointnovo`, `winnow`)
+- `dataset.spectrum_path_or_directory`: Path to spectrum/metadata file (or directory for winnow format)
+- `dataset.predictions_path`: Path to predictions file (set to null for winnow format)
+- `model_output_dir`: Where to save trained model
+- `dataset_output_path`: Where to save calibrated training results
+
+### Calibrator config (`config/calibrator.yaml`)
+
+Controls model architecture and calibration features:
+
+```yaml
+calibrator:
+  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
+
+  seed: 42
+  hidden_layer_sizes: [50, 50]  # The number of neurons in each hidden layer of the MLP classifier
+  learning_rate_init: 0.001  # The initial learning rate for the MLP classifier
+  alpha: 0.0001  # L2 regularisation parameter for the MLP classifier
+  max_iter: 1000  # Maximum number of training iterations for the MLP classifier
+  early_stopping: true  # Whether to use early stopping to terminate training
+  validation_fraction: 0.1  # Proportion of training data to use for early stopping validation
+
+  features:
+    mass_error:
+      _target_: winnow.calibration.calibration_features.MassErrorFeature
+      residue_masses: ${residue_masses}
+
+    prosit_features:
+      _target_: winnow.calibration.calibration_features.PrositFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true
+      invalid_prosit_tokens: ${invalid_prosit_tokens}
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD
+
+    retention_time_feature:
+      _target_: winnow.calibration.calibration_features.RetentionTimeFeature
+      hidden_dim: 10
+      train_fraction: 0.1
+      learn_from_missing: true
+      seed: 42
+      learning_rate_init: 0.001
+      alpha: 0.0001
+      max_iter: 200
+      early_stopping: false
+      validation_fraction: 0.1
+      invalid_prosit_tokens: ${invalid_prosit_tokens}
+      prosit_irt_model_name: Prosit_2019_irt
+
+    chimeric_features:
+      _target_: winnow.calibration.calibration_features.ChimericFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true
+      invalid_prosit_tokens: ${invalid_prosit_tokens}
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD
+
+    beam_features:
+      _target_: winnow.calibration.calibration_features.BeamFeatures
+```
+
+**Key parameters:**
+
+- `seed`: Random seed for reproducibility
+- `hidden_layer_sizes`: Architecture of MLP classifier
+- `learning_rate_init`: Initial learning rate
+- `alpha`: L2 regularisation parameter
+- `max_iter`: Maximum training iterations
+- `early_stopping`: Whether to use early stopping
+- `validation_fraction`: Proportion of data for validation
+- `features.*`: Individual calibration feature configurations
+
+## Prediction configuration
+
+### Main prediction config (`config/predict.yaml`)
+
+Controls dataset loading, FDR estimation and output:
+
+```yaml
+defaults:
+  - _self_
+  - residues
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+  - fdr_method: nonparametric  # Options: nonparametric, database_grounded
+
+dataset:
+  # Path to the spectrum data file or to folder containing saved internal Winnow dataset
+  spectrum_path_or_directory: data/spectra.ipc
+  # Path to the beam predictions file
+  # Leave as null if data source is winnow, or loading will fail
+  predictions_path: data/predictions.csv
+
+calibrator:
+  # Path to the local calibrator directory or the HuggingFace model identifier
+  # If the path is a local directory path, it will be used directly
+  # If it is a HuggingFace repository identifier, it will be downloaded from HuggingFace
+  pretrained_model_name_or_path: InstaDeepAI/winnow-general-model
+  # Directory to cache the HuggingFace model
+  cache_dir: null  # can be set to null if using local model or for the default cache directory
+
+fdr_control:
+  # Target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.)
+  fdr_threshold: 0.05
+  # Name of the column with confidence scores to use for FDR estimation
+  confidence_column: calibrated_confidence
+
+# Folder path to write the outputs to
+output_folder: results/predictions
+```
+
+**Key parameters:**
+
+- `data_loader`: Format of input data loader to use (via defaults: `instanovo`, `mztab`, `pointnovo`, `winnow`)
+- `dataset.spectrum_path_or_directory`: Path to spectrum/metadata file (or directory for winnow format)
+- `dataset.predictions_path`: Path to predictions file
+- `calibrator.pretrained_model_name_or_path`: HuggingFace model identifier or local model directory path
+- `calibrator.cache_dir`: Directory to cache HuggingFace models (null for default)
+- `fdr_method`: FDR estimation method (via defaults: `nonparametric` or `database_grounded`)
+- `fdr_control.fdr_threshold`: Target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5%)
+- `fdr_control.confidence_column`: Column name with confidence scores
+- `output_folder`: Where to save results
+
+### FDR method configs
+
+**Non-parametric FDR** (`config/fdr_method/nonparametric.yaml`):
+
+```yaml
+_target_: winnow.fdr.nonparametric.NonParametricFDRControl
+```
+
+No additional parameters required.
+
+**Database-grounded FDR** (`config/fdr_method/database_grounded.yaml`):
+
+```yaml
+_target_: winnow.fdr.database_grounded.DatabaseGroundedFDRControl
+confidence_feature: ${fdr_control.confidence_column}
+residue_masses: ${residue_masses}
+isotope_error_range: [0, 1]
+drop: 10
+```
+
+Requires ground truth sequences in the dataset.
+
+**Key parameters:**
+
+- `confidence_feature`: Name of the column with confidence scores (interpolated from fdr_control)
+- `residue_masses`: Amino acid and modification masses (interpolated from residues config)
+- `isotope_error_range`: Range of isotope errors to consider when matching peptides
+- `drop`: Number of top predictions to drop for stability
+
+## Shared configuration
+
+### Residues config (`config/residues.yaml`)
+
+Defines amino acid masses, modifications and invalid tokens:
+
+```yaml
+residue_masses:
+  "G": 57.021464
+  "A": 71.037114
+  "S": 87.032028
+  # ... other amino acids
+  "M[UNIMOD:35]": 147.035400  # Oxidation
+  "C[UNIMOD:4]": 160.030649   # Carboxyamidomethylation
+  "N[UNIMOD:7]": 115.026943   # Deamidation
+  "Q[UNIMOD:7]": 129.042594   # Deamidation
+  # ... other modifications
+  "[UNIMOD:1]": 42.010565     # Acetylation (terminal)
+  "[UNIMOD:5]": 43.005814     # Carbamylation (terminal)
+  "[UNIMOD:385]": -17.026549  # NH3 loss (terminal)
+
+invalid_prosit_tokens:
+  # InstaNovo
+  - "[UNIMOD:7]"
+  - "[UNIMOD:21]"
+  - "[UNIMOD:1]"
+  - "[UNIMOD:5]"
+  - "[UNIMOD:385]"
+  # Casanovo
+  - "+0.984"
+  - "+42.011"
+  - "+43.006"
+  - "-17.027"
+  - "[Deamidated]"
+  # ... other unsupported modifications
+```
+
+This configuration is shared across all pipelines and referenced via `${residue_masses}` and `${invalid_prosit_tokens}` interpolation.
+
+### Data loader configs
+
+Each data format has a dedicated loader configuration in `config/data_loader/`:
+
+**InstaNovo** (`config/data_loader/instanovo.yaml`):
+```yaml
+_target_: winnow.datasets.data_loaders.InstaNovoDatasetLoader
+residue_masses: ${residue_masses}
+residue_remapping:
+  "M(ox)": "M[UNIMOD:35]"
+  "C(+57.02)": "C[UNIMOD:4]"
+  # ... maps legacy notations to UNIMOD tokens
+```
+
+**MZTab** (`config/data_loader/mztab.yaml`):
+```yaml
+_target_: winnow.datasets.data_loaders.MZTabDatasetLoader
+residue_masses: ${residue_masses}
+residue_remapping:
+  "M+15.995": "M[UNIMOD:35]"
+  "C+57.021": "C[UNIMOD:4]"
+  "C[Carbamidomethyl]": "C[UNIMOD:4]"
+  # ... maps Casanovo notations to UNIMOD tokens
+```
+
+**PointNovo** (`config/data_loader/pointnovo.yaml`):
+```yaml
+_target_: winnow.datasets.data_loaders.PointNovoDatasetLoader
+residue_masses: ${residue_masses}
+```
+
+**Winnow** (`config/data_loader/winnow.yaml`):
+```yaml
+_target_: winnow.datasets.data_loaders.WinnowDatasetLoader
+residue_masses: ${residue_masses}
+# Internal format uses UNIMOD tokens directly, no remapping needed
+```
+
+## Common configuration patterns
+
+### Using a custom model
+
+```bash
+# Use a custom HuggingFace model
+winnow predict calibrator.pretrained_model_name_or_path=my-org/my-model
+
+# Use a locally trained model
+winnow predict calibrator.pretrained_model_name_or_path=models/my_model
+
+# Specify a custom HuggingFace cache directory
+winnow predict calibrator.cache_dir=/path/to/cache
+```
+
+### Changing FDR method and threshold
+
+```bash
+# Use database-grounded FDR at 1%
+winnow predict fdr_method=database_grounded fdr_control.fdr_threshold=0.01
+
+# Use non-parametric FDR at 5%
+winnow predict fdr_method=nonparametric fdr_control.fdr_threshold=0.05
+```
+
+### Training with different features
+
+```bash
+# Change Prosit tolerance
+winnow train calibrator.features.prosit_features.mz_tolerance=0.01
+
+# Disable missing value handling for a feature
+winnow train calibrator.features.prosit_features.learn_from_missing=false
+
+# Change retention time model architecture
+winnow train calibrator.features.retention_time_feature.hidden_dim=20
+```
+
+### Processing different data formats
+
+```bash
+# MZTab format
+winnow train data_loader=mztab dataset.spectrum_path_or_directory=data/spectra.parquet dataset.predictions_path=data/results.mztab
+
+# Previously saved Winnow dataset
+winnow train data_loader=winnow dataset.spectrum_path_or_directory=data/winnow_dataset/ dataset.predictions_path=null
+```
+
+## Config interpolation
+
+Hydra supports variable interpolation using `${...}` syntax:
+
+```yaml
+# Reference from residues config (loaded via defaults)
+features:
+  mass_error:
+    residue_masses: ${residue_masses}  # References residue_masses from residues.yaml
+
+# Reference nested values
+fdr_control:
+  confidence_column: calibrated_confidence
+
+database_grounded:
+  confidence_feature: ${fdr_control.confidence_column}  # References nested value
+
+# Use in defaults for dynamic composition
+defaults:
+  - fdr_method: nonparametric  # Loads fdr_method/nonparametric.yaml
+```
+
+Common interpolation patterns in Winnow configs:
+- `${residue_masses}` - References amino acid masses from residues.yaml
+- `${invalid_prosit_tokens}` - References invalid tokens from residues.yaml
+- `${fdr_control.confidence_column}` - References FDR confidence column setting
+
+## Creating custom configurations
+
+### Add a custom data loader
+
+1. Create a loader class implementing the `DatasetLoader` protocol
+2. Add configuration file: `config/data_loader/custom.yaml`
+3. Use with: `winnow train data_loader=custom`
+
+Example `config/data_loader/custom.yaml`:
+```yaml
+_target_: my_module.CustomDatasetLoader
+residue_masses: ${residue_masses}
+custom_param: value
+```
+
+### Add custom calibration features
+
+1. Create a feature class inheriting from `CalibrationFeatures`
+2. Add to `config/calibrator.yaml`:
+   ```yaml
+   features:
+     custom_feature:
+       _target_: my_module.CustomFeature
+       param1: value1
+       param2: value2
+   ```
+
+### Add custom FDR method
+
+1. Create an FDR class implementing the `FDRControl` interface
+2. Add configuration file: `config/fdr_method/custom_method.yaml`
+3. Use with: `winnow predict fdr_method=custom_method`
+
+Example `config/fdr_method/custom_method.yaml`:
+```yaml
+_target_: my_module.CustomFDRControl
+confidence_feature: ${fdr_control.confidence_column}
+custom_param: value
+```
+
+## Debugging configuration
+
+### View resolved configuration
+
+To see the final composed configuration without running the pipeline, use the `winnow config` command:
+
+```bash
+# View training configuration
+winnow config train
+
+# View prediction configuration
+winnow config predict
+
+# View configuration with overrides
+winnow config train data_loader=mztab model_output_dir=custom/path
+winnow config predict fdr_method=database_grounded fdr_control.fdr_threshold=0.01
+```
+
+This prints the complete resolved YAML configuration with colour-coded hierarchical formatting for easy readability. Keys are coloured by nesting depth to help visualise the configuration structure. The output shows all defaults, composition and command-line overrides after they have been applied.
+
+**Note:** Some keys appear with quotes (e.g. `'N'`, `'Y'`) because they are reserved words in YAML that would otherwise be interpreted as boolean values. The quotes ensure they are treated as strings.
+
+### Configuration validation
+
+Hydra will validate that configuration files exist and can be composed. Invalid configurations will fail early with clear error messages:
+
+```bash
+winnow predict fdr_method=typo
+# Error: Could not find 'fdr_method/typo'
+# Available options in 'fdr_method': nonparametric, database_grounded
+```
+
+## Additional resources
+
+- [Hydra documentation](https://hydra.cc/docs/intro/)
+- [OmegaConf documentation](https://omegaconf.readthedocs.io/)
+- [Winnow API documentation](api/calibration.md)
+- [Example notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb)
+
+## Migration from old CLI
+
+If you're migrating from the old argument-based CLI:
+
+| Old CLI Argument | New Hydra Config Parameter |
+|------------------|---------------------------|
+| `--data-source instanovo` | `data_loader=instanovo` |
+| `--model-output-folder models/X` | `model_output_dir=models/X` |
+| `--dataset-output-path results/X.csv` | `dataset_output_path=results/X.csv` |
+| `--fdr-threshold 0.01` | `fdr_control.fdr_threshold=0.01` |
+| `--method winnow` | `fdr_method=nonparametric` |
+| `--method database-ground` | `fdr_method=database_grounded` |
+| `--local-model-folder models/X` | `calibrator.pretrained_model_name_or_path=models/X` |
+| `--huggingface-model-name X` | `calibrator.pretrained_model_name_or_path=X` |
+| `--confidence-column X` | `fdr_control.confidence_column=X` |
+| `--output-folder results/` | `output_folder=results/` |
+
+**Key changes:**
+
+- `data_source` renamed to `data_loader` (references config/data_loader/*.yaml)
+- `fdr_threshold` and `confidence_column` now nested under `fdr_control`
+- `local_model_folder` and `huggingface_model_name` merged into `pretrained_model_name_or_path`
+- Dataset paths are now specified directly as Hydra parameters instead of via separate YAML files:
+  - **Old**: Create `config.yaml` with `spectrum_path` and `beam_predictions_path`, then use `--dataset-config-path config.yaml`
+  - **New**: Use `dataset.spectrum_path_or_directory=... dataset.predictions_path=...` directly on command line
diff --git a/mkdocs.yml b/mkdocs.yml
index 01e8051..7ccdd11 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -60,12 +60,14 @@ plugins:
 
 nav:
   - Home: index.md
-  - API Reference:
+  - User guide:
+    - CLI reference: cli.md
+    - Configuration guide: configuration.md
+    - Examples: examples.md
+  - API reference:
     - Datasets: api/datasets.md
     - Calibration: api/calibration.md
     - FDR: api/fdr.md
-  - CLI Guide: cli.md
-  - Examples: examples.md
   - Contributing: contributing.md
   - License: license.md
 

From 252958208584460d91421b0244479671e69a663e Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 17:32:29 +0000
Subject: [PATCH 06/17] docs: make docs titles sentence case and fix bullet
 list formatting

---
 docs/api/calibration.md | 14 +++++++-------
 docs/api/datasets.md    |  4 ++--
 docs/api/fdr.md         | 12 ++++++------
 docs/contributing.md    | 16 ++++++++--------
 docs/examples.md        |  2 +-
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/docs/api/calibration.md b/docs/api/calibration.md
index e3170f7..69767c4 100644
--- a/docs/api/calibration.md
+++ b/docs/api/calibration.md
@@ -96,7 +96,7 @@ class CustomFeature(CalibrationFeatures):
 - **Column Specification**: Define output column names
 - **Dataset Integration**: Direct access to CalibrationDataset for computation
 
-## Built-in Features
+## Built-in features
 
 ### MassErrorFeature
 
@@ -159,7 +159,7 @@ feature = RetentionTimeFeature(hidden_dim=10, train_fraction=0.1)
 
 **Purpose**: Incorporates chromatographic information for confidence calibration.
 
-## Handling Missing Features
+## Handling missing features
 
 Prosit-dependent features (PrositFeatures, ChimericFeatures, RetentionTimeFeature) may not be computable for all peptides due to limitations like:
 
@@ -170,7 +170,7 @@ Prosit-dependent features (PrositFeatures, ChimericFeatures, RetentionTimeFeatur
 
 Winnow provides two strategies for handling such cases:
 
-### Learn Strategy (Default, `learn_from_missing=True`)
+### Learn strategy (default, `learn_from_missing=True`)
 
 **Recommended for most use cases.**
 
@@ -179,7 +179,7 @@ Winnow provides two strategies for handling such cases:
 - Uses all available data, maximising recall
 - More robust across diverse datasets
 
-### Filter Strategy (`learn_from_missing=False`)
+### Filter strategy (`learn_from_missing=False`)
 
 **Use when you want strict data quality requirements.**
 
@@ -228,14 +228,14 @@ rt_feat = RetentionTimeFeature(hidden_dim=10, train_fraction=0.1, learn_from_mis
 
 ## Workflow
 
-### Training Workflow
+### Training workflow
 
 1. **Create Calibrator**: Initialise `ProbabilityCalibrator`
 2. **Add Features**: Use `add_feature()` to include desired calibration features
 3. **Fit Model**: Call `fit()` with labelled `CalibrationDataset`
 4. **Save Model**: Use `save()` to persist trained calibrator
 
-### Prediction Workflow
+### Prediction workflow
 
 1. **Load Calibrator**: Use `load()` to restore trained model from a HuggingFace repository or a local directory
    ```python
@@ -251,7 +251,7 @@ rt_feat = RetentionTimeFeature(hidden_dim=10, train_fraction=0.1, learn_from_mis
 2. **Predict**: Call `predict()` with unlabelled `CalibrationDataset`
 3. **Access Results**: Calibrated scores stored in dataset's "calibrated_confidence" column
 
-## Feature Dependencies
+## Feature dependencies
 
 The system automatically handles feature dependencies:
 
diff --git a/docs/api/datasets.md b/docs/api/datasets.md
index 638a456..7fe03a3 100644
--- a/docs/api/datasets.md
+++ b/docs/api/datasets.md
@@ -37,12 +37,12 @@ dataset.save(Path("output_directory"))
 
 **Key Features:**
 
-- **Multiple Format Support**: Load data using specialized loaders for different file formats
+- **Multiple Format Support**: Load data using specialised loaders for different file formats
 - **Data Integration**: Combines spectral data with prediction metadata
 - **Filtering**: Removes invalid tokens and unsupported modifications
 - **Evaluation**: Computes correctness labels when ground truth available
 
-### Data Loaders
+### Data loaders
 
 The datasets module provides several data loaders that implement the `DatasetLoader` protocol:
 
diff --git a/docs/api/fdr.md b/docs/api/fdr.md
index db73cec..6f3aa66 100644
--- a/docs/api/fdr.md
+++ b/docs/api/fdr.md
@@ -2,7 +2,7 @@
 
 The `winnow.fdr` module implements false discovery rate (FDR) estimation and control methods for *de novo* peptide sequencing using both database-grounded and non-parametric approaches.
 
-## Base Interface
+## Base interface
 
 ### FDRControl
 
@@ -108,9 +108,9 @@ dataset_with_q_values = fdr_control.add_psm_q_value(dataset, "confidence")
     - **Q-value**: `compute_q_value(score)` - Minimum FDR for significance
 - **No Ground Truth Required**: Works with confidence scores alone
 
-## Additional Features
+## Additional features
 
-### PSM-Specific FDR
+### PSM-specific FDR
 
 Both methods support PSM-specific FDR estimation:
 
@@ -140,7 +140,7 @@ dataset_with_q_values = fdr_control.add_psm_q_value(
 psm_q_values = dataset_with_q_values["psm_q_value"]
 ```
 
-### Confidence Curves
+### Confidence curves
 
 Generate FDR vs confidence curves for analysis:
 
@@ -159,7 +159,7 @@ plt.xlabel("FDR Threshold")
 plt.ylabel("Confidence Cutoff")
 ```
 
-### Dataset Filtering
+### Dataset filtering
 
 Filter PSM datasets at target FDR levels:
 
@@ -175,7 +175,7 @@ filtered_psms = fdr_control.filter_entries(
 print(f"Retained {len(filtered_psms)} PSMs at 1% FDR")
 ```
 
-### FDR Estimation Method Selection
+### FDR estimation method selection
 
 **Use DatabaseGroundedFDRControl when:**
 
diff --git a/docs/contributing.md b/docs/contributing.md
index 9a36ba0..e11434f 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -2,7 +2,7 @@
 
 Contributions are what make the open-source community such an amazing place to learn, inspire and create, and we welcome your support! Any contributions you make are **greatly appreciated**.
 
-## Ways to Contribute
+## Ways to contribute
 
 If you have ideas for enhancements, you can:
 
@@ -12,7 +12,7 @@ If you have ideas for enhancements, you can:
 - Improve documentation
 - Add examples or tutorials
 
-## Contribution Process
+## Contribution process
 
 1. **Fork the repository**
    ```bash
@@ -63,9 +63,9 @@ If you have ideas for enhancements, you can:
    - Reference any related issues
    - Include examples if applicable
 
-## Development Guidelines
+## Development guidelines
 
-### Code Style
+### Code style
 
 This project uses:
 
@@ -85,17 +85,17 @@ This project uses:
 - Add examples to demonstrate usage
 - Update the API documentation pages if you add new modules
 
-### Issues and Bug Reports
+### Issues and bug reports
 
 When reporting bugs, please include:
 
 - A clear and descriptive title
 - Steps to reproduce the issue
-- Expected vs. actual behavior
+- Expected vs. actual behaviour
 - Your environment details (Python version, OS, etc.)
 - Any relevant error messages or logs
 
-### Feature Requests
+### Feature requests
 
 For feature requests, please:
 
@@ -104,7 +104,7 @@ For feature requests, please:
 - Describe the proposed solution
 - Consider alternatives you've thought about
 
-## Getting Help
+## Getting help
 
 If you need help or have questions:
 
diff --git a/docs/examples.md b/docs/examples.md
index 1b390f2..4c0f24a 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -2,7 +2,7 @@
 
 For a comprehensive example demonstrating the full winnow workflow, see our example notebook:
 
-📓 **[FDR Plots Example Notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb)**
+📓 **[FDR plots example notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb)**
 
 This notebook shows you how to:
 

From d7e713c91c89c165cffdb4b438bcb4398050a730 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 17:50:40 +0000
Subject: [PATCH 07/17] perf: optimise CLI startup time with lazy imports

---
 winnow/scripts/main.py | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/winnow/scripts/main.py b/winnow/scripts/main.py
index ad3c7a7..d685552 100644
--- a/winnow/scripts/main.py
+++ b/winnow/scripts/main.py
@@ -1,17 +1,24 @@
-from typing import Union, Tuple, Optional, List
+"""CLI entry point for winnow.
+
+Note: This module uses lazy imports to minimise CLI startup time.
+Heavy dependencies (PyTorch, InstaNovo, etc.) are imported only when
+needed, significantly reducing --help and config command times.
+"""
+
+from __future__ import annotations
+
+from typing import Union, Tuple, Optional, List, TYPE_CHECKING
 import typer
 import logging
 from rich.logging import RichHandler
-import pandas as pd
-from hydra import initialize, compose
 from pathlib import Path
-from hydra.utils import instantiate
 
-from winnow.calibration.calibrator import ProbabilityCalibrator
-from winnow.datasets.calibration_dataset import CalibrationDataset
-from winnow.fdr.nonparametric import NonParametricFDRControl
-from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
-from winnow.scripts.config_formatter import ConfigFormatter
+# Imports for type annotations only; heavy modules are imported lazily in functions
+if TYPE_CHECKING:
+    import pandas as pd
+    from winnow.datasets.calibration_dataset import CalibrationDataset
+    from winnow.fdr.nonparametric import NonParametricFDRControl
+    from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
 
 # Logging setup
 logger = logging.getLogger(__name__)
@@ -44,6 +51,8 @@ def print_config(cfg) -> None:
     Args:
         cfg: OmegaConf configuration object to print
     """
+    from winnow.scripts.config_formatter import ConfigFormatter
+
     formatter = ConfigFormatter()
     formatter.print_config(cfg)
 
@@ -75,6 +84,8 @@ def apply_fdr_control(
     confidence_column: str,
 ) -> pd.DataFrame:
     """Apply FDR control to a dataset."""
+    from winnow.fdr.nonparametric import NonParametricFDRControl
+
     if isinstance(fdr_control, NonParametricFDRControl):
         fdr_control.fit(dataset=dataset.metadata[confidence_column])
         dataset.metadata = fdr_control.add_psm_pep(dataset.metadata, confidence_column)
@@ -112,6 +123,8 @@ def separate_metadata_and_predictions(
     Returns:
         Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the metadata dataframe and the prediction and FDR metrics dataframe.
     """
+    from winnow.fdr.nonparametric import NonParametricFDRControl
+
     # Separate out metadata from prediction and FDR metrics
     preds_and_fdr_metrics_cols = [
         "spectrum_id",
@@ -139,6 +152,9 @@ def train_entry_point(
         overrides: Optional list of config overrides.
         execute: If False, only print the configuration and return without executing the pipeline.
     """
+    from hydra import initialize, compose
+    from hydra.utils import instantiate
+
     with initialize(
         config_path="../../config", version_base="1.3", job_name="winnow_train"
     ):
@@ -148,6 +164,8 @@ def train_entry_point(
         print_config(cfg)
         return
 
+    from winnow.calibration.calibrator import ProbabilityCalibrator
+
     logger.info("Starting training pipeline.")
     logger.info(f"Training configuration: {cfg}")
 
@@ -194,6 +212,9 @@ def predict_entry_point(
         overrides: Optional list of config overrides.
         execute: If False, only print the configuration and return without executing the pipeline.
     """
+    from hydra import initialize, compose
+    from hydra.utils import instantiate
+
     with initialize(
         config_path="../../config", version_base="1.3", job_name="winnow_predict"
     ):
@@ -203,6 +224,9 @@ def predict_entry_point(
         print_config(cfg)
         return
 
+    from winnow.calibration.calibrator import ProbabilityCalibrator
+    from winnow.fdr.database_grounded import DatabaseGroundedFDRControl
+
     logger.info("Starting prediction pipeline.")
     logger.info(f"Prediction configuration: {cfg}")
 

From 980a79363696bbe5411cbaebe09a0d3911174468 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Wed, 26 Nov 2025 18:34:05 +0000
Subject: [PATCH 08/17] chore: update gitignore to ignore extra supported files
 and images

---
 .gitignore | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.gitignore b/.gitignore
index b2a30c6..f1c63ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,12 +12,21 @@ docs_public
 *.csv
 *.parquet
 *.ipc
+*.mztab
+*.fasta
+*.mgf
 *.pkl
 *.json
 *.yaml
+*.pdf
+*.png
+
+*.ipynb
 
 examples/winnow-general-model
 examples/winnow-ms-datasets
 examples/output
 
 build/
+
+.cursorrules

From bb25d28a8538f49b33dba9c00279ef2fcd96d732 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 13:29:34 +0000
Subject: [PATCH 09/17] fix: convert predictions_path to a Path before file
 loading

---
 winnow/datasets/data_loaders.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/winnow/datasets/data_loaders.py b/winnow/datasets/data_loaders.py
index 3e468b8..37d3436 100644
--- a/winnow/datasets/data_loaders.py
+++ b/winnow/datasets/data_loaders.py
@@ -49,16 +49,17 @@ def __init__(
 
     @staticmethod
     def _load_beam_preds(
-        predictions_path: Path,
+        predictions_path: Path | str,
     ) -> Tuple[pl.DataFrame, pl.DataFrame]:
         """Loads a dataset from a CSV file and optionally filters it.
 
         Args:
-            predictions_path (Path): The path to the CSV file containing the predictions.
+            predictions_path (Path | str): The path to the CSV file containing the predictions.
 
         Returns:
             Tuple[pl.DataFrame, pl.DataFrame]: A tuple containing the predictions and beams dataframes.
         """
+        predictions_path = Path(predictions_path)
         if predictions_path.suffix != ".csv":
             raise ValueError(
                 f"Unsupported file format for InstaNovo beam predictions: {predictions_path.suffix}. Supported format is .csv."
@@ -421,7 +422,7 @@ def _load_spectrum_data(spectrum_path: Path | str) -> Tuple[pl.DataFrame, bool]:
         return df, has_labels
 
     @staticmethod
-    def _load_dataset(predictions_path: Path) -> pl.DataFrame:
+    def _load_dataset(predictions_path: Path | str) -> pl.DataFrame:
         """Load predictions from mzTab file.
 
         Args:
@@ -430,6 +431,7 @@ def _load_dataset(predictions_path: Path) -> pl.DataFrame:
         Returns:
             DataFrame containing predictions
         """
+        predictions_path = Path(predictions_path)
         if predictions_path.suffix != ".mztab":
             raise ValueError(
                 f"Unsupported file format for MZTab predictions: {predictions_path.suffix}. Supported format is .mztab."

From a883fcdfdb294857697d95b7fbec32069e746007 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:03:43 +0000
Subject: [PATCH 10/17] docs: add instructions on conversion from mgf to
 parquet file

---
 docs/cli.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/cli.md b/docs/cli.md
index 2c119e3..c9f4f26 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -149,13 +149,15 @@ For prediction (`winnow predict`), you need:
 
 Winnow supports multiple input formats:
 
-- **InstaNovo**: Parquet spectra + CSV predictions (beam search format)
-- **MZTab**: MGF/Parquet spectra + MZTab predictions
+- **InstaNovo**: Parquet or IPC spectra + CSV predictions (beam search format)
+- **MZTab**: Parquet or IPC spectra + MZTab predictions
 - **PointNovo**: Similar to InstaNovo format
 - **Winnow**: Internal format (directory with metadata.csv and predictions.pkl)
 
 Specify the format using `data_source=<format>` parameter.
 
+**Note on MGF files**: While many users have their input data in `.mgf` format, Winnow currently requires spectrum data to be in `.parquet` or `.ipc` format. To convert `.mgf` files to `.parquet`, you can use InstaNovo's conversion utilities. See the [InstaNovo documentation](https://instadeepai.github.io/InstaNovo/) for instructions on using `instanovo convert` or the `SpectrumDataFrame` class to perform this conversion.
+
 ## FDR methods
 
 ### Non-parametric method (`fdr_method=nonparametric`)

From e9126d9c4ee4850b0e906bfd3f97749bba297d0a Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:08:00 +0000
Subject: [PATCH 11/17] docs: remove references to old Typer CLI arguments

---
 docs/cli.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/cli.md b/docs/cli.md
index c9f4f26..ceafd77 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -190,15 +190,15 @@ winnow predict fdr_method=database_grounded fdr_control.fdr_threshold=0.05
 
 Training produces:
 
-1. **Model checkpoints** (in `--model-output-folder`):
+1. **Model checkpoints** (`model_output_dir`):
    - `calibrator.pkl`: Complete trained calibrator with all features and parameters
 
-2. **Training results** (`--dataset-output-path`):
+2. **Training results** (`dataset_output_path`):
    - CSV with calibrated scores and evaluation metrics
 
 ### Prediction output
 
-Prediction produces two CSV files in the `--output-folder` directory:
+Prediction produces two CSV files in the `output_folder` directory:
 
 1. **`metadata.csv`**: Contains all metadata and feature columns from the input dataset
    - Original metadata columns (spectrum information, precursors, etc.)

From 864095e2f3eba6f4ff3cd2ff415c20ac9c38b866 Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:29:39 +0000
Subject: [PATCH 12/17] feat: create toy data for CLI quickstart

chore: pre-commit edits to generate_sample_data
---
 .gitignore                            |   5 +
 Makefile                              |  25 +++++
 config/predict.yaml                   |   4 +-
 config/train.yaml                     |   4 +-
 examples/example_data/predictions.csv |  21 +++++
 examples/example_data/spectra.ipc     | Bin 0 -> 12102 bytes
 scripts/generate_sample_data.py       | 128 ++++++++++++++++++++++++++
 7 files changed, 183 insertions(+), 4 deletions(-)
 create mode 100644 examples/example_data/predictions.csv
 create mode 100644 examples/example_data/spectra.ipc
 create mode 100755 scripts/generate_sample_data.py

diff --git a/.gitignore b/.gitignore
index f28b420..9bdfb1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,11 @@ examples/winnow-general-model
 examples/winnow-ms-datasets
 examples/output
 
+# Sample data files
+examples/example_data/*.ipc
+examples/example_data/*.csv
+examples/example_data/*.parquet
+
 build/
 
 .cursorrules
diff --git a/Makefile b/Makefile
index 2229bb0..fa6dc45 100644
--- a/Makefile
+++ b/Makefile
@@ -108,3 +108,28 @@ set-gcp-credentials:
 ## Set the Ceph credentials
 set-ceph-credentials:
 	uv run python scripts/set_ceph_credentials.py
+
+#################################################################################
+## Sample data and CLI commands													#
+#################################################################################
+
+.PHONY: sample-data train-sample predict-sample clean clean-all
+
+## Generate sample data files for testing
+sample-data:
+	uv run python scripts/generate_sample_data.py
+
+## Run winnow train with sample data (uses defaults from config)
+train-sample:
+	winnow train
+
+## Run winnow predict with sample data (uses locally trained model from models/new_model)
+predict-sample:
+	winnow predict calibrator.pretrained_model_name_or_path=models/new_model
+
+## Clean output directories (does not delete sample data)
+clean:
+	rm -rf models/ results/
+
+## Clean outputs and regenerate sample data
+clean-all: clean sample-data
diff --git a/config/predict.yaml b/config/predict.yaml
index 743c4d2..fa53a6a 100644
--- a/config/predict.yaml
+++ b/config/predict.yaml
@@ -10,10 +10,10 @@ defaults:
 dataset:
   # Dataset paths:
   # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
-  spectrum_path_or_directory: data/spectra.ipc
+  spectrum_path_or_directory: examples/example_data/spectra.ipc
   # Path to the beam predictions file.
   # Leave as `null` if data source is `winnow`, or loading will fail.
-  predictions_path: data/predictions.csv
+  predictions_path: examples/example_data/predictions.csv
   # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
 
 calibrator:
diff --git a/config/train.yaml b/config/train.yaml
index 76ab931..839d3f8 100644
--- a/config/train.yaml
+++ b/config/train.yaml
@@ -10,10 +10,10 @@ defaults:
 dataset:
   # Dataset paths:
   # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
-  spectrum_path_or_directory: data/spectra.ipc
+  spectrum_path_or_directory: examples/example_data/spectra.ipc
   # Path to the beam predictions file.
   # Leave as `null` if data source is `winnow`, or loading will fail.
-  predictions_path: data/predictions.csv
+  predictions_path: examples/example_data/predictions.csv
   # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
 
 # Output paths:
diff --git a/examples/example_data/predictions.csv b/examples/example_data/predictions.csv
new file mode 100644
index 0000000..668dc36
--- /dev/null
+++ b/examples/example_data/predictions.csv
@@ -0,0 +1,21 @@
+spectrum_id,predictions,predictions_tokenised,log_probs,sequence,instanovo_predictions_beam_0,instanovo_log_probabilities_beam_0,token_log_probabilities_beam_0,instanovo_predictions_beam_1,instanovo_log_probabilities_beam_1,token_log_probabilities_beam_1,instanovo_predictions_beam_2,instanovo_log_probabilities_beam_2,token_log_probabilities_beam_2
+spectrum_0,PEPTIDEK,"P, E, P, T, I, D, E, K",-0.7636862219969088,PEPTIDEK,PEPTIDEK,-1.6500325728393013,"[np.float64(-0.1748586825255312), np.float64(-0.11812380256490737), np.float64(-0.4244174254105504), np.float64(-0.1257953927325198), np.float64(-0.38816068057507885), np.float64(-0.26410896668030115), np.float64(-0.6292338175201165), np.float64(-0.15217329880962896)]",DANMSILK,-1.8352181215026244,"[np.float64(-0.6473942916341903), np.float64(-0.5873544435494568), np.float64(-0.3311340037739977), np.float64(-0.461679121275345), np.float64(-0.33102174101472986), np.float64(-0.12941658831241085), np.float64(-0.5102407439210165), np.float64(-0.47393637044827164)]",KKDFIARQ,-0.5405515225165534,"[np.float64(-0.39934497225182325), np.float64(-0.3004290370757989), np.float64(-0.4109280422910388), np.float64(-0.11995081362279063), np.float64(-0.3797384736330186), np.float64(-0.2892842986776428), np.float64(-0.5312895300249746), np.float64(-0.15412980799057313)]"
+spectrum_1,MASSIVE,"M, A, S, S, I, V, E",-0.8844631148929989,MASSIVE,MASSIVE,-0.5322991528212426,"[np.float64(-0.4793246952823274), np.float64(-0.5026558721895318), np.float64(-0.6890519382716996), np.float64(-0.3322875958989861), np.float64(-0.37071802113401764), np.float64(-0.2816344349600218), np.float64(-0.11509479188677506)]",VKGVFVC,-2.2211940225832416,"[np.float64(-0.5192989955325572), np.float64(-0.12889869750317104), np.float64(-0.2862196070574901), np.float64(-0.5783869416636498), np.float64(-0.13090159847464067), np.float64(-0.5061584166340479), np.float64(-0.3285505220491112)]",WSGCVLW,-0.10823656692822622,"[np.float64(-0.18670320295967338), np.float64(-0.5818906788867247), np.float64(-0.262767601555314), np.float64(-0.6788120209033441), np.float64(-0.4933311252591855), np.float64(-0.4428554553451626), np.float64(-0.5978579728747742)]"
+spectrum_2,PEPTIDES,"P, E, P, T, I, D, E, S",-1.1445489034470746,PEPTIDES,PEPTIDES,-1.5764250273197848,"[np.float64(-0.1468017954923722), np.float64(-0.1461116442723898), np.float64(-0.34234627925918576), np.float64(-0.6132135620205873), np.float64(-0.557976914941181), np.float64(-0.12645240685032602), np.float64(-0.40829732501892785), np.float64(-0.16722782455894564)]",NKTCCEEK,-1.3851657166463507,"[np.float64(-0.6381269348405222), np.float64(-0.6096807297916665), np.float64(-0.12046817990332713), np.float64(-0.14587259813978418), np.float64(-0.37513115016624676), np.float64(-0.6357866416286385), np.float64(-0.44519071831487855), np.float64(-0.5668718493437244)]",QLYMDRYY,-0.17325524800724948,"[np.float64(-0.4626802366225312), np.float64(-0.5774600029268754), np.float64(-0.260607219094593), np.float64(-0.6087770001679803), np.float64(-0.6778328782889893), np.float64(-0.6054499435950794), np.float64(-0.6180098006060015), np.float64(-0.2686769664815069)]"
+spectrum_3,SEQQENCR,"S, E, Q, Q, E, N, C, R",-0.7311783591483478,SEQQENCR,SEQQENCR,-1.2293873828990864,"[np.float64(-0.2628321885993025), np.float64(-0.28566501693952406), np.float64(-0.4939299780095215), np.float64(-0.1523369331149818), np.float64(-0.5399857638280854), np.float64(-0.4128491573768657), np.float64(-0.10822127377413646), np.float64(-0.23030559688913488)]",QMDWSCTW,-0.21420092629033446,"[np.float64(-0.5434079040906716), np.float64(-0.25047567327076287), np.float64(-0.5176715142304991), np.float64(-0.4960917101924121), np.float64(-0.39676100053765784), np.float64(-0.15877088367986547), np.float64(-0.6193236389886507), np.float64(-0.2189340884111104)]",CADCDTWR,-0.9597610429785925,"[np.float64(-0.43203562616194674), np.float64(-0.6629826125368027), np.float64(-0.5358447546153118), np.float64(-0.6571848533550182), np.float64(-0.15123173840126744), np.float64(-0.32936825308173), np.float64(-0.5434718876732179), np.float64(-0.2543653486380659)]"
+spectrum_4,PEPTIDE,"P, E, P, T, I, D, E",-1.998773817112167,PEPTIDE,PEPTIDE,-1.0169090521774529,"[np.float64(-0.3885717601407536), np.float64(-0.32259756174369875), np.float64(-0.4087170966436953), np.float64(-0.2347162615695798), np.float64(-0.41600229827170704), np.float64(-0.26388994207667704), np.float64(-0.24604601813329632)]",SHMEIKL,-0.4648015839760259,"[np.float64(-0.16509792754122304), np.float64(-0.35623885581238096), np.float64(-0.565518987675161), np.float64(-0.14801315354602163), np.float64(-0.2241973681807531), np.float64(-0.5001368067401828), np.float64(-0.4779056652573718)]",FYHYVIF,-0.9785922619918105,"[np.float64(-0.3650651185678444), np.float64(-0.1741765659897027), np.float64(-0.6365691372850841), np.float64(-0.4028584132102789), np.float64(-0.455441439800937), np.float64(-0.1118858400579136), np.float64(-0.46078019177624896)]"
+spectrum_5,MASSIVE,"M, A, S, S, I, V, E",-0.24621410846739605,MASSIVE,MASSIVE,-0.23913321643116375,"[np.float64(-0.2959509005053035), np.float64(-0.3340558584740735), np.float64(-0.5405415673931288), np.float64(-0.5453056347013177), np.float64(-0.20053170454510355), np.float64(-0.4842922493310519), np.float64(-0.27130342954803666)]",MKLDMKF,-1.5985820569147855,"[np.float64(-0.1313095812032897), np.float64(-0.5782973191144059), np.float64(-0.3843019810165814), np.float64(-0.337482968089092), np.float64(-0.11504036937440844), np.float64(-0.2515551445238819), np.float64(-0.1541555810974277)]",VMFQEVW,-0.1868441942157686,"[np.float64(-0.1259407974887463), np.float64(-0.41180563320360125), np.float64(-0.20943366253316517), np.float64(-0.2121358261999587), np.float64(-0.46223096380945944), np.float64(-0.36831203124148165), np.float64(-0.23878055929552042)]"
+spectrum_6,PEPTIDES,"P, E, P, T, I, D, E, S",-2.053088690605781,PEPTIDES,PEPTIDES,-0.4574868196032306,"[np.float64(-0.4782830729270199), np.float64(-0.5837703088047349), np.float64(-0.41312033453939045), np.float64(-0.47143215191802523), np.float64(-0.515236660579557), np.float64(-0.30755610309096204), np.float64(-0.5139349507413116), np.float64(-0.22425886715132187)]",LIFMQWMK,-0.9718403755327223,"[np.float64(-0.5537780669436364), np.float64(-0.27390837184539524), np.float64(-0.49662970813372276), np.float64(-0.14659761878274918), np.float64(-0.3815725457345243), np.float64(-0.2223147416057317), np.float64(-0.3304959385027363), np.float64(-0.12144100705013827)]",EEEKTIMF,-1.3864529295124846,"[np.float64(-0.13118130757164556), np.float64(-0.14683209030189098), np.float64(-0.6463128810875516), np.float64(-0.46430296742107285), np.float64(-0.23572900587520723), np.float64(-0.6496205181336915), np.float64(-0.5155505175747892), np.float64(-0.35797428711694096)]"
+spectrum_7,SEQQENCR,"S, E, Q, Q, E, N, C, R",-1.0126884990735847,SEQQENCR,SEQQENCR,-0.6293656274252704,"[np.float64(-0.23818275135967854), np.float64(-0.2508946419694623), np.float64(-0.6141565126783476), np.float64(-0.1307273604299081), np.float64(-0.35476840089447226), np.float64(-0.15097470620351006), np.float64(-0.5457070454013979), np.float64(-0.3041100426845633)]",IRKMLTRR,-1.6150934193603126,"[np.float64(-0.558675777467587), np.float64(-0.5622891586192849), np.float64(-0.22262640968265274), np.float64(-0.6096692929137849), np.float64(-0.60962553974469), np.float64(-0.5755674316081473), np.float64(-0.16979723808362127), np.float64(-0.34714551812652594)]",SAKWQYTE,-0.5875916687723126,"[np.float64(-0.2739061808702664), np.float64(-0.685293229754978), np.float64(-0.3508388887063106), np.float64(-0.3221883856756329), np.float64(-0.2870135323655783), np.float64(-0.54841910271944), np.float64(-0.5390441543024372), np.float64(-0.6366216069212395)]"
+spectrum_8,PEPTIDEK,"P, E, P, T, I, D, E, K",-0.16809729420639122,PEPTIDEK,PEPTIDEK,-2.093856462922359,"[np.float64(-0.1208538841340978), np.float64(-0.10593924046597834), np.float64(-0.6739968247853784), np.float64(-0.367416235067168), np.float64(-0.4835849174484238), np.float64(-0.6434274917192637), np.float64(-0.3176606262906486), np.float64(-0.6890887991821578)]",QIGHQLPH,-0.8409884429664322,"[np.float64(-0.3865912610010219), np.float64(-0.2028054767104919), np.float64(-0.3039296650947225), np.float64(-0.29360942025434017), np.float64(-0.37896427688065293), np.float64(-0.2853872399265587), np.float64(-0.29539848571849486), np.float64(-0.5026579451662836)]",IIIPWPFP,-1.0115963336373757,"[np.float64(-0.19937714531373263), np.float64(-0.20444283504480595), np.float64(-0.4137493139784607), np.float64(-0.1179623028073559), np.float64(-0.463476904532115), np.float64(-0.14814939996838383), np.float64(-0.10762393928338725), np.float64(-0.28369617232260474)]"
+spectrum_9,MASSIVE,"M, A, S, S, I, V, E",-0.3737191989139599,MASSIVE,MASSIVE,-0.3754229707148469,"[np.float64(-0.29502760377383414), np.float64(-0.16478904447910184), np.float64(-0.15847935136804028), np.float64(-0.1258718950105468), np.float64(-0.22822452773890872), np.float64(-0.5553259322536723), np.float64(-0.3947160936618421)]",CTCTHRG,-0.2812216804756174,"[np.float64(-0.6891268220162872), np.float64(-0.6443499230468431), np.float64(-0.31203511169866915), np.float64(-0.6340747505196449), np.float64(-0.46037813089614893), np.float64(-0.32877982205598205), np.float64(-0.12267281460805764)]",YGPLTDS,-0.532976432492643,"[np.float64(-0.1364500400584697), np.float64(-0.30311192763712225), np.float64(-0.6029049630917274), np.float64(-0.22560730892774605), np.float64(-0.11261870574384422), np.float64(-0.20535709613858066), np.float64(-0.22901651262969214)]"
+spectrum_10,PEPTIDES,"P, E, P, T, I, D, E, S",-1.123583176952855,PEPTIDES,PEPTIDES,-0.8771661417424551,"[np.float64(-0.15740286452934388), np.float64(-0.5074670830930991), np.float64(-0.3896039471783252), np.float64(-0.2919916248094169), np.float64(-0.613705876429404), np.float64(-0.36234259703071175), np.float64(-0.6580083234454621), np.float64(-0.47056220486219613)]",AFVKDGHC,-0.8078643993412195,"[np.float64(-0.12952893636940435), np.float64(-0.3982594844453759), np.float64(-0.21175494102021075), np.float64(-0.4267197749543828), np.float64(-0.6299712021605661), np.float64(-0.2561911794984124), np.float64(-0.14586440600122427), np.float64(-0.14622131508054437)]",LCGIIIDM,-1.228990618946165,"[np.float64(-0.5282440472509209), np.float64(-0.13380557816112335), np.float64(-0.21462541230119056), np.float64(-0.18639319964827125), np.float64(-0.6282733718101716), np.float64(-0.6357346822374799), np.float64(-0.11126765809420396), np.float64(-0.438963993274627)]"
+spectrum_11,SEQQENCR,"S, E, Q, Q, E, N, C, R",-1.1969333886900788,SEQQENCR,SEQQENCR,-0.29889443686511197,"[np.float64(-0.22212422781795355), np.float64(-0.24054453074744123), np.float64(-0.13167973941768366), np.float64(-0.2097854855152427), np.float64(-0.5342795186427891), np.float64(-0.14851338166389896), np.float64(-0.32724699796368095), np.float64(-0.17678592070136948)]",QRAHHKLL,-0.43457509864518357,"[np.float64(-0.1105858335564288), np.float64(-0.357580347006545), np.float64(-0.22107648944414238), np.float64(-0.1794892099187191), np.float64(-0.1293304830617529), np.float64(-0.5495110663772587), np.float64(-0.3057471572887846), np.float64(-0.39238588476482134)]",RIPRPPFE,-0.36305519703656475,"[np.float64(-0.2475028944380424), np.float64(-0.5145985151342626), np.float64(-0.31922913307694395), np.float64(-0.4758349798382712), np.float64(-0.18318022233364076), np.float64(-0.44760475505366243), np.float64(-0.25799722705786743), np.float64(-0.3578587295327179)]"
+spectrum_12,PEPTIDE,"P, E, P, T, I, D, E",-2.2627327346165718,PEPTIDE,PEPTIDE,-1.1922520920581292,"[np.float64(-0.14041699433925908), np.float64(-0.18442234802062135), np.float64(-0.41903800577692385), np.float64(-0.30208495832668303), np.float64(-0.3958922269572428), np.float64(-0.2514673415874037), np.float64(-0.3654640324379145)]",VSWYEYF,-0.15609828181522786,"[np.float64(-0.22758344432549726), np.float64(-0.29888575717371463), np.float64(-0.3183888424650262), np.float64(-0.6674141573377196), np.float64(-0.30447432496258514), np.float64(-0.593466811610443), np.float64(-0.4030341591544194)]",LHLNHAF,-1.641115699845967,"[np.float64(-0.5472777119191542), np.float64(-0.5876023438768648), np.float64(-0.10828021127317443), np.float64(-0.24339750160847565), np.float64(-0.4115265979952508), np.float64(-0.6751902564015971), np.float64(-0.5386242171490149)]"
+spectrum_13,MASSIVE,"M, A, S, S, I, V, E",-0.4207237803745915,MASSIVE,MASSIVE,-1.927524559354949,"[np.float64(-0.5954686500037494), np.float64(-0.2774041402881911), np.float64(-0.5414867649227039), np.float64(-0.22566817272455073), np.float64(-0.6120338783723007), np.float64(-0.6873166164036753), np.float64(-0.45602877933358993)]",DPCSLQV,-2.2121470608974816,"[np.float64(-0.3086323809092415), np.float64(-0.13948249366539656), np.float64(-0.2680113890894485), np.float64(-0.6930560949306519), np.float64(-0.2602094457872819), np.float64(-0.2129751198875856), np.float64(-0.2179920740653772)]",AQAAMYI,-2.2216755886861574,"[np.float64(-0.2656756270782555), np.float64(-0.42577567928280485), np.float64(-0.5856596612114177), np.float64(-0.5007678184076443), np.float64(-0.5132153631989125), np.float64(-0.44772654204289475), np.float64(-0.19755642473161852)]"
+spectrum_14,PEPTIDES,"P, E, P, T, I, D, E, S",-1.9730880887099862,PEPTIDES,PEPTIDES,-0.23152997839443845,"[np.float64(-0.11911895086990873), np.float64(-0.24796113087805974), np.float64(-0.4072553888648447), np.float64(-0.6009311131221478), np.float64(-0.6459861183890181), np.float64(-0.23510901593709763), np.float64(-0.14829075674975176), np.float64(-0.6804079360614733)]",WFNMKYPY,-1.026264660310471,"[np.float64(-0.11194576854520978), np.float64(-0.506379937734463), np.float64(-0.15511785713479778), np.float64(-0.39597843192157367), np.float64(-0.458242766740372), np.float64(-0.336923656702095), np.float64(-0.2157812372341087), np.float64(-0.23802400704354176)]",THVNEWDK,-1.763136617868833,"[np.float64(-0.6889419867133187), np.float64(-0.4844283976885137), np.float64(-0.16400075165874992), np.float64(-0.2011703637060572), np.float64(-0.6803127395724825), np.float64(-0.3031217829593479), np.float64(-0.21027886324430187), np.float64(-0.6270096578994876)]"
+spectrum_15,SEQQENCR,"S, E, Q, Q, E, N, C, R",-0.6422981553561771,SEQQENCR,SEQQENCR,-1.284376255615888,"[np.float64(-0.30971457215833853), np.float64(-0.23389916096201158), np.float64(-0.4830823558098809), np.float64(-0.34373185888498303), np.float64(-0.19123223382020205), np.float64(-0.3030924824557671), np.float64(-0.42859740486194287), np.float64(-0.6329497140592579)]",WCMPYEVM,-0.7609435490228952,"[np.float64(-0.3439325461165797), np.float64(-0.10613573184789903), np.float64(-0.5653739536078608), np.float64(-0.4218009270107796), np.float64(-0.27634917662698405), np.float64(-0.14051550817522762), np.float64(-0.176729643544849), np.float64(-0.16337292259842476)]",VLNCKKWG,-0.5689391720364284,"[np.float64(-0.1711767795818404), np.float64(-0.2609198214766174), np.float64(-0.2291776759203892), np.float64(-0.1600739877147428), np.float64(-0.3882773714178049), np.float64(-0.29555260657944565), np.float64(-0.32826020629954655), np.float64(-0.6253858941986579)]"
+spectrum_16,PEPTIDEK,"P, E, P, T, I, D, E, K",-0.1396097391739677,PEPTIDEK,PEPTIDEK,-0.8580417332931659,"[np.float64(-0.36418526090876374), np.float64(-0.3750170473528839), np.float64(-0.5575436066762), np.float64(-0.6385560199240516), np.float64(-0.351885897096517), np.float64(-0.4576945870020608), np.float64(-0.5347783329153784), np.float64(-0.620151848601525)]",KEVPYKAD,-0.8307558545201461,"[np.float64(-0.16665506888765855), np.float64(-0.4580902089789952), np.float64(-0.36259850517015), np.float64(-0.5314076669237787), np.float64(-0.4903893178624768), np.float64(-0.4222928673951614), np.float64(-0.5519651708245514), np.float64(-0.3675151987697704)]",AVRCIYPR,-1.2215795426005904,"[np.float64(-0.44094024791973685), np.float64(-0.687912908195114), np.float64(-0.3110916469162062), np.float64(-0.4517853577435027), np.float64(-0.5866364468446136), np.float64(-0.21182272556682824), np.float64(-0.24681439659654547), np.float64(-0.13596327596018673)]"
+spectrum_17,MASSIVE,"M, A, S, S, I, V, E",-1.673576044172536,MASSIVE,MASSIVE,-1.0416352236295878,"[np.float64(-0.18011733411718406), np.float64(-0.44309690273126257), np.float64(-0.18546967224676997), np.float64(-0.6725901169561864), np.float64(-0.6913571219485906), np.float64(-0.13952980371681353), np.float64(-0.26191907305593165)]",AVCNQEV,-2.1064853589028734,"[np.float64(-0.49941457645173465), np.float64(-0.5613620275510856), np.float64(-0.12683698539601138), np.float64(-0.576995184417916), np.float64(-0.2158555530642328), np.float64(-0.18840672391507085), np.float64(-0.17063769574233772)]",FIDVAFY,-0.720662642157043,"[np.float64(-0.158819522076645), np.float64(-0.12507331977330707), np.float64(-0.3182599244225264), np.float64(-0.4554459665475208), np.float64(-0.11464413536235571), np.float64(-0.55061860938854), np.float64(-0.6838185169116369)]"
+spectrum_18,PEPTIDES,"P, E, P, T, I, D, E, S",-0.20385092564668866,PEPTIDES,PEPTIDES,-1.8243023305661654,"[np.float64(-0.10766780647118387), np.float64(-0.3684554790772888), np.float64(-0.5733129192232955), np.float64(-0.12679615833858135), np.float64(-0.5684051802426422), np.float64(-0.4343058484652547), np.float64(-0.13487630398608988), np.float64(-0.6298786133036921)]",YEEWHLMF,-0.21998080903850434,"[np.float64(-0.34970116792464323), np.float64(-0.12510327399597215), np.float64(-0.19074060012399183), np.float64(-0.3713073916915177), np.float64(-0.11676484767902127), np.float64(-0.1917769333555045), np.float64(-0.3137903773577807), np.float64(-0.15334435903655044)]",VARIMPMC,-2.155753908791302,"[np.float64(-0.28882205067883104), np.float64(-0.39508986900075516), np.float64(-0.42653791045742573), np.float64(-0.3632701174324455), np.float64(-0.2525320992759492), np.float64(-0.522647013018777), np.float64(-0.1830132796459985), np.float64(-0.31472506833943226)]"
+spectrum_19,SEQQENCR,"S, E, Q, Q, E, N, C, R",-0.11296685048715463,SEQQENCR,SEQQENCR,-0.2554346478178205,"[np.float64(-0.3833345369220081), np.float64(-0.15675178365843412), np.float64(-0.4655941268240131), np.float64(-0.12209625224365629), np.float64(-0.10652893116790317), np.float64(-0.3646981732451785), np.float64(-0.46439278292724706), np.float64(-0.11723563636988465)]",KLQESVHN,-0.5398811568573243,"[np.float64(-0.37747579886843446), np.float64(-0.2831553354880077), np.float64(-0.40827869647373755), np.float64(-0.41762472834219044), np.float64(-0.18005039232318404), np.float64(-0.5154473495901091), np.float64(-0.22414917531599954), np.float64(-0.24503592841782743)]",HDETWCYM,-1.2772633021909128,"[np.float64(-0.2731991203786418), np.float64(-0.1850045539068488), np.float64(-0.45263593167185406), np.float64(-0.49574184487856604), np.float64(-0.2299389411312791), np.float64(-0.2216644179226556), np.float64(-0.6078837089643009), np.float64(-0.3635693939132283)]"
diff --git a/examples/example_data/spectra.ipc b/examples/example_data/spectra.ipc
new file mode 100644
index 0000000000000000000000000000000000000000..1360c3358d1e0efe191c6bbebeb32d1634f62a28
GIT binary patch
literal 12102
zcmeHtc{rBew>OeVNraMSQ-w$<4QrW`qEbX<rbNcaJU!-FNFohXGNegL$(&4?=Xsvz
z5GCooeZTw$*L%)+&mZp}=eo{*uDw6|v)11G-osk=-p_q)iSy@`RD~!g{t)3T6$J$~
z1qH<<k>5lhH3cP+uBD)-pe0gjiXVi09U-A5ev8`0w1X(}l!#Y|$VtQzLY7X%D}R-s
zpx|5h^XC|mKSadE@{}w@T+*E+bY?`Jaap&D$PgijkP}^@I7{FL0u4<_`X-i!Bs)!Q
z3kz*K3Nj%-Nyw3iE8DzJ=r{=Z$~L=}+FTr;AxZxx0TizZ*<$^h_Dc;D`4&R9nZSS4
zUnE3WlrNUG)Hk=%H__E!l+hCN@qc@kA|YSwKLZgh^ohQc3{6cmNrpFxTF3r_I&S|`
z$IL=s*UG}u)I#&7wxuNng&CpaCEEH&pB5V@!ZaaA0vG3#mV)lD6<I9v&-%L8wJomw
z)z=iF>`p@Wk2aR(dhW01(ooP4wHMd+pLK8A6O>|v(DM<vlZcB`K}jTx6ih^EMv9Gp
z&HiF73W|B695o@OC-RnN`nn_wtDBmJdjDoui)~RX4ulAQ+t(`MnaqE+w~naWN9Y&j
zi@}i?lpukBa|Ytsi~U>d{U(Z~;V%~Zx3FCI--Js97bl4b%lJ3@Cy!nhtZaOVFYB{c
z=$HAjp1d@#%grtGWj!t$;J<`rzT7YJ>VHuZ*<~KTBu1Kl*DrIe#i=1eHtXNGoG$a_
z`ee3c&EL4pm+LP|*Aj8%T>k&&>sHL`%IB@rFZS^tc19S=zj5My_HX>)3NE~YA6mgh
zR&dc3Tx<nDyn-L0_;)`L5?sNBR`7!>xbO;oXayHp!9`bau@(IA3QlzJSgSWft0#Wc
z9ORYX9g3;h<?Xp8K{#aZwBd%0KL#Y`O?ViPv2|{P@l^sZm1t<&_B&v%L#)eZ<`CSW
zP*uCz+aG7%`EVo(2O+<tx{SjbdrT^c=6<gkiX}$<RF!eT==^A>!H+Um9L@Vk^OD{b
zMdwE+i&$JS_<VvB_f`)Sh+92j_Rtp(SWc!|oc6}1%Fk7;1@1^J?|&MW-Dnw?-EJlS
zkL1hcmT}qq{v*CpzOwxCbC%`*Y3B*$V|uQbKQ*!FCl~lmVtX=5B~{LOF~?!RsV}^j
zO`oG|D$|BHff1PfYE_{Xfe(zvZmZWv;Z+tBrh9i2Fo&IeoZ00$MtD|;MD2fx7M+7O
zS7&|EDMkA2%gf35K-vFR(UWN0X?opdFh2%wg;DN&v?>N0zKX2-v_1q6?*10e#vFzf
z)@SH+!V_>?@^H&3%NTTwp&u#Bx{d!;|3&ZB7^@`lU|*tAr1#iqjN4*sm>MUK0<r=O
zqtx<v_#_W^Sf(^4Gtk_ctT=>;!hMCU(-OFSbZdR!m=tDhGrBLlQxenWkG!3y62s$n
z7Y>GZO5o~B<F|L^B(QVaFQ0&cGZ>v1pmWz$0dEH0rhak<v0?q&sFF}=yb`v)PAgds
z^{yM94RGYaxQ&dbTr?HXRWKpHoP6rP>_06rz_T)EFUU*FC@hY34H3^sC@agW$Sl&o
zcD5zG@-kPJId@J*QR@66Uo5|*U!?!-`v23P|JR_2WO+N`T@svGLY*ZHSi<lne6@tl
zOZa06H!bbYqD!c^gp22Z#SpZFZ<p}P64owZ`x5?H!j=2Q->v*JooS!>ZPnt7n9*|3
zOveT@#gt~I9j~GE2>GnNh8I@1bMYLca>2oQOP9CeH;|(vVUz57S8U?I*SZ>7cy-$5
z>ejabs6!*@Wl}~$nQCU{y}6!PCHMQ?lR_KJ<ucN8%63KGv9-Eq$!@ssDuqpdtQi_I
z6zfpw2ccTQNagg14?cK2YoQkqh{6l+zT}(=KpX#;$2%w7aiNRuuJigp+!xvFaff7p
z#$>f4JhJ9k_JDbo)yD-j%BNGxk6y#Lytr$HTOIIN;`M{^<DMuLOJaT6<cY^iH?myH
zamScnBR3Y_TH*PHk<O_BH>8od6~k5RfRV%BKPi)fup?#uJ3oa#N^{#aSsHs|Qbu=~
zfR`mw)iy}!#E~$xt})MXoi<t)Mbv%a@J30uU-fZ6uj7jdX-%^<Uo1?uYK_V?!!J{F
zna1J)$Ts-(p_GCfGIl1s390qP)z1eUnw104|5OAO?y<#9FQjUmJFjC@eXoEtlO-07
z4{)o?I%BI6U1iEHKfHIw<i6xZe@yVCR51@$$3rh8ZfZNZ;<GojBdKXl=&9p%R6WxI
z4Z75{cg!2&+MAjo@yG2@!ur;}si%P$nBOVC!^9Tfp2}=Ll}fCarMG{2uNSJ81?-bf
zu}2QCo#{2ixIyr7(?g*kq*S{qk!VOlPI@5=K@L|W1??`4<+H(xv8X?9Wvo%tu!H5&
zGc9DDo||it_Q0{RoS*Vit{C*o%QyU#4%Y0{S9Tk*!oVSwm+sF4kV$6m^y`n7c;;&A
z;ExhxJg24O{Jz=^Zx<IS{+jg1QB?2JZZ*Ta+ezwK<5qa!%GJQzRbF_=04l$a`{8>n
z#c>}Ie~is{&mE$0LbjSa4d2)Tu<;|?xmOJSC}GoT6lxZTUCCFvEk%7%qUZ&8>Vzj!
z$UP1FMsi1%XJvxzf(}^5y?*9<a3Bf>Z91Us7KE2blqanZTch`fw-=A!w83X7vNA@6
zWX$r;$W^o>?CByyw#!j-<dNmdh%@&_->BGQJLs)3Jv_rb(pCeNPVDzJkn_Yg!G$tC
zQ8N^qIw5uDb|AjJC(I*19f&ksa`ZIK0a)InMk~+cfE3$bvYfHMg6&E2`8`Jik!?<$
zzUiVf9$>t?cStD+^-nviGKvRbemSgpzRe#GNGR$vR#;={6FKSYgAN!KJ;-r@(I02Y
zF>)`MZBaDo<_^6OQye@j#rmVr4O5EC?az_|v2zvOjvH^iutW2CmKm=*+HW;$dTDYE
zt=G(-TW{=vhYJ%Wbbkb)=x>XgMv}%jmAggk=UZ1yR;Z$9E%isw(#xqn-@P&D`<S77
znisOI?n>zD^g?rHdVwTcdu&Vf{Q2RtHM*5vbTH`l#ZBSosVWD2G51P%^01pfo;LNJ
zzfc~45s9dj=<15??2)xY;;uMiYA@No%LSiCjclJtx5NnlV+#QruHmHmAyFA$V*MxB
zJP#WN;*8eU%K^-uXt6cPPc+^Y`G)f&g`+%icynv$>l|lndeUigGTs}Xad5nS<a!OW
z5Bq69o+6`1(yx7g%*d!!y*W#C-W1>6mH1#C=ZpcaDor+tj>va3yl=aO2ku=(^Dt@N
z1B1klUdxHl#tQ)o-;B;X;72yjKZ0o<7(Q;D9VSbh$IN7y(?tCdLpHESJo7|T#)NNf
zH6}==%9qqO)5DPJ7otW&p184AN?`r02I_w14ZIWSj;T#P+VLTQ7;Y&<c|*w^y*Ipm
z5X<6-4$pQpB#C*V8H-|?(JC@ty#2n_%+?K`I`lC=`4xo5^{u6Eteo)nnq=Dz{0^w<
z<fXwk<AWE<KkN@RvP0IF>-#Ow1Y&Bsx=yIHKiYoSamQWT8Rv%70%cD4;C;g}<9C;Q
zaH8r@{Op$NxUQG%q3src$^DXil^0!+D!$mjS;G(IGp{wiM_crm=f9Ds6o9K`eKxvs
zI%0y0RT7w8#=@Jd8?wU#@KO9WLxwK~*l>K`*KMM<Sj6^G%%UL>tEJo6G+P3Z<Kv#x
zY;?!8X=^p{UJsPn=oI>Z@S6g?3xdZot?^1?&s<)*ExMAPQ}15ug@<q7`m|Y|=;uBq
zXANCnJh6S>Uc5;7>m|9%>A412RAuH`y~Y*A)hmy@`Kg2NPZmFp3UkA`1jV&$SzU19
zyt|q$;rHAw0_^<1?J%0FJ=y!YA(944nD&Mnqn)&!eutkA1_~7V?05ITHg9q1iyhbS
zC-?pu_jx@ms2$u*DQt=hm-yJ$TUekc-+9OPgnwz5PV3I^x`otT=cHObdf|PR@EF$?
z2W;rmTD`N`8;4s-r8TVqSf=quHl8&I*{7%{?WuI}u|NRtHgiW5wzu7J+wD4{yn>YL
zE<3b3R=ZFTZi?G3SoJBM^TZTdj!;H6M@$dXDL%<#iQ7l$LtI6yaOdC~-X9+JIKUsr
zC)ZBGq1S~B&of=na8ix_?QShR&wW)WV%`cfYhIJIw4IPqGCFw8CJ*d-*Mh}PR_Lg5
z>(nO)BRso1hBuT+AA6=M*H-K$VfQB2zS_GkcuDApZ}A!<JS6GRwC{T$qAL@t6`u#P
zl!VohI4m*p^qsdSsJ(FV(AuA#ZXS5@zA(MNzCBvBn%=OIHAG3ZiucN^yzpB%nMW+w
z6&H@$O1^3E!Hz%VtNb~xD4Vq~k;3MWJp4wl(qi3EBE+RFyUz`49!C%N7x?3g;n8C}
zla6?K--O+h+eYZuJXO<ar->oL>c8i&_+YbfT(Eek2P*h+)M$=+A!Sky%PVmUJj(6-
zz@^>@J1$sOeUZ6>9YG9pRzK`8zbjF{<5B?XzQ4BFi$uog5=-M7eV!OMD?3@<Ws1*6
z_Z^>MG)J^h+)cej8@W$6nOtx+#j*F4H{T9hVI)vmP8~ACHCi@nAKM#X@`yY4S*9Ck
zrq^N|#X`ojcOxGjYBop7EYll~8TL5J`sV$;ldkyO*fXo%&kHHQ*k5o&3wzg`ft)B`
zjHI;r(^=?&Y_l=H<CFC<UhduYyf^;%DoBj#BuyZOs&2VMuc?b%-`GFXRa@Z)#*u^1
zN_?>`pjXm%zYiXk;*n>MF~ivwt4lF77HB5RcBO>XfmkQLyx?pyrhV@gG9~O-QZDKE
zWeNxEv)=g|c9YR^YmZ4Wmk+LYrTh}66Nr|zjwy3*)X|wsxw?470?&OsrM;)r9_R00
zp*h+>M*2ae#;*1Ln5<gWu~*js<>>Nw#fyEB@^TA(Rjv*;i8P2ORQTcR<2%>0E9v0%
zX1|Mh>{pSNlTl*_@&5l@TVrsX?-q`2G@y%64ZwNN=#zrV?$|KR6ak|FC_XHnFDImq
z<s7814go<(F45-J8zrNDK`l!Y$s5JZXxoHE{cu0)dh?zO-WY7Do+8EPh62AFXcN52
zxZnSz=GS+AnEPU4Xj_pvzEP@92vQ5epq{>{=I!3tk}RRpXKRka``OFqYP|89y3MXe
z8aHeX;&Nx8_D1_WW&Twkd~mnbF}m;P3~>MGO!?$xSA0P;T1!?5#6t-|S|D$WoKF>%
zzgN2>bRUOdY6m<yIcRBB>Vgq;YSP~V-LP)v)WWe~H+*)ClYcnE9GSXDWQVl8uqGvi
z>Yk1PerB{ku&d4=O}0_jtlQv<jpQrY9&_IKHC-h3)d_d(agA7`d*Bv+4mfRchIr5Q
zK55pGr1HiifoDomSNo!4D(x!Pd&ao0R7lV~#~d9*!-phAys?D!;)_ITSA5*Q^^2yO
z4}S1USNg?#9Rov@ZzhVmq3DB8m1}M}V7`4uROz$}{ti;(cH!{Arr^>}J9Ndl5tW^P
z6y4BMS%V|Ll#IXD>s_5jCp4mMX$`3LK_w}lN4=Ss(U0F<(P67IlB-%*8Ps{>DgDhS
z&94RGmWT9RY`lRevf;QS>wThq$xdsD<AyjoD}B1TDhMyUo|<C(;D@fWtkm*eH_&H{
zA$J2|-#Mopf8t$#oQexpd`z4x+t}H<1c>uBWA>?#CvP0l_O$B#-3dC#M<2>j&u)js
zR6p(fQ+(0utNQoSwd%N9!_EAjs0BJ6eYy)DxZ=H;!tCB#LD=*44fz8tVLy94B_d1Q
zFt~y=9&o`QYYuq2k_o%9nra`FiZ;U=Z(KA4*ln;ro!61=l?@h+ijcJ<9nra_^I@)p
zGrj|kBPPUmluP*WC||!FT0Z>MFReiML$7p3^;-|L_>eP(E)J-FIY3G}&JShwd~!&P
zb;q4k!ipz~tugES=b<CS`glxLD=^NwV*0C-TUdDAu<R8{u{Ms3yI$Nfc4T)&^;DPS
z;AcKKGf~#q%5ohmM3pvHGuR;owT7_Ef*#t;?J(6;@<C43xFD_&D|C)L`pM;L0IIjF
zGPyeLiqwsvbNaEqc*~d3STQ~bDYW#e?KAz5nU=Cooj6Z%j>yy<E_6oo?UuI{iT&y|
zTf*+vl0Y;n<Px5V^ua`lt;Uj4#yEcPBHxbye~hk?Jm6YOMu+cv67x?6pkwa=k?+Jj
zv_)=aWZv(LlW&SljxuQACAw6x=_^*qI&=6y-eouRQJ`U<|K@?~%sckp$szV5AvbFh
zzcJqNxohYW<%eeLY_=qq2cg@KX9qt3vG4cr=4d4Q;n8O?vuuAH@d-^_<E=6u9P({R
zJNMfOHI)?1vv}<BoA~_wZN$DAO(!!m@z4x^Uizw<l3;+Q6||CD%6+ib+V19Eem$hC
z?p~Np^uu2w<n%lB&dA4kxjeZ&0PC+m(x5Z)#^}$Dor9-5u)%~W!k6&BtyT=DT^zme
z#7!x(`#Wdkco|F*<RIbh-xu|K_xj-i`*yXj#Cv*Tbk3(D!3(EJcD3C>R#;Maa$mwn
zcVv^b=A#L+N6&T7CH$Sd@r5i2Hm>tTx3{*uJSjow?l8fB*e3wvSg#e#D|zC(zVv{W
zhhDhpfoSmeGdeio(A{=z))SeTj3;Jnv{Ce9c8<$)cf93MrE%%#Eezvse_fX2hUAH?
zZVpp2DzZP|<Y=@(xA!J(>o(jV{Q7H+#w%~k8&qYu$zqN&<#O^nYz^_x3!^sepH4Vk
ztix>W?2FNLkA7Nj^1)QCV84e+I_RP;`f_8F3+|-ZZs=X<inkO;b<)uj!=5%+9Wo@&
zqtuxbPd?k@eYeAhI(poYS~5W@max~cc()((s{&B4TjtFGof~?E(ay9mxZn}!YWoG=
zDAQHc81}{;75RLaqZ_R7_>?iNd0GH|_|+}b7pjdWw|U+tR$NEhtB=&|<Q%c!{kOxV
zE;o>2cF0hOhVa{WJj0$Lz8F3c5vx{7!uwTEc<#P%$L2%7?iFMccK!bt{-=IF`5;xV
zA4Qc5ZRVY_clR_xqNKpumqjhGd9Qr(y)|{Pe@dzS?8afBxTUu3+hh}*a{Q4n8CD0k
zZwck<)^q{yokXT;+EUodp?~JdnIX9H^vGFV-7laTH8One(Ks9*G14zH8-%``!MU)8
zO5hPG%U5hq14`1+R?}BQV61uNb81jO{4u8Ir8zhNJ(dE;9vKe8?QI#tHW$9YCL2~U
zp>@q*zg25sh^rW^BP2CmGmOBM#M^scrnCds*pZ$#>MDra)}!h8X9QlljagFlbb#EJ
zqbIf-cf(gh>E<m*`=Ld!n4zh$1UjNRN-d&#z<%f)r)pLgY&#Fl@tdo_%)jH~bM-M$
zH8mf0x99<;!h*(kBK;5{k`?(rXavp$zoRr0=!JRKy$6$T)<f~sWbxEgA*3*`Qm<3!
z1PiXadk%$<gMpPa>6vN`IGtq+`4H3xYh=e*ZFdjC!lWvj8*2@i@%8u!@eRO9tNCMh
z?li;A9jeZZhP~hvWa`9f(F?JU_Ap$D>4LC_n==B13*b|qUt6lgFtjO2Mml#i0Q1Oo
z;WW-lI7qrxuIQ2j>eu+>)5Ut=SEwFG*84ut`@G5Iw^0T#)V+7eo$Lfma62z%Q4Pta
zTpPy<dSU9v{up2L3ZT4Z;(lpMKa6>dTF}2}f%2nbO`iN?aIiIPuhr8M=m~i9Ihwu+
zK7ODQmT517Yb9AT1vR6PUdvt>Ycm2d)(vL;615OpC$6<iqz`mvFOqJg4}r^A{L?#v
z-SDwMMM#9y1xe1D;_(-1AjC1Me0TjgY<C*{?dekiX1`-B(>`ZFm3*jXm~kgO`NZc1
zZ!3V|kbQ=*TQg*sH?OIR82~i~jZFWUeyB@1H8sK60P8=cM;x#2f~Srn_wO`y0;}P4
zNw!xnsGq;Q^?r2?ge$Xi2@Cc>xc!5B+Ir)lqhyFpVx8dFY^(oDVH6G@964NO)D9C?
z5BGQkHvv1dJ&h4DFMVhDSm|y0fd52aZ=2u{SQo^tl^7=0Lw3!%gd6=}pj#MXw0#I_
zQi^G5Ye#@K<&U0fTO;he8WI{o+XBhI2Va>*7eM@;!$z4+-9RRLbrwuy!$4OM?Mze$
zj9TxGqaysJ=Y!exNBwnhPR-?s0<mr)qZD)UXS2cl)^~vu>^X4OiS5hAo*{7L-sm6A
z)eHsuQUYd8ze4HE+@*-)T_8&0*AUSfgzTv-m-3n_m|q1^v3q(!=JCfnB{BK1D!3$V
zIy)Dx_RBebA$I{C({Pp0mo5l*b!n;(>IJ?D<`gfE0WkM+Ql2?84(a>(><cz_!O31{
zsc?!$kl>-~p4>77lTzo0e*LM18S3ver(d<hX-(1C(7G=$)wGqW(JTv;eNLTtFfs<#
zyT8@HmmLPh^K#zyC;A~gp^_<nx)sEB6bGhhR{$HG2AT47E;uuuOt2?+!#1z?ns&_P
z5FvTpJ@RQWWM)1deaO}XoCl=*7_|z3BlCddz1RZq)5(?7QfYu~J!3b<uaCnk{R+x#
zmkQV{9p`0ob_}LP2G*xOX#v0Mm!7tEjzO-R|GE9Lqwq2~^nlfiO6Zq7#i~}E2kAR&
zkG5VfgeNxVr>~Y|z_j-nIl7kxKn}TouDz`fJS5Dw9H;mKoWVa?y1z7ldH(uo1)pK~
zm?Oe?Y$g+UntkUv;)merX3YSP$93@Kxt7)=!(wocZVG0X?uSPeKbU0B_JZMA)&0@7
zBd}IGMdg#m063jzdBLta1o?-#r1r<w0ZFwHnjTg_a8?8R#mWK5;q)C5amt6;j|RIH
zb^Ae-+MWHmz!z``E#H2>Z3rseM)L&x`axY;&%1qpJ{X4bZq5tMhoD`~>s%g;1FAf@
z1mBw>KyD%~BdZzeN1v@Kj~ase7dFx`JB)#Cmv>K5#5gqON)D#<4#4%{<or;k7C2bB
zn_a~yAFR}`bWes2Li@EnW5Or9VJ`M}OId9jaPR$H8FFS61cN1Ba@{WiAEC2TM|3-3
zkV&RjN-Q0OZJYi4k~=_pr_5=T{|s;E8`g9=7Q=z@SS(g4hRcce50ZO(;YzG!?vqyo
zK*`F`AxY5#Hyg~Dw+@cM%=zOawuNS>xub8W-&_Y%nQwVT#74k2&hI8kt{U8pS=$;W
z#^9vQ=hb9`bg-x}5FFVx4()0dN1lAifnAX~u12jR@QM4vn_B%YxW}O6va4hqHXb=4
z!hPloZ0sTP#&dUr`#ANTK>0B^Y(XQipx*|OJE(>i#=BwA(zTI2X%vjhYb?1h=7Vdn
zhuSlX5jbKuV=>9n4aI->OTQPD13B5v;T~%noG(kgsu(c{ElfEY?X;cnrz%6u<MueL
zWtY=?x}^ogYxcLjE*bz6E}dQ}x>gWyF_E&~Gz^!oRl1IZcY}Iy7DN5EJkTo+z58Kp
z4s<#CT1g%*f&3FkE>mWw!-K(RapDeNfa;6ab*3HtpqKGybxn8|jGorL#91-`{PG43
zMJ}Vj);IfIcYP0(C{SF>B>eXW?S>sxl!LI=hb_>bc;Bt76JnEHXo1k(4CCUF?T{N=
zaaa5JC}=fh<UO`(g5k@?9T^l|Fw)JOK9n~IZY7y=3CCMOHn7Y4!?s??yEJ(47#*<=
z;u|%p{l|b-<$(cTW+^zy94*Z38in}U*+@IhP9QmUoY#=8g*DRqM$Q^{z>|xnS%t>q
zFz|ET2R_119G|D~tyO7++lN?B?lJF$$-MX+r=vBX5t5#@#qSFY2}Eyy82c5{J0H5!
z1bzjTjr-E?o-Tv)4zbO1#U&7_wyxlKd@qRWJxIIIQ2`3o%I{)ma=`q`_vud)HPGdh
zWOXOAAEq9gd29a~f%t}G+T`(0ILX41f7PoC()%3sOt^}`L4Y~0lcfXBM6Ze2<ktlq
zZ+QB@T&;m<9k<}Xfm*mQuXOX}t`=B-uin?<NG4=PtoM61)c}sGPxL8=Rl)UL5^nxd
zjZiTf^e`==7$Vmm+uPYN0vS{TW=|A)pwilYfF-vN-t+j<-5VVQu{+9Y+k!gb>s>>N
z0~PsD8;4E&&%5AVMcIdzEj?g6<+hps`2b8`XN<O&YKPZ~q*?`+uOL_`e!C&F4o=5(
z@JXL-h8EAF{YIs|;1?msZ~CMgay7!eB?p_}z%yN)u>0dcVtbG|K+MzADCctLBWbWU
zSnCe)#}5Q3w~%wIJD_E%Q>ZPj2Le+`Dk{uF5OCwbJ}0&!AfGOFH-9?}_d}9?Y$yCO
zy#LKP7;A;+vuo;uH;%z;zJl*NGREOlBl~$-kt$%yd!ea*f;b0O+Khkf9EA$n=PbIv
z`r-7s9f$U-=7L!TS&#eOAS8QOpEjo-hNjAo+~kS@czNVE#TxM;*r$7UE`4SQ!i-xJ
z*6*o=YA3;|il5aW7Vlu1Z`lhMPxo-9UK)i=(vbJS{RVj0alb8*vj_MK_Pl&eyq7%{
z1s*Ajb-?8BY1{mqeqe}deJaf{3b_GYMz{HT;f_9AeQ#zLoOtk%ZTfjTtP`PI+f&mG
z6V4s!CT&enQZ&;^H`N07RW66wdvw7eqb3P^yF56iuW-iWZZ)|19G>;4?E>1qs}X(@
z?NIgGV`Mg@5!4#5dFdS=fI~cIRO|Ws;Wp=%IepU}_^!!g6Xw_rjSK@5=GFbM{j0h3
zM@!-yd8_Hu>Q%!qSKTHlT{Q;bTPgT=?rH+@XRjVrJ6FT&5am^OOWHu4wn}mHd>#0v
zO--{8_P`Hmjs<q>QD{_r)l04}0nx;3q3VW(K+@ioMCPgmbG3}LAi~cT)?OC*BijS{
zt}3_rZWqFo$w1~z{3y8P4^yj555mK=yi?x&nb1Br`84u-FT7Ng^|v6-&$2EdcavtD
zAxX^mA^rVEh^Q#+etfYNPCXnjReI3{btRsD9B)S9H9t?by;>7wzi8<+(P;t_Y(4nQ
zYydQ$zW=3kqyvOAO~o#%RKo*7d7(-14v?_^`KF$x6x7*n&0V-v0k6})wg(4x!u$#P
z)$hF9AY=WG)%*dYU{AF=c$;i9#280Xe;)1zczTy5_j)g!%3L_D)z=E8Ath-OtlgmD
zA@yVrO)dP0p$k5gTnT-jpKFTbc0%%{d_OIf9^ftGwdMWR2>Y|@8}CMTL)!kSFVeXq
zuumzTvgBwt%$oCFz5l%*QWcpMqxFVBt-eIQFKHY;ioE0_h4z5iuHP-+Oow3V>6)pB
z#CgSPYgrqo!5APZo6T;0Dd4Wbh2FAOAS=g>R&Qto?y&=}&lCQ;P^!rJ-Lw}z6ivo<
z=nVkzWlT#hdKj{v8p=KSRt$STxlpV#9|12NkB}>=z0lcZxK_fhA3ja{k=ETDg<9&P
z56pymVdsy;pl_LtVE=I+XCUDZeKqUoMwOajK3q~qJTn`v-LPP>+%XL6)~Ic>IoSkE
zAw|<(j)P#6xAq{FY8U8jn4=hyuYmcd=k}zt)IpoSlZSF$2DI%SFy!<Z1(KM`JEN<`
zz+Hd$enx3Gcu~I+b30H6Mjg#O^n1JE6r<j%1;Q@gsR-Tpuz48NV`rldovMP&(U|pn
zcc%l=m%RNDQUa2cS6rL7kAm2*S|>4$5zt8@CzW06flzC?s@KjvAmHs@2!3P0xhX96
zD6|6OozMlR7h|w*dh#r{dI{*i*4$LHs}o|GtGTXC)<PUPQ(2w(f2Fhe$HPMFa<E+}
zgc#FdaI!4&jJQ1v3YGyHv+u@$owwqO^uscsvddrS=kA4*^}o2{n>zp%RIU7vHNn1;
zdM2i4O`ydgekM_^0K%SIM`}DAg`HdZJ2Gz(`-PDsH%G`AFdTg`;6_i>yK$sUfw2;r
z)>;1cD(eF3bZ-R-)e(?kAnRwz55S|H7xgM`4#B*&Z9_qFE1VDAZ4~jU8)k&QjpwQl
zLF5IC`Hv}GkbNb~%Tl5b`i+{Dr7Vek_}uRp1%WQ$xK&%nG~Witdg+p15a))ppM0AO
zEZX3O!>8S=Tsxr4prfwn!#E6<{ARfy_YvaKqXd;$Dq;7jy@C85EfB@q@%|6P7-;GI
z(A?0L2U=-cnwW|{!IoePbGNo!Ft88bx1OQ^ELIJUh3?9M_XEwle5gC&O_={j%F%9k
zoY18izOD{lc3v3MQEdeS39-l>R|`O^;i!&Iej~W##Bs+!7eq3&OtYODfHVDJKYwbZ
zfsIi|dU$*X%+Nhg?Q*S$SjN)s!-*q+yey~l$USi8LlhIg_$aK;Fc6k{Jr0$U8cD@b
z15i(^@>y9v4<t|M(th)9g(UYq%I7Z(gOGsM54o4sAfymWx<c45H|Gx8^n_yI;`yc}
zE0PP(B8vv-y;|VE`g}rM75%eVX7OIu;=NKNkhq5T*S*C>**fBmDkGt#C31^bLbRxe
lgb0_GaQPnU-%^VIz1vJo#Q)xH{_ovwb|h>`;;;Mj{|Cr0!Ce3V

literal 0
HcmV?d00001

diff --git a/scripts/generate_sample_data.py b/scripts/generate_sample_data.py
new file mode 100755
index 0000000..4459df2
--- /dev/null
+++ b/scripts/generate_sample_data.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+"""Generate minimal sample data for winnow train and predict commands."""
+
+import numpy as np
+import pandas as pd
+import polars as pl
+from pathlib import Path
+
+
+def generate_sample_data():
+    """Generate minimal sample IPC and CSV files for InstaNovo format."""
+    output_dir = Path("examples/example_data")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    n_samples = 20
+    spectrum_ids = [f"spectrum_{i}" for i in range(n_samples)]
+
+    # Generate peptides using only valid amino acids (A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y)
+    # Note: Must avoid O, U, X, Z, B, J which are not standard amino acids
+    peptides = [
+        "PEPTIDEK",
+        "MASSIVE",
+        "PEPTIDES",
+        "SEQQENCR",
+        "PEPTIDE",
+        "MASSIVE",
+        "PEPTIDES",
+        "SEQQENCR",
+        "PEPTIDEK",
+        "MASSIVE",
+        "PEPTIDES",
+        "SEQQENCR",
+        "PEPTIDE",
+        "MASSIVE",
+        "PEPTIDES",
+        "SEQQENCR",
+        "PEPTIDEK",
+        "MASSIVE",
+        "PEPTIDES",
+        "SEQQENCR",
+    ]
+
+    # Generate spectrum data (IPC format)
+    # Calculate precursor_mass from mz and charge
+    np.random.seed(42)  # For reproducibility
+    precursor_mz = np.random.uniform(400, 1200, n_samples)
+    precursor_charge = np.random.choice([2, 3, 4], n_samples)
+    proton_mass = 1.007276
+    precursor_mass = precursor_mz * precursor_charge - proton_mass * precursor_charge
+
+    # Generate spectrum arrays (mz_array and intensity_array)
+    mz_arrays = []
+    intensity_arrays = []
+    for _ in range(n_samples):
+        n_peaks = np.random.randint(10, 50)
+        mz_array = np.random.uniform(100, 1000, n_peaks).tolist()
+        intensity_array = np.random.uniform(0.1, 1.0, n_peaks).tolist()
+        mz_arrays.append(mz_array)
+        intensity_arrays.append(intensity_array)
+
+    # Create spectrum data DataFrame using polars
+    spectrum_data = pl.DataFrame(
+        {
+            "spectrum_id": spectrum_ids,
+            "precursor_mz": precursor_mz,
+            "precursor_charge": precursor_charge.astype(int),
+            "precursor_mass": precursor_mass,
+            "retention_time": np.random.uniform(10, 60, n_samples),
+            "sequence": peptides,  # Ground truth for training
+            "mz_array": mz_arrays,
+            "intensity_array": intensity_arrays,
+        }
+    )
+
+    # Generate predictions (CSV format)
+    predictions_data = {
+        "spectrum_id": spectrum_ids,
+        "predictions": peptides,
+        "predictions_tokenised": [
+            ", ".join(list(p))
+            for p in peptides  # "P, E, P, T, I, D, E, K"
+        ],
+        "log_probs": np.log(np.random.uniform(0.1, 0.9, n_samples)),
+        "sequence": peptides,  # Ground truth
+    }
+
+    # Add beam predictions (top 3 beams)
+    # Generate valid alternative peptides for runner-up beams
+    valid_aa = list("ACDEFGHIKLMNPQRSTVWY")
+    np.random.seed(43)  # Different seed for beam alternatives
+    for beam_idx in range(3):
+        if beam_idx == 0:
+            # Top beam uses the main prediction
+            beam_predictions = peptides
+        else:
+            # Generate valid alternative peptides for runner-up beams
+            beam_predictions = [
+                "".join(np.random.choice(valid_aa, size=len(peptides[i])))
+                for i in range(n_samples)
+            ]
+        predictions_data[f"instanovo_predictions_beam_{beam_idx}"] = beam_predictions
+        predictions_data[f"instanovo_log_probabilities_beam_{beam_idx}"] = [
+            np.log(np.random.uniform(0.1, 0.9)) for _ in range(n_samples)
+        ]
+        # Token log probabilities as string representation of list
+        predictions_data[f"token_log_probabilities_beam_{beam_idx}"] = [
+            str([np.log(np.random.uniform(0.5, 0.9)) for _ in range(len(p))])
+            for p in peptides
+        ]
+
+    predictions_df = pd.DataFrame(predictions_data)
+
+    # Save files
+    spectrum_path = output_dir / "spectra.ipc"
+    predictions_path = output_dir / "predictions.csv"
+
+    spectrum_data.write_ipc(str(spectrum_path))
+    predictions_df.to_csv(predictions_path, index=False)
+
+    print("✓ Generated sample data:")
+    print(f"  - {spectrum_path}")
+    print(f"  - {predictions_path}")
+    print("\n✓ You can now run:")
+    print("  winnow train  # Uses sample data from config defaults")
+
+
+if __name__ == "__main__":
+    generate_sample_data()

From d614fcfb2c28e3ed2d0413abb9dd5818e1f7e50e Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:29:57 +0000
Subject: [PATCH 13/17] docs: add documentation for quickstarting with the toy
 data

---
 README.md        | 31 +++++++++++++++++++++++++++++++
 docs/cli.md      | 30 ++++++++++++++++++++++++++++++
 docs/examples.md | 19 +++++++++++++++++++
 3 files changed, 80 insertions(+)

diff --git a/README.md b/README.md
index d065e91..6a0f9be 100644
--- a/README.md
+++ b/README.md
@@ -106,7 +106,38 @@ uv pip install winnow-fdr
 ```
 <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
+<!-- QUICK START -->
+## Quick Start
 
+Get started with `winnow` in minutes using the included sample data:
+
+```bash
+# Generate sample data (if not already present)
+make sample-data
+
+# Train a calibrator on the sample data
+make train-sample
+
+# Run prediction with the trained model
+make predict-sample
+```
+
+The sample data is automatically configured in `config/train.yaml` and `config/predict.yaml`, so you can also use the commands directly:
+
+```bash
+# Train with sample data
+winnow train
+
+# Predict with pretrained HuggingFace model
+winnow predict
+
+# Predict with your locally trained model
+winnow predict calibrator.pretrained_model_name_or_path=models/new_model
+```
+
+**Note:** The sample data is minimal (20 spectra) and intended for testing only. For production use, replace with your own datasets.
+
+<p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 <!-- USAGE EXAMPLES -->
 ## Usage
diff --git a/docs/cli.md b/docs/cli.md
index ceafd77..3f88570 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -14,6 +14,36 @@ pip install winnow-fdr
 uv pip install winnow-fdr
 ```
 
+## Quick Start
+
+Get started immediately with the included sample data:
+
+```bash
+# Generate sample data (if not already present)
+make sample-data
+
+# Train a calibrator on the sample data
+make train-sample
+
+# Run prediction with the trained model
+make predict-sample
+```
+
+The sample data is pre-configured in the default config files (`config/train.yaml` and `config/predict.yaml`), pointing to `examples/example_data/`. You can also use the commands directly:
+
+```bash
+# Train with sample data (uses defaults from config/train.yaml)
+winnow train
+
+# Predict with pretrained HuggingFace model (uses defaults from config/predict.yaml)
+winnow predict
+
+# Predict with your locally trained model
+winnow predict calibrator.pretrained_model_name_or_path=models/new_model
+```
+
+**Note:** The sample data is minimal (20 spectra) and intended for testing only. For production use, replace the dataset paths in the config files or override them on the command line.
+
 ## Commands
 
 ### `winnow config`
diff --git a/docs/examples.md b/docs/examples.md
index 4c0f24a..ecfc413 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -1,5 +1,24 @@
 # Examples
 
+## Quick Start with Sample Data
+
+Get started with `winnow` in minutes using the included sample data:
+
+```bash
+# Generate sample data
+make sample-data
+
+# Train a calibrator
+make train-sample
+
+# Run prediction
+make predict-sample
+```
+
+The sample data is automatically configured in the default config files. See the [CLI guide](cli.md#quick-start) for more details.
+
+## Comprehensive Example Notebook
+
 For a comprehensive example demonstrating the full winnow workflow, see our example notebook:
 
 📓 **[FDR plots example notebook](https://github.com/instadeepai/winnow/blob/main/examples/getting_started_with_winnow.ipynb)**

From 999e42f9abc7fd8eb4221d289a3a232dc19a5cba Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 15:41:44 +0000
Subject: [PATCH 14/17] fix: allow for location, overriding and composition of
 configs when installed as a package

chore: fix pre-commit on main script

chore: remove testing Make commands

fix: correct the path for config_path_utils

fix: correct the path for config_path_utils

chore: pre-commit formatting fixes for test_config_paths
---
 Makefile                                 |  17 +-
 README.md                                |   2 +-
 config/calibrator.yaml                   |  48 ------
 config/data_loader/instanovo.yaml        |  23 ---
 config/data_loader/mztab.yaml            |  20 ---
 config/data_loader/pointnovo.yaml        |   5 -
 config/data_loader/winnow.yaml           |   7 -
 config/fdr_method/database_grounded.yaml |   8 -
 config/fdr_method/nonparametric.yaml     |   3 -
 config/predict.yaml                      |  38 -----
 config/residues.yaml                     |  64 --------
 config/train.yaml                        |  21 ---
 docs/cli.md                              |  14 +-
 docs/configuration.md                    | 167 ++++++++++++++++---
 pyproject.toml                           |   9 +
 tests/scripts/test_config_paths.py       | 199 +++++++++++++++++++++++
 winnow/scripts/config_path_utils.py      | 190 ++++++++++++++++++++++
 winnow/scripts/main.py                   | 117 ++++++++++---
 18 files changed, 666 insertions(+), 286 deletions(-)
 delete mode 100644 config/calibrator.yaml
 delete mode 100644 config/data_loader/instanovo.yaml
 delete mode 100644 config/data_loader/mztab.yaml
 delete mode 100644 config/data_loader/pointnovo.yaml
 delete mode 100644 config/data_loader/winnow.yaml
 delete mode 100644 config/fdr_method/database_grounded.yaml
 delete mode 100644 config/fdr_method/nonparametric.yaml
 delete mode 100644 config/predict.yaml
 delete mode 100644 config/residues.yaml
 delete mode 100644 config/train.yaml
 create mode 100644 tests/scripts/test_config_paths.py
 create mode 100644 winnow/scripts/config_path_utils.py

diff --git a/Makefile b/Makefile
index fa6dc45..657fef8 100644
--- a/Makefile
+++ b/Makefile
@@ -86,12 +86,16 @@ install-all:
 ## Development commands														 	#
 #################################################################################
 
-.PHONY: tests test-docker bash set-gcp-credentials set-ceph-credentials
+.PHONY: tests clean-coverage test-docker bash build-package clean-build clean-workspace test-build clean-all-build test-cli-isolated test-cli-config set-gcp-credentials set-ceph-credentials
 
 ## Run all tests
 tests:
 	$(PYTEST)
 
+## Clean coverage reports
+clean-coverage:
+	rm -rf htmlcov/ .coverage coverage.xml pytest.xml
+
 ## Run all tests in the Docker Image
 test-docker:
 	docker run $(DOCKER_RUN_FLAGS) $(DOCKER_IMAGE) $(PYTEST)
@@ -100,6 +104,17 @@ test-docker:
 bash:
 	docker run -it $(DOCKER_RUN_FLAGS) $(DOCKER_IMAGE) /bin/bash
 
+## Build the winnow-fdr package (creates wheel and sdist in dist/)
+build-package:
+	uv build
+
+## Clean all build artifacts (dist/, build/, *.egg-info/)
+clean-build:
+	rm -rf dist/ build/ *.egg-info/ winnow_fdr.egg-info/
+
+## Build the package and then clean up (safe test build)
+test-build: build-package clean-build
+
 ## Set the GCP credentials
 set-gcp-credentials:
 	uv run python scripts/set_gcp_credentials.py
diff --git a/README.md b/README.md
index 6a0f9be..170a587 100644
--- a/README.md
+++ b/README.md
@@ -122,7 +122,7 @@ make train-sample
 make predict-sample
 ```
 
-The sample data is automatically configured in `config/train.yaml` and `config/predict.yaml`, so you can also use the commands directly:
+The sample data is automatically configured in `configs/train.yaml` and `configs/predict.yaml`, so you can also use the commands directly:
 
 ```bash
 # Train with sample data
diff --git a/config/calibrator.yaml b/config/calibrator.yaml
deleted file mode 100644
index 0816814..0000000
--- a/config/calibrator.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# --- Calibrator configuration ---
-
-calibrator:
-  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
-
-  seed: 42
-  hidden_layer_sizes: [50, 50]  # The number of neurons in each hidden layer of the MLP classifier.
-  learning_rate_init: 0.001  # The initial learning rate for the MLP classifier.
-  alpha: 0.0001  # L2 regularisation parameter for the MLP classifier.
-  max_iter: 1000  # Maximum number of training iterations for the MLP classifier.
-  early_stopping: true  # Whether to use early stopping to terminate training.
-  validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
-
-  features:
-    mass_error:
-      _target_: winnow.calibration.calibration_features.MassErrorFeature
-      residue_masses: ${residue_masses}  # The residue masses to use for the mass error feature.
-
-    prosit_features:
-      _target_: winnow.calibration.calibration_features.PrositFeatures
-      mz_tolerance: 0.02
-      learn_from_missing: true  # Whether to learn from missing Prosit features. If False, errors will be raised when invalid spectra are encountered.
-      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit features.
-      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
-
-    retention_time_feature:
-      _target_: winnow.calibration.calibration_features.RetentionTimeFeature
-      hidden_dim: 10  # The hidden dimension size for the MLP regressor used to predict iRT from observed retention times.
-      train_fraction: 0.1  # The fraction of the data to use for training the iRT predictor.
-      learn_from_missing: true  # Whether to learn from missing retention time features. If False, errors will be raised when invalid spectra are encountered.
-      seed: 42  # Random seed for the MLP regressor.
-      learning_rate_init: 0.001  # The initial learning rate for the MLP regressor.
-      alpha: 0.0001  # L2 regularisation parameter for the MLP regressor.
-      max_iter: 200  # Maximum number of training iterations for the MLP regressor.
-      early_stopping: false  # Whether to use early stopping for the MLP regressor.
-      validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
-      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit iRT features.
-      prosit_irt_model_name: Prosit_2019_irt  # The name of the Prosit iRT model to use.
-
-    chimeric_features:
-      _target_: winnow.calibration.calibration_features.ChimericFeatures
-      mz_tolerance: 0.02
-      learn_from_missing: true  # Whether to learn from missing chimeric features. If False, errors will be raised when invalid spectra are encountered.
-      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit chimeric intensity features.
-      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
-
-    beam_features:
-      _target_: winnow.calibration.calibration_features.BeamFeatures
diff --git a/config/data_loader/instanovo.yaml b/config/data_loader/instanovo.yaml
deleted file mode 100644
index caf142e..0000000
--- a/config/data_loader/instanovo.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# --- InstaNovo data loading configuration ---
-
-_target_: winnow.datasets.data_loaders.InstaNovoDatasetLoader
-
-residue_masses: ${residue_masses}
-residue_remapping:  # Used to map InstaNovo legacy notations to UNIMOD tokens.
-  "M(ox)": "M[UNIMOD:35]"  # Oxidation
-  "M(+15.99)": "M[UNIMOD:35]"  # Oxidation
-  "S(p)": "S[UNIMOD:21]"  # Phosphorylation
-  "T(p)": "T[UNIMOD:21]"  # Phosphorylation
-  "Y(p)": "Y[UNIMOD:21]"  # Phosphorylation
-  "S(+79.97)": "S[UNIMOD:21]"  # Phosphorylation
-  "T(+79.97)": "T[UNIMOD:21]"  # Phosphorylation
-  "Y(+79.97)": "Y[UNIMOD:21]"  # Phosphorylation
-  "Q(+0.98)": "Q[UNIMOD:7]"  # Deamidation
-  "N(+0.98)": "N[UNIMOD:7]"  # Deamidation
-  "Q(+.98)": "Q[UNIMOD:7]"  # Deamidation
-  "N(+.98)": "N[UNIMOD:7]"  # Deamidation
-  "C(+57.02)": "C[UNIMOD:4]"  # Carbamidomethylation
-  # N-terminal modifications.
-  "(+42.01)": "[UNIMOD:1]"  # Acetylation
-  "(+43.01)": "[UNIMOD:5]"  # Carbamylation
-  "(-17.03)": "[UNIMOD:385]"  # Ammonia loss
diff --git a/config/data_loader/mztab.yaml b/config/data_loader/mztab.yaml
deleted file mode 100644
index dc184ea..0000000
--- a/config/data_loader/mztab.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# --- MZTab data loading configuration ---
-_target_: winnow.datasets.data_loaders.MZTabDatasetLoader
-
-residue_masses: ${residue_masses}
-residue_remapping:  # Used to map Casanovo-specific notations to UNIMOD tokens.
-  "M+15.995": "M[UNIMOD:35]"  # Oxidation
-  "Q+0.984": "Q[UNIMOD:7]"  # Deamidation
-  "N+0.984": "N[UNIMOD:7]"  # Deamidation
-  "+42.011": "[UNIMOD:1]"  # Acetylation
-  "+43.006": "[UNIMOD:5]"  # Carbamylation
-  "-17.027": "[UNIMOD:385]"  # Ammonia loss
-  "C+57.021": "C[UNIMOD:4]"  # Carbamidomethylation
-  "C[Carbamidomethyl]": "C[UNIMOD:4]"  # Carbamidomethylation
-  "M[Oxidation]": "M[UNIMOD:35]"  # Oxidation
-  "N[Deamidated]": "N[UNIMOD:7]"  # Deamidation
-  "Q[Deamidated]": "Q[UNIMOD:7]"  # Deamidation
-  # N-terminal modifications.
-  "[Acetyl]-": "[UNIMOD:1]"  # Acetylation
-  "[Carbamyl]-": "[UNIMOD:5]"  # Carbamylation
-  "[Ammonia-loss]-": "[UNIMOD:385]"  # Ammonia loss
diff --git a/config/data_loader/pointnovo.yaml b/config/data_loader/pointnovo.yaml
deleted file mode 100644
index 022691a..0000000
--- a/config/data_loader/pointnovo.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-# --- PointNovo data loading configuration ---
-
-_target_: winnow.datasets.data_loaders.PointNovoDatasetLoader
-
-residue_masses: ${residue_masses}
diff --git a/config/data_loader/winnow.yaml b/config/data_loader/winnow.yaml
deleted file mode 100644
index dbfb632..0000000
--- a/config/data_loader/winnow.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-# --- Winnow data loading configuration ---
-
-_target_: winnow.datasets.data_loaders.WinnowDatasetLoader
-
-residue_masses: ${residue_masses}
-# The internal Winnow dataset loader does not need a residue remapping
-# since it uses the UNIMOD tokens directly.
diff --git a/config/fdr_method/database_grounded.yaml b/config/fdr_method/database_grounded.yaml
deleted file mode 100644
index 41a6cc3..0000000
--- a/config/fdr_method/database_grounded.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-# --- Database-grounded FDR control configuration ---
-
-_target_: winnow.fdr.database_grounded.DatabaseGroundedFDRControl
-
-confidence_feature: ${fdr_control.confidence_column}  # Name of the column with confidence scores to use for FDR estimation.
-residue_masses: ${residue_masses}  # The residue masses from global `residues` config
-isotope_error_range: [0, 1]  # The isotope error range for matching peptides
-drop: 10  # The number of top predictions to drop for stability
diff --git a/config/fdr_method/nonparametric.yaml b/config/fdr_method/nonparametric.yaml
deleted file mode 100644
index 2d8c5a3..0000000
--- a/config/fdr_method/nonparametric.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-# --- Non-parametric FDR control configuration ---
-
-_target_: winnow.fdr.nonparametric.NonParametricFDRControl
diff --git a/config/predict.yaml b/config/predict.yaml
deleted file mode 100644
index fa53a6a..0000000
--- a/config/predict.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# --- Predicting scores and applying FDR control ---
-defaults:
-  - _self_
-  - residues
-  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
-  - fdr_method: nonparametric  # Options: nonparametric, database_grounded
-
-# --- Pipeline Execution Configuration ---
-
-dataset:
-  # Dataset paths:
-  # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
-  spectrum_path_or_directory: examples/example_data/spectra.ipc
-  # Path to the beam predictions file.
-  # Leave as `null` if data source is `winnow`, or loading will fail.
-  predictions_path: examples/example_data/predictions.csv
-  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
-
-calibrator:
-  # Model loading:
-  # Path to the local calibrator directory or the HuggingFace model identifier.
-  # If the path is a local directory path, it will be used directly. If it is a HuggingFace repository identifier, it will be downloaded from HuggingFace.
-  pretrained_model_name_or_path: InstaDeepAI/winnow-general-model
-  # Directory to cache the HuggingFace model.
-  cache_dir: null  # can be set to `null` if using local model or for the default cache directory from HuggingFace.
-
-fdr_control:
-  # FDR settings:
-  # Target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.).
-  fdr_threshold: 0.05
-  # Name of the column with confidence scores to use for FDR estimation.
-  confidence_column: calibrated_confidence
-
-# Folder path to write the outputs to.
-# This will create two CSV files in the output folder:
-# - metadata.csv: Contains all metadata and feature columns from the input dataset.
-# - preds_and_fdr_metrics.csv: Contains predictions and FDR metrics.
-output_folder: results/predictions
diff --git a/config/residues.yaml b/config/residues.yaml
deleted file mode 100644
index d76d4a3..0000000
--- a/config/residues.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-# --- Residues configuration ---
-
-# This is Winnow's internal residue representation.
-# We use this to calculate the mass error feature and during database-grounded FDR control.
-# We also use this to initialise the residue set for the Metrics class.
-residue_masses:
-  "G": 57.021464
-  "A": 71.037114
-  "S": 87.032028
-  "P": 97.052764
-  "V": 99.068414
-  "T": 101.047670
-  "C": 103.009185
-  "L": 113.084064
-  "I": 113.084064
-  "N": 114.042927
-  "D": 115.026943
-  "Q": 128.058578
-  "K": 128.094963
-  "E": 129.042593
-  "M": 131.040485
-  "H": 137.058912
-  "F": 147.068414
-  "R": 156.101111
-  "Y": 163.063329
-  "W": 186.079313
-  # Modifications
-  "M[UNIMOD:35]": 147.035400 # Oxidation
-  "C[UNIMOD:4]": 160.030649 # Carboxyamidomethylation
-  "N[UNIMOD:7]": 115.026943 # Deamidation
-  "Q[UNIMOD:7]": 129.042594 # Deamidation
-  "R[UNIMOD:7]": 157.085127 # Arginine citrullination
-  "P[UNIMOD:35]": 113.047679 # Proline hydroxylation
-  "S[UNIMOD:21]": 166.998028 # Phosphorylation + 79.966
-  "T[UNIMOD:21]": 181.01367 # Phosphorylation + 79.966
-  "Y[UNIMOD:21]": 243.029329 # Phosphorylation + 79.966
-  "C[UNIMOD:312]": 222.013284  # Cysteinylation
-  "E[UNIMOD:27]": 111.032028  # Glu -> pyro-Glu
-  "Q[UNIMOD:28]": 111.032029  # Gln -> pyro-Gln
-  # Terminal modifications
-  "[UNIMOD:1]": 42.010565 # Acetylation
-  "[UNIMOD:5]": 43.005814 # Carbamylation
-  "[UNIMOD:385]": -17.026549 # NH3 loss
-  "(+25.98)": 25.980265  # Carbamylation & NH3 loss (legacy notation)
-
-# The tokens to consider as invalid for Prosit features.
-# We also filter out non-carboxyamidomethylated Cysteine in a separate step.
-invalid_prosit_tokens:
-  # InstaNovo
-  - "[UNIMOD:7]"
-  - "[UNIMOD:21]"
-  - "[UNIMOD:1]"
-  - "[UNIMOD:5]"
-  - "[UNIMOD:385]"
-  - "(+25.98)"  # (legacy notation)
-  # Casanovo
-  - "+0.984"
-  - "+42.011"
-  - "+43.006"
-  - "-17.027"
-  - "[Ammonia-loss]-"
-  - "[Carbamyl]-"
-  - "[Acetyl]-"
-  - "[Deamidated]"
diff --git a/config/train.yaml b/config/train.yaml
deleted file mode 100644
index 839d3f8..0000000
--- a/config/train.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# --- Training a calibrator ---
-defaults:
-  - _self_
-  - residues
-  - calibrator
-  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
-
-# --- Pipeline Execution Configuration ---
-
-dataset:
-  # Dataset paths:
-  # Path to the spectrum data file or to folder containing saved internal Winnow dataset.
-  spectrum_path_or_directory: examples/example_data/spectra.ipc
-  # Path to the beam predictions file.
-  # Leave as `null` if data source is `winnow`, or loading will fail.
-  predictions_path: examples/example_data/predictions.csv
-  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
-
-# Output paths:
-model_output_dir: models/new_model
-dataset_output_path: results/calibrated_dataset.csv
diff --git a/docs/cli.md b/docs/cli.md
index 3f88570..2720746 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -29,13 +29,13 @@ make train-sample
 make predict-sample
 ```
 
-The sample data is pre-configured in the default config files (`config/train.yaml` and `config/predict.yaml`), pointing to `examples/example_data/`. You can also use the commands directly:
+The sample data is pre-configured in the default config files (`configs/train.yaml` and `configs/predict.yaml`), pointing to `examples/example_data/`. You can also use the commands directly:
 
 ```bash
-# Train with sample data (uses defaults from config/train.yaml)
+# Train with sample data (uses defaults from configs/train.yaml)
 winnow train
 
-# Predict with pretrained HuggingFace model (uses defaults from config/predict.yaml)
+# Predict with pretrained HuggingFace model (uses defaults from configs/predict.yaml)
 winnow predict
 
 # Predict with your locally trained model
@@ -71,7 +71,7 @@ This command prints the final YAML configuration with colour-coded hierarchical
 Train a confidence calibration model on labelled data.
 
 ```bash
-# Use defaults (configured in config/train.yaml)
+# Use defaults (configured in configs/train.yaml)
 winnow train
 
 # Override specific parameters
@@ -141,7 +141,7 @@ By default, `winnow predict` uses the pretrained model `InstaDeepAI/winnow-gener
 
 Winnow uses [Hydra](https://hydra.cc/) for configuration management. All parameters can be configured via:
 
-- **YAML config files** in the `config/` directory (defines defaults)
+- **YAML config files** in the `configs/` directory (defines defaults)
 - **Command-line overrides** using `key=value` syntax
 - **Nested parameters** using dot notation (e.g., `calibrator.seed=42`)
 
@@ -329,7 +329,7 @@ Winnow comes with sensible default settings for all parameters:
 - **FDR**: Non-parametric method with 5% threshold
 - **Model**: Pretrained general model from HuggingFace
 
-All defaults are defined in YAML files under `config/` and can be overridden via command line. For a complete reference of all default parameters and configuration options, see the **[Configuration guide](configuration.md)**.
+All defaults are defined in YAML files under `configs/` and can be overridden via command line. For a complete reference of all default parameters and configuration options, see the **[Configuration guide](configuration.md)**.
 
 ## Troubleshooting
 
@@ -365,7 +365,7 @@ The CLI automatically filters out:
 - Empty predictions
 - Peptides longer than 30 amino acids (Prosit limitation)
 - Precursor charges above 6 (Prosit limitation)
-- Invalid modifications and tokens (defined in `config/residues.yaml`)
+- Invalid modifications and tokens (defined in `configs/residues.yaml`)
 
 ### Getting help
 
diff --git a/docs/configuration.md b/docs/configuration.md
index 59470e7..84620e1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -26,10 +26,10 @@ winnow predict
 
 ## Configuration files
 
-Winnow's configuration files are organised in the `config/` directory:
+Winnow's configuration files are organised in the `configs/` directory:
 
 ```
-config/
+configs/
 ├── residues.yaml              # Amino acid masses, modifications
 ├── data_loader/               # Dataset format loaders
 │   ├── instanovo.yaml
@@ -95,7 +95,7 @@ winnow train data_loader=mztab dataset.spectrum_path_or_directory=data/spectra.p
 
 ## Training configuration
 
-### Main training config (`config/train.yaml`)
+### Main training config (`configs/train.yaml`)
 
 Controls dataset loading, output paths and composition:
 
@@ -126,7 +126,7 @@ dataset_output_path: results/calibrated_dataset.csv
 - `model_output_dir`: Where to save trained model
 - `dataset_output_path`: Where to save calibrated training results
 
-### Calibrator config (`config/calibrator.yaml`)
+### Calibrator config (`configs/calibrator.yaml`)
 
 Controls model architecture and calibration features:
 
@@ -192,7 +192,7 @@ calibrator:
 
 ## Prediction configuration
 
-### Main prediction config (`config/predict.yaml`)
+### Main prediction config (`configs/predict.yaml`)
 
 Controls dataset loading, FDR estimation and output:
 
@@ -242,7 +242,7 @@ output_folder: results/predictions
 
 ### FDR method configs
 
-**Non-parametric FDR** (`config/fdr_method/nonparametric.yaml`):
+**Non-parametric FDR** (`configs/fdr_method/nonparametric.yaml`):
 
 ```yaml
 _target_: winnow.fdr.nonparametric.NonParametricFDRControl
@@ -250,7 +250,7 @@ _target_: winnow.fdr.nonparametric.NonParametricFDRControl
 
 No additional parameters required.
 
-**Database-grounded FDR** (`config/fdr_method/database_grounded.yaml`):
+**Database-grounded FDR** (`configs/fdr_method/database_grounded.yaml`):
 
 ```yaml
 _target_: winnow.fdr.database_grounded.DatabaseGroundedFDRControl
@@ -271,7 +271,7 @@ Requires ground truth sequences in the dataset.
 
 ## Shared configuration
 
-### Residues config (`config/residues.yaml`)
+### Residues config (`configs/residues.yaml`)
 
 Defines amino acid masses, modifications and invalid tokens:
 
@@ -310,9 +310,9 @@ This configuration is shared across all pipelines and referenced via `${residue_
 
 ### Data loader configs
 
-Each data format has a dedicated loader configuration in `config/data_loader/`:
+Each data format has a dedicated loader configuration in `configs/data_loader/`:
 
-**InstaNovo** (`config/data_loader/instanovo.yaml`):
+**InstaNovo** (`configs/data_loader/instanovo.yaml`):
 ```yaml
 _target_: winnow.datasets.data_loaders.InstaNovoDatasetLoader
 residue_masses: ${residue_masses}
@@ -322,7 +322,7 @@ residue_remapping:
   # ... maps legacy notations to UNIMOD tokens
 ```
 
-**MZTab** (`config/data_loader/mztab.yaml`):
+**MZTab** (`configs/data_loader/mztab.yaml`):
 ```yaml
 _target_: winnow.datasets.data_loaders.MZTabDatasetLoader
 residue_masses: ${residue_masses}
@@ -333,13 +333,13 @@ residue_remapping:
   # ... maps Casanovo notations to UNIMOD tokens
 ```
 
-**PointNovo** (`config/data_loader/pointnovo.yaml`):
+**PointNovo** (`configs/data_loader/pointnovo.yaml`):
 ```yaml
 _target_: winnow.datasets.data_loaders.PointNovoDatasetLoader
 residue_masses: ${residue_masses}
 ```
 
-**Winnow** (`config/data_loader/winnow.yaml`):
+**Winnow** (`configs/data_loader/winnow.yaml`):
 ```yaml
 _target_: winnow.datasets.data_loaders.WinnowDatasetLoader
 residue_masses: ${residue_masses}
@@ -426,10 +426,10 @@ Common interpolation patterns in Winnow configs:
 ### Add a custom data loader
 
 1. Create loader class implementing `DatasetLoader` protocol
-2. Add configuration file: `config/data_loader/custom.yaml`
+2. Add configuration file: `configs/data_loader/custom.yaml`
 3. Use with: `winnow train data_loader=custom`
 
-Example `config/data_loader/custom.yaml`:
+Example `configs/data_loader/custom.yaml`:
 ```yaml
 _target_: my_module.CustomDatasetLoader
 residue_masses: ${residue_masses}
@@ -439,7 +439,7 @@ custom_param: value
 ### Add custom calibration features
 
 1. Create feature class inheriting from `CalibrationFeatures`
-2. Add to `config/calibrator.yaml`:
+2. Add to `configs/calibrator.yaml`:
    ```yaml
    features:
      custom_feature:
@@ -451,10 +451,10 @@ custom_param: value
 ### Add custom FDR method
 
 1. Create FDR class implementing the FDR interface
-2. Add configuration file: `config/fdr_method/custom_method.yaml`
+2. Add configuration file: `configs/fdr_method/custom_method.yaml`
 3. Use with: `winnow predict fdr_method=custom_method`
 
-Example `config/fdr_method/custom_method.yaml`:
+Example `configs/fdr_method/custom_method.yaml`:
 ```yaml
 _target_: my_module.CustomFDRControl
 confidence_feature: ${fdr_control.confidence_column}
@@ -493,6 +493,135 @@ winnow predict fdr_method=typo
 # Available options in 'fdr_method': nonparametric, database_grounded
 ```
 
+## Advanced: custom config directories
+
+For advanced users who have installed winnow as a package and need to customise multiple configuration files, you can use the `--config-dir` flag to specify a custom configuration directory. This is particularly useful when you have complex customisations that would be verbose to specify via command-line overrides.
+
+**When to use custom config directories:**
+- **CLI overrides**: For simple parameter changes (1-3 values) - use command-line overrides like `winnow train calibrator.seed=42`
+- **Custom config dirs**: For complex configurations with many custom settings (advanced users) - use `--config-dir`
+- **Cloning repo**: For extending Winnow (developers) - clone the repository, make changes, and modify configs directly
+
+### Config file structure and naming
+
+Your custom config directory should mirror the structure of the package configs:
+
+```
+my_configs/
+├── residues.yaml              # Override residue masses/modifications
+├── calibrator.yaml            # Override calibrator features
+├── train.yaml                 # Override training config (if needed)
+├── predict.yaml               # Override prediction config (if needed)
+├── data_loader/               # Override data loaders (if needed)
+│   ├── instanovo.yaml
+│   ├── mztab.yaml
+│   └── winnow.yaml
+└── fdr_method/               # Override FDR methods (if needed)
+    ├── database_grounded.yaml
+    └── nonparametric.yaml
+```
+
+**Important requirements:**
+- File names must match package config names **exactly** (case-sensitive)
+- Directory structure should mirror package structure (e.g., `data_loader/`, `fdr_method/`)
+- Only include files you want to override - you don't need to include everything
+- YAML files must be valid and follow the same structure as package configs
+
+### Partial configs
+
+You can use **partial configs at the file level** - only include the files you want to override. For example, if you only want to customise residue masses and calibrator settings:
+
+```
+my_configs/
+├── residues.yaml       # Your custom residues
+└── calibrator.yaml    # Your custom calibrator config
+```
+
+When you use `--config-dir`, winnow will:
+1. Use your custom files for files present in your directory (these completely replace package versions)
+2. Use package defaults for files not in your directory
+
+**Important limitation**: Partial configs work at the **file level**, not the **key level** within a file. If you provide a custom `calibrator.yaml`, it must contain the complete structure - you can't just override `seed` and expect other settings to come from package defaults. See "Behaviour with variables" below for details.
+
+### Behaviour with variables
+
+**How config files work**: When you provide a custom config file (e.g., `calibrator.yaml`), it **completely replaces** the package version of that file. Hydra does not merge keys within the same file - it uses your file exactly as written.
+
+**What this means:**
+- ✅ **Partial configs at file level**: You only need to include the files you want to override (e.g., just `residues.yaml` and `calibrator.yaml`). Files not in your custom directory use package defaults.
+- ❌ **Partial configs at key level don't work**: If you provide `calibrator.yaml` with only `seed: 999`, the other settings (`hidden_layer_sizes`, `features`, etc.) will be **missing**, not using package defaults. This will cause errors.
+
+**Example - What happens with minimal config:**
+```yaml
+# custom/calibrator.yaml - TOO MINIMAL
+calibrator:
+  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
+  seed: 99999
+```
+
+**Result**: Only `_target_` and `seed` are present. All other keys (`hidden_layer_sizes`, `learning_rate_init`, `features`, etc.) are **missing** from the final config. This will cause errors when running the pipeline in most cases.
+
+**Example - What you need (complete structure):**
+```yaml
+# custom/calibrator.yaml - COMPLETE STRUCTURE REQUIRED
+calibrator:
+  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
+  seed: 99999  # Your custom value
+  hidden_layer_sizes: [50, 50]  # Must include all settings
+  learning_rate_init: 0.001
+  alpha: 0.0001
+  max_iter: 1000
+  early_stopping: true
+  validation_fraction: 0.1
+  features:
+    mass_error:
+      _target_: winnow.calibration.calibration_features.MassErrorFeature
+      residue_masses: ${residue_masses}
+    prosit_features:
+      # ... include all features you want to keep
+    # Features you don't include will be missing (not using defaults)
+```
+
+**Removing features**: To remove features, simply **don't include them** in your custom `calibrator.yaml`. Since your file completely replaces the package version, any features you omit will be absent from the final config. You can also remove a feature from the command line with Hydra's `~` deletion override (e.g., `winnow train ~calibrator.features.prosit_features`).
+
+**New variables**: Adding new keys that don't exist in package configs will cause them to be ignored (Hydra is not strict by default). They won't cause errors, but they also won't be used unless your code explicitly accesses them. **Stick to overriding existing keys** from package configs.
+
+**Recommendation**: Always start by copying the complete package config file, then modify only the values you need. You can get the package config structure by running `winnow config train` and copying the relevant section, or by copying from `winnow/configs/` in the [repository](https://github.com/instadeepai/winnow).
+
+### Getting package config structure
+
+Before creating custom configs, you need to know the structure of package configs. Here are ways to get them:
+
+1. **View resolved config**: Run `winnow config train` or `winnow config predict` to see the complete resolved configuration
+2. **Clone the repository**: Visit the winnow [repository](https://github.com/instadeepai/winnow) and check `winnow/configs/` directory
+3. **Inspect installed package**: Find the package location (e.g., `python -c "import winnow; print(winnow.__file__)"`) and navigate to `configs/`
+
+**Recommended workflow**: Start by creating a new config directory, copying in the package config file you want to customise, and then modify only the values you need.
+
+### Troubleshooting
+
+**Common mistakes:**
+
+1. **Wrong file names**: File names must match exactly (case-sensitive)
+   - ❌ `Residues.yaml` (wrong case)
+   - ✅ `residues.yaml` (correct)
+
+2. **Incorrect structure**: Directory structure must match package structure
+   - ❌ `my_configs/data_loaders/` (wrong directory name)
+   - ✅ `my_configs/data_loader/` (correct)
+
+3. **Typos in keys**: YAML keys must match package config keys exactly
+   - Check package configs for correct key names
+
+4. **Invalid YAML**: Ensure your YAML files are valid
+   - Use a YAML validator if unsure
+
+**How to check if custom config is being used:**
+
+1. Use `winnow config train --config-dir my_configs` and search for your custom values
+2. Compare output with `winnow config train` (without custom dir) to see differences
+3. Check logs - winnow logs which config directory is being used
+
 ## Additional resources
 
 - [Hydra documentation](https://hydra.cc/docs/intro/)
@@ -519,7 +648,7 @@ If you're migrating from the old argument-based CLI:
 
 **Key changes:**
 
-- `data_source` renamed to `data_loader` (references config/data_loader/*.yaml)
+- `data_source` renamed to `data_loader` (references configs/data_loader/*.yaml)
 - `fdr_threshold` and `confidence_column` now nested under `fdr_control`
 - `local_model_folder` and `huggingface_model_name` merged into `pretrained_model_name_or_path`
 - Dataset paths are now specified directly as Hydra parameters instead of via separate YAML files:
diff --git a/pyproject.toml b/pyproject.toml
index f8fbcaa..b113924 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,9 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools]
 packages = ["winnow", "winnow.scripts", "winnow.fdr", "winnow.datasets", "winnow.calibration"]
 
+[tool.setuptools.package-data]
+winnow = ["configs/**/*.yaml", "configs/**/*.yml"]
+
 [dependency-groups]
 dev = [
     "pre-commit>=4.1.0",
@@ -87,3 +90,9 @@ exclude_lines = [
 
 [tool.coverage.html]
 directory = "htmlcov"
+
+[tool.uv.workspace]
+members = [
+    "winnow_demo",
+    "tmp",
+]
diff --git a/tests/scripts/test_config_paths.py b/tests/scripts/test_config_paths.py
new file mode 100644
index 0000000..ac3ccf1
--- /dev/null
+++ b/tests/scripts/test_config_paths.py
@@ -0,0 +1,199 @@
+"""Tests for config path resolution utilities."""
+
+from __future__ import annotations
+
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+from winnow.scripts.config_path_utils import (
+    get_config_dir,
+    get_config_search_path,
+    get_primary_config_dir,
+)
+
+
+class TestGetConfigDir:
+    """Tests for get_config_dir() function."""
+
+    def test_package_mode(self, tmp_path):
+        """Test config dir resolution in package mode."""
+        # Mock importlib.resources to simulate installed package
+        package_configs = tmp_path / "package_configs"
+        package_configs.mkdir()
+
+        # Create a mock that simulates files("winnow").joinpath("configs")
+        mock_winnow_files = MagicMock()
+        mock_configs = MagicMock()
+        mock_configs.is_dir.return_value = True
+        # Make str() return the path
+        type(mock_configs).__str__ = lambda self: str(package_configs)
+        mock_winnow_files.joinpath.return_value = mock_configs
+
+        # Mock files("winnow") to return mock_winnow_files
+        mock_files = MagicMock(return_value=mock_winnow_files)
+
+        # Patch importlib.resources.files (where it's imported from)
+        with patch("importlib.resources.files", mock_files):
+            config_dir = get_config_dir()
+            assert config_dir == package_configs
+
+    def test_dev_mode(self, tmp_path):
+        """Test config dir resolution in dev mode."""
+        # Mock importlib.resources to fail (simulating dev mode)
+        with patch("importlib.resources.files", side_effect=ModuleNotFoundError()):
+            # Create a mock repo structure
+            repo_root = tmp_path / "repo"
+            winnow_dir = repo_root / "winnow"
+            configs_dir = winnow_dir / "configs"
+            configs_dir.mkdir(parents=True)
+
+            # Mock __file__ to point to winnow/scripts/config_paths.py
+            with patch(
+                "winnow.scripts.config_path_utils.__file__",
+                str(winnow_dir / "scripts" / "config_paths.py"),
+            ):
+                config_dir = get_config_dir()
+                assert config_dir == configs_dir
+
+    def test_dev_mode_alt_location(self, tmp_path):
+        """Test config dir resolution in dev mode with configs at repo root."""
+        # Mock importlib.resources to fail
+        with patch("importlib.resources.files", side_effect=ModuleNotFoundError()):
+            # Create a mock repo structure with configs at root
+            repo_root = tmp_path / "repo"
+            configs_dir = repo_root / "configs"
+            configs_dir.mkdir(parents=True)
+
+            # Mock __file__ to point to winnow/scripts/config_paths.py
+            with patch(
+                "winnow.scripts.config_path_utils.__file__",
+                str(repo_root / "winnow" / "scripts" / "config_paths.py"),
+            ):
+                config_dir = get_config_dir()
+                assert config_dir == configs_dir
+
+    def test_not_found(self):
+        """Test error when config dir cannot be found."""
+        with patch("importlib.resources.files", side_effect=ModuleNotFoundError()):
+            with patch(
+                "winnow.scripts.config_path_utils.__file__", "/nonexistent/path"
+            ):
+                with pytest.raises(FileNotFoundError):
+                    get_config_dir()
+
+
+class TestGetConfigSearchPath:
+    """Tests for get_config_search_path() function."""
+
+    def test_custom_dir_only(self, tmp_path):
+        """Test search path with custom directory."""
+        custom_dir = tmp_path / "custom_configs"
+        custom_dir.mkdir()
+
+        with patch(
+            "winnow.scripts.config_path_utils.get_config_dir",
+            return_value=Path("/package/configs"),
+        ):
+            search_path = get_config_search_path(str(custom_dir))
+            assert len(search_path) == 2
+            assert search_path[0] == custom_dir.resolve()
+            assert search_path[1] == Path("/package/configs").resolve()
+
+    def test_no_custom_dir(self, tmp_path):
+        """Test search path without custom directory."""
+        package_dir = tmp_path / "package_configs"
+        package_dir.mkdir()
+
+        with patch(
+            "winnow.scripts.config_path_utils.get_config_dir", return_value=package_dir
+        ):
+            search_path = get_config_search_path()
+            assert len(search_path) == 1
+            assert search_path[0] == package_dir.resolve()
+
+    def test_custom_dir_not_exists(self):
+        """Test error when custom directory doesn't exist."""
+        with pytest.raises(FileNotFoundError, match="does not exist"):
+            get_config_search_path("/nonexistent/path")
+
+    def test_custom_dir_not_directory(self, tmp_path):
+        """Test error when custom path is not a directory."""
+        file_path = tmp_path / "not_a_dir"
+        file_path.touch()
+
+        with pytest.raises(ValueError, match="not a directory"):
+            get_config_search_path(str(file_path))
+
+
+class TestGetPrimaryConfigDir:
+    """Tests for get_primary_config_dir() function."""
+
+    def test_no_custom_dir(self, tmp_path):
+        """Test primary config dir without custom directory."""
+        package_dir = tmp_path / "package_configs"
+        package_dir.mkdir()
+
+        with patch(
+            "winnow.scripts.config_path_utils.get_config_dir", return_value=package_dir
+        ):
+            primary_dir = get_primary_config_dir()
+            assert primary_dir == package_dir.resolve()
+
+    def test_with_custom_dir(self, tmp_path):
+        """Test primary config dir with custom directory (merged)."""
+        custom_dir = tmp_path / "custom_configs"
+        custom_dir.mkdir()
+        (custom_dir / "residues.yaml").write_text("custom: true")
+
+        package_dir = tmp_path / "package_configs"
+        package_dir.mkdir()
+        (package_dir / "train.yaml").write_text("package: true")
+        (package_dir / "residues.yaml").write_text("package: true")
+
+        with patch(
+            "winnow.scripts.config_path_utils.get_config_dir", return_value=package_dir
+        ):
+            primary_dir = get_primary_config_dir(str(custom_dir))
+
+            # Should be a temporary merged directory
+            assert primary_dir.exists()
+            assert primary_dir.is_dir()
+
+            # Custom config should override package config
+            residues_content = (primary_dir / "residues.yaml").read_text()
+            assert "custom: true" in residues_content
+
+            # Package config should be available for files not in custom dir
+            assert (primary_dir / "train.yaml").exists()
+            train_content = (primary_dir / "train.yaml").read_text()
+            assert "package: true" in train_content
+
+    def test_partial_configs(self, tmp_path):
+        """Test that partial configs work (only custom residues.yaml)."""
+        custom_dir = tmp_path / "custom_configs"
+        custom_dir.mkdir()
+        (custom_dir / "residues.yaml").write_text("custom_residues: true")
+
+        package_dir = tmp_path / "package_configs"
+        package_dir.mkdir()
+        (package_dir / "train.yaml").write_text("train_config: true")
+        (package_dir / "residues.yaml").write_text("package_residues: true")
+        (package_dir / "calibrator.yaml").write_text("calibrator_config: true")
+
+        with patch(
+            "winnow.scripts.config_path_utils.get_config_dir", return_value=package_dir
+        ):
+            primary_dir = get_primary_config_dir(str(custom_dir))
+
+            # Custom residues should override
+            residues_content = (primary_dir / "residues.yaml").read_text()
+            assert "custom_residues" in residues_content
+            assert "package_residues" not in residues_content
+
+            # Package files not in custom dir should be available
+            assert (primary_dir / "train.yaml").exists()
+            assert (primary_dir / "calibrator.yaml").exists()
+
+            train_content = (primary_dir / "train.yaml").read_text()
+            assert "train_config" in train_content
diff --git a/winnow/scripts/config_path_utils.py b/winnow/scripts/config_path_utils.py
new file mode 100644
index 0000000..18b50b9
--- /dev/null
+++ b/winnow/scripts/config_path_utils.py
@@ -0,0 +1,190 @@
+"""Configuration path resolution utilities.
+
+This module provides robust path resolution for config directories that works
+in both development (cloned repo) and package (installed) modes.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional, List
+import logging
+import shutil
+import tempfile
+import atexit
+
+logger = logging.getLogger(__name__)
+
+# Track temporary directories for cleanup
+_temp_dirs: List[Path] = []
+
+
+def _cleanup_temp_dirs() -> None:
+    """Clean up temporary directories on exit."""
+    for temp_dir in _temp_dirs:
+        if temp_dir.exists():
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+
+atexit.register(_cleanup_temp_dirs)
+
+
+def get_config_dir() -> Path:
+    """Get the primary config directory (package or dev mode).
+
+    Returns:
+        Path to the config directory. In package mode, returns the package
+        config directory. In dev mode, returns the repo root config directory.
+
+    Raises:
+        FileNotFoundError: If config directory cannot be found in either mode.
+    """
+    # Try package mode first (when installed)
+    try:
+        from importlib.resources import files
+
+        config_path = files("winnow").joinpath("configs")
+        if config_path.is_dir():
+            return Path(str(config_path))
+    except (ModuleNotFoundError, TypeError, AttributeError):
+        pass
+
+    # Fallback to dev mode (cloned repo)
+    # This file is in winnow/scripts/, so go up to repo root
+    script_dir = Path(__file__).parent
+    repo_root = script_dir.parent.parent
+    dev_configs = repo_root / "winnow" / "configs"
+
+    if dev_configs.exists() and dev_configs.is_dir():
+        return dev_configs
+
+    # If neither works, try alternative dev location (configs at repo root)
+    alt_dev_configs = repo_root / "configs"
+    if alt_dev_configs.exists() and alt_dev_configs.is_dir():
+        return alt_dev_configs
+
+    raise FileNotFoundError(
+        f"Could not locate configs directory. Tried:\n"
+        f"  - Package configs: winnow.configs\n"
+        f"  - Dev configs: {dev_configs}\n"
+        f"  - Alt dev configs: {alt_dev_configs}"
+    )
+
+
+def get_config_search_path(custom_config_dir: Optional[str] = None) -> List[Path]:
+    """Get ordered list of config directories for Hydra search path.
+
+    The search path is ordered by priority (first directory has highest priority):
+    1. Custom config directory (if provided)
+    2. Package configs (when installed)
+    3. Development configs (when running from cloned repo)
+
+    Args:
+        custom_config_dir: Optional path to custom config directory.
+            If provided, this takes highest priority.
+
+    Returns:
+        List of config directory paths in priority order (highest first).
+        All paths are absolute.
+
+    Raises:
+        FileNotFoundError: If custom_config_dir is provided but doesn't exist.
+        ValueError: If custom_config_dir is provided but is not a directory.
+    """
+    search_path: List[Path] = []
+
+    # 1. Custom config directory (highest priority)
+    if custom_config_dir:
+        custom_path = Path(custom_config_dir).resolve()
+        if not custom_path.exists():
+            raise FileNotFoundError(
+                f"Custom config directory does not exist: {custom_config_dir}"
+            )
+        if not custom_path.is_dir():
+            raise ValueError(
+                f"Custom config path is not a directory: {custom_config_dir}"
+            )
+        search_path.append(custom_path)
+        logger.info(f"Using custom config directory: {custom_path}")
+
+    # 2. Package configs (fallback for files not in custom dir)
+    try:
+        package_config_dir = get_config_dir()
+        # Only add if it's different from custom dir (avoid duplicates)
+        if not search_path or package_config_dir.resolve() != search_path[0].resolve():
+            search_path.append(package_config_dir.resolve())
+            logger.debug(f"Added package config directory: {package_config_dir}")
+    except FileNotFoundError:
+        logger.warning("Package config directory not found, skipping")
+
+    return search_path
+
+
+def _merge_config_dirs(custom_dir: Path, package_dir: Path) -> Path:
+    """Create a merged config directory with custom configs overriding package configs.
+
+    Creates a temporary directory containing:
+    - All files from custom_dir (highest priority)
+    - Files from package_dir that don't exist in custom_dir (fallback)
+
+    This allows partial configs to work with Hydra's single-directory search.
+
+    Args:
+        custom_dir: Custom config directory (highest priority).
+        package_dir: Package config directory (fallback).
+
+    Returns:
+        Path to temporary merged config directory.
+    """
+    temp_dir = Path(tempfile.mkdtemp(prefix="winnow_configs_"))
+    _temp_dirs.append(temp_dir)
+
+    # First, copy all package configs (this provides fallback for missing files)
+    if package_dir.exists():
+        for item in package_dir.rglob("*"):
+            if item.is_file():
+                rel_path = item.relative_to(package_dir)
+                dest_path = temp_dir / rel_path
+                dest_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(item, dest_path)
+
+    # Then, copy/override with custom configs (this takes precedence)
+    if custom_dir.exists():
+        for item in custom_dir.rglob("*"):
+            if item.is_file():
+                rel_path = item.relative_to(custom_dir)
+                dest_path = temp_dir / rel_path
+                dest_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(item, dest_path)
+                logger.debug(f"Merged custom config: {rel_path}")
+
+    return temp_dir
+
+
+def get_primary_config_dir(custom_config_dir: Optional[str] = None) -> Path:
+    """Get the primary config directory to use with Hydra.
+
+    If custom_config_dir is provided, creates a merged directory containing
+    both custom and package configs (custom takes precedence). This allows
+    partial configs to work - users only need to include files they want to override.
+
+    Otherwise returns package/dev config directory.
+
+    Args:
+        custom_config_dir: Optional path to custom config directory.
+
+    Returns:
+        Path to primary config directory (absolute). May be a temporary directory
+        if custom_config_dir is provided.
+    """
+    if custom_config_dir:
+        custom_path = Path(custom_config_dir).resolve()
+        package_path = get_config_dir().resolve()
+        # Merge custom and package configs so partial configs work
+        merged_dir = _merge_config_dirs(custom_path, package_path)
+        logger.info(
+            f"Using merged config directory (custom: {custom_path}, "
+            f"package: {package_path}) -> {merged_dir}"
+        )
+        return merged_dir
+    return get_config_dir().resolve()
diff --git a/winnow/scripts/main.py b/winnow/scripts/main.py
index ea260fb..ec57e01 100644
--- a/winnow/scripts/main.py
+++ b/winnow/scripts/main.py
@@ -7,7 +7,7 @@
 
 from __future__ import annotations
 
-from typing import Union, Tuple, Optional, List, TYPE_CHECKING
+from typing import Union, Tuple, Optional, List, TYPE_CHECKING, Annotated
 import typer
 import logging
 from rich.logging import RichHandler
@@ -145,19 +145,30 @@ def separate_metadata_and_predictions(
 
 
 def train_entry_point(
-    overrides: Optional[List[str]] = None, execute: bool = True
+    overrides: Optional[List[str]] = None,
+    execute: bool = True,
+    config_dir: Optional[str] = None,
 ) -> None:
     """The main training pipeline entry point.
 
     Args:
         overrides: Optional list of config overrides.
         execute: If False, only print the configuration and return without executing the pipeline.
+        config_dir: Optional path to custom config directory. If provided, configs in this
+            directory take precedence over package configs. Files not in custom dir will use package defaults (file-by-file resolution).
     """
-    from hydra import initialize, compose
+    from hydra import initialize_config_dir, compose
     from hydra.utils import instantiate
+    from winnow.scripts.config_path_utils import get_primary_config_dir
 
-    with initialize(
-        config_path="../../config", version_base="1.3", job_name="winnow_train"
+    # Get primary config directory (custom if provided, otherwise package/dev)
+    primary_config_dir = get_primary_config_dir(config_dir)
+
+    # Initialise Hydra with primary config directory
+    with initialize_config_dir(
+        config_dir=str(primary_config_dir),
+        version_base="1.3",
+        job_name="winnow_train",
     ):
         cfg = compose(config_name="train", overrides=overrides)
 
@@ -205,19 +216,31 @@ def train_entry_point(
 
 
 def predict_entry_point(
-    overrides: Optional[List[str]] = None, execute: bool = True
+    overrides: Optional[List[str]] = None,
+    execute: bool = True,
+    config_dir: Optional[str] = None,
 ) -> None:
     """The main prediction pipeline entry point.
 
     Args:
         overrides: Optional list of config overrides.
         execute: If False, only print the configuration and return without executing the pipeline.
+        config_dir: Optional path to custom config directory. If provided, configs in this
+            directory take precedence over package configs. Files not in custom dir will use
+            package defaults (file-by-file resolution).
     """
-    from hydra import initialize, compose
+    from hydra import initialize_config_dir, compose
     from hydra.utils import instantiate
+    from winnow.scripts.config_path_utils import get_primary_config_dir
+
+    # Get primary config directory (custom if provided, otherwise package/dev)
+    primary_config_dir = get_primary_config_dir(config_dir)
 
-    with initialize(
-        config_path="../../config", version_base="1.3", job_name="winnow_predict"
+    # Initialize Hydra with primary config directory
+    with initialize_config_dir(
+        config_dir=str(primary_config_dir),
+        version_base="1.3",
+        job_name="winnow_predict",
     ):
         cfg = compose(config_name="predict", overrides=overrides)
 
@@ -300,6 +323,10 @@ def predict_entry_point(
         "  [dim]winnow train data_loader=mztab[/dim]  # Use MZTab format instead of InstaNovo\n"
         "  [dim]winnow train model_output_dir=models/my_model[/dim]  # Custom output location\n"
         "  [dim]winnow train calibrator.seed=42[/dim]  # Set random seed\n\n"
+        "[bold cyan]Custom config directory:[/bold cyan]\n"
+        "  [dim]winnow train --config-dir /path/to/configs[/dim]  # Use custom config directory\n"
+        "  [dim]winnow train -cp ./my_configs[/dim]  # Short form (relative or absolute path)\n"
+        "  See docs for advanced usage.\n\n"
         "[bold cyan]Configuration files to customise:[/bold cyan]\n"
         "  • config/train.yaml - Main config (data paths, output locations)\n"
         "  • config/calibrator.yaml - Model architecture and features\n"
@@ -308,11 +335,21 @@ def predict_entry_point(
     ),
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 )
-def train(ctx: typer.Context) -> None:
+def train(
+    ctx: typer.Context,
+    config_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            "--config-dir",
+            "-cp",
+            help="Path to custom config directory (relative or absolute). See documentation for advanced usage.",
+        ),
+    ] = None,
+) -> None:
     """Passes control directly to the Hydra training pipeline."""
-    # Capture extra arguments as Hydra overrides
+    # Capture extra arguments as Hydra overrides (--config-dir already parsed out by Typer)
     overrides = ctx.args if ctx.args else None
-    train_entry_point(overrides)
+    train_entry_point(overrides, config_dir=config_dir)
 
 
 @app.command(
@@ -328,6 +365,10 @@ def train(ctx: typer.Context) -> None:
         "  [dim]winnow predict fdr_method=database_grounded[/dim]  # Use database-grounded FDR\n"
         "  [dim]winnow predict fdr_threshold=0.01[/dim]  # Target 1% FDR instead of 5%\n"
         "  [dim]winnow predict output_folder=results/my_run[/dim]  # Custom output location\n\n"
+        "[bold cyan]Custom config directory:[/bold cyan]\n"
+        "  [dim]winnow predict --config-dir /path/to/configs[/dim]  # Use custom config directory\n"
+        "  [dim]winnow predict -cp ./my_configs[/dim]  # Short form (relative or absolute path)\n"
+        "  See docs for advanced usage.\n\n"
         "[bold cyan]Configuration files to customise:[/bold cyan]\n"
         "  • config/predict.yaml - Main config (data paths, FDR settings, output)\n"
         "  • config/fdr_method/ - FDR methods (nonparametric, database_grounded)\n"
@@ -336,11 +377,21 @@ def train(ctx: typer.Context) -> None:
     ),
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 )
-def predict(ctx: typer.Context) -> None:
+def predict(
+    ctx: typer.Context,
+    config_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            "--config-dir",
+            "-cp",
+            help="Path to custom config directory (relative or absolute). See documentation for advanced usage.",
+        ),
+    ] = None,
+) -> None:
     """Passes control directly to the Hydra predict pipeline."""
-    # Capture extra arguments as Hydra overrides
+    # Capture extra arguments as Hydra overrides (--config-dir already parsed out by Typer)
     overrides = ctx.args if ctx.args else None
-    predict_entry_point(overrides)
+    predict_entry_point(overrides, config_dir=config_dir)
 
 
 @config_app.command(
@@ -352,14 +403,26 @@ def predict(ctx: typer.Context) -> None:
         "[bold cyan]Usage:[/bold cyan]\n"
         "  [dim]winnow config train[/dim]  # Show default config\n"
         "  [dim]winnow config train data_loader=mztab[/dim]  # Show config with overrides\n"
-        "  [dim]winnow config train calibrator.seed=42[/dim]  # Check override application"
+        "  [dim]winnow config train calibrator.seed=42[/dim]  # Check override application\n"
+        "  [dim]winnow config train --config-dir /path/to/configs[/dim]  # Show config with custom directory\n"
+        "  [dim]winnow config train -cp ./my_configs[/dim]  # Short form (relative or absolute path)"
     ),
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 )
-def config_train(ctx: typer.Context) -> None:
+def config_train(
+    ctx: typer.Context,
+    config_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            "--config-dir",
+            "-cp",
+            help="Path to custom config directory (relative or absolute). See documentation for advanced usage.",
+        ),
+    ] = None,
+) -> None:
     """Display the resolved training configuration."""
     overrides = ctx.args if ctx.args else None
-    train_entry_point(overrides, execute=False)
+    train_entry_point(overrides, execute=False, config_dir=config_dir)
 
 
 @config_app.command(
@@ -371,14 +434,26 @@ def config_train(ctx: typer.Context) -> None:
         "[bold cyan]Usage:[/bold cyan]\n"
         "  [dim]winnow config predict[/dim]  # Show default config\n"
         "  [dim]winnow config predict fdr_method=database_grounded[/dim]  # Show config with overrides\n"
-        "  [dim]winnow config predict fdr_control.fdr_threshold=0.01[/dim]  # Check override application"
+        "  [dim]winnow config predict fdr_control.fdr_threshold=0.01[/dim]  # Check override application\n"
+        "  [dim]winnow config predict --config-dir /path/to/configs[/dim]  # Show config with custom directory\n"
+        "  [dim]winnow config predict -cp ./my_configs[/dim]  # Short form (relative or absolute path)"
     ),
     context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
 )
-def config_predict(ctx: typer.Context) -> None:
+def config_predict(
+    ctx: typer.Context,
+    config_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            "--config-dir",
+            "-cp",
+            help="Path to custom config directory (relative or absolute). See documentation for advanced usage.",
+        ),
+    ] = None,
+) -> None:
     """Display the resolved prediction configuration."""
     overrides = ctx.args if ctx.args else None
-    predict_entry_point(overrides, execute=False)
+    predict_entry_point(overrides, execute=False, config_dir=config_dir)
 
 
 if __name__ == "__main__":

From ad8b1f552bec3d121c7c44286a5fa4eee74338cb Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:41:43 +0000
Subject: [PATCH 15/17] chore: update example notebook with new object
 instantiation arguments and using config defaults

---
 examples/getting_started_with_winnow.ipynb | 140 +++++++++------------
 1 file changed, 61 insertions(+), 79 deletions(-)

diff --git a/examples/getting_started_with_winnow.ipynb b/examples/getting_started_with_winnow.ipynb
index c9fccab..c8c9623 100644
--- a/examples/getting_started_with_winnow.ipynb
+++ b/examples/getting_started_with_winnow.ipynb
@@ -70,23 +70,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #7fbfbf; text-decoration-color: #7fbfbf\">[11/06/25 18:46:36] </span><span style=\"color: #000080; text-decoration-color: #000080\">INFO    </span> Enabling RDKit <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2024.09</span>.<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> jupyter extensions                                                                                         <a href=\"file:///home/j-daniel/repos/winnow/.venv/lib/python3.12/site-packages/rdkit/__init__.py\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">__init__.py</span></a><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">:</span><a href=\"file:///home/j-daniel/repos/winnow/.venv/lib/python3.12/site-packages/rdkit/__init__.py#22\" target=\"_blank\"><span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">22</span></a>\n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "\u001b[2;36m[11/06/25 18:46:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO    \u001b[0m Enabling RDKit \u001b[1;36m2024.09\u001b[0m.\u001b[1;36m6\u001b[0m jupyter extensions                                                                                         \u001b]8;id=885939;file:///home/j-daniel/repos/winnow/.venv/lib/python3.12/site-packages/rdkit/__init__.py\u001b\\\u001b[2m__init__.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=21754;file:///home/j-daniel/repos/winnow/.venv/lib/python3.12/site-packages/rdkit/__init__.py#22\u001b\\\u001b[2m22\u001b[0m\u001b]8;;\u001b\\\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import logging\n",
     "import warnings\n",
@@ -96,6 +82,8 @@
     "import pandas as pd\n",
     "import seaborn as sns\n",
     "from huggingface_hub import list_repo_files, snapshot_download\n",
+    "from hydra import initialize, compose\n",
+    "from hydra.utils import instantiate\n",
     "\n",
     "from winnow.calibration.calibration_features import (\n",
     "    BeamFeatures,\n",
@@ -106,9 +94,7 @@
     "    RetentionTimeFeature,\n",
     ")\n",
     "from winnow.calibration.calibrator import ProbabilityCalibrator\n",
-    "from winnow.constants import RESIDUE_MASSES\n",
     "from winnow.datasets.calibration_dataset import CalibrationDataset\n",
-    "from winnow.datasets.data_loaders import InstaNovoDatasetLoader\n",
     "from winnow.fdr.database_grounded import DatabaseGroundedFDRControl\n",
     "from winnow.fdr.nonparametric import NonParametricFDRControl\n",
     "\n",
@@ -141,17 +127,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['.gitattributes', 'README.md', 'celegans_labelled.parquet', 'celegans_labelled_beams.csv', 'celegans_raw.parquet', 'celegans_raw_beams.csv', 'general_test.parquet', 'general_test_beams.csv', 'general_train.parquet', 'general_train_beams.csv', 'general_val.parquet', 'general_val_beams.csv', 'helaqc_labelled.parquet', 'helaqc_labelled_beams.csv', 'helaqc_raw_less_train.parquet', 'helaqc_raw_less_train_beams.csv', 'immuno2_labelled.parquet', 'immuno2_labelled_beams.csv', 'immuno2_raw.parquet', 'immuno2_raw_beams.csv']\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "repo_id = \"InstaDeepAI/winnow-ms-datasets\"\n",
     "save_dir = \"winnow-ms-datasets\"\n",
@@ -162,34 +140,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5a543758fdac4806b5ff0a3ee5c9cdf7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'/home/j-daniel/repos/winnow/examples/winnow-ms-datasets'"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# -- Download the helaqc dataset\n",
     "snapshot_download(\n",
@@ -225,7 +178,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialise Hydra with the config directory\n",
+    "with initialize(\n",
+    "    config_path=\"../winnow/configs\", version_base=\"1.3\", job_name=\"winnow_notebook\"\n",
+    "):\n",
+    "    cfg = compose(config_name=\"train\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -272,7 +238,10 @@
    "source": [
     "# -- Load data\n",
     "logger.info(\"Loading dataset.\")\n",
-    "dataset = InstaNovoDatasetLoader().load(\n",
+    "data_loader = instantiate(\n",
+    "    cfg.data_loader\n",
+    ")  # Loads default (InstaNovo) data loader with config\n",
+    "dataset = data_loader.load(\n",
     "    data_path=\"winnow-ms-datasets/helaqc_labelled.parquet\",\n",
     "    predictions_path=\"winnow-ms-datasets/helaqc_labelled_beams.csv\",\n",
     ")\n",
@@ -750,7 +719,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -797,19 +766,30 @@
    "source": [
     "# -- Set up calibrator\n",
     "logger.info(\"Initialising calibrator.\")\n",
-    "SEED = 42\n",
-    "calibrator = ProbabilityCalibrator(SEED)\n",
+    "calibrator = ProbabilityCalibrator(seed=cfg.calibrator.seed)\n",
     "\n",
     "logger.info(\"Adding features to calibrator.\")\n",
-    "MZ_TOLERANCE = 0.02\n",
-    "HIDDEN_DIM = 10\n",
-    "TRAIN_FRACTION = 0.1\n",
-    "calibrator.add_feature(MassErrorFeature(residue_masses=RESIDUE_MASSES))\n",
-    "calibrator.add_feature(PrositFeatures(mz_tolerance=MZ_TOLERANCE))\n",
     "calibrator.add_feature(\n",
-    "    RetentionTimeFeature(hidden_dim=HIDDEN_DIM, train_fraction=TRAIN_FRACTION)\n",
+    "    MassErrorFeature(residue_masses=cfg.calibrator.features.mass_error.residue_masses)\n",
+    ")\n",
+    "calibrator.add_feature(\n",
+    "    PrositFeatures(\n",
+    "        mz_tolerance=cfg.calibrator.features.prosit_features.mz_tolerance,\n",
+    "        invalid_prosit_tokens=cfg.calibrator.features.prosit_features.invalid_prosit_tokens,\n",
+    "    )\n",
+    ")\n",
+    "calibrator.add_feature(\n",
+    "    RetentionTimeFeature(\n",
+    "        hidden_dim=cfg.calibrator.features.retention_time_feature.hidden_dim,\n",
+    "        train_fraction=cfg.calibrator.features.retention_time_feature.train_fraction,\n",
+    "        invalid_prosit_tokens=cfg.calibrator.features.retention_time_feature.invalid_prosit_tokens,\n",
+    "    )\n",
+    ")\n",
+    "calibrator.add_feature(\n",
+    "    ChimericFeatures(\n",
+    "        mz_tolerance=cfg.calibrator.features.chimeric_features.mz_tolerance,\n",
+    "    )\n",
     ")\n",
-    "calibrator.add_feature(ChimericFeatures(mz_tolerance=MZ_TOLERANCE))\n",
     "calibrator.add_feature(BeamFeatures())"
    ]
   },
@@ -1411,7 +1391,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1425,11 +1405,10 @@
    "source": [
     "# -- Database-grounded FDR control\n",
     "database_grounded_fdr_control = DatabaseGroundedFDRControl(\n",
-    "    confidence_feature=\"calibrated_confidence\"\n",
-    ")\n",
-    "database_grounded_fdr_control.fit(\n",
-    "    dataset=test_dataset.metadata, residue_masses=RESIDUE_MASSES\n",
+    "    confidence_feature=\"calibrated_confidence\",\n",
+    "    residue_masses=cfg.residue_masses,\n",
     ")\n",
+    "database_grounded_fdr_control.fit(dataset=test_dataset.metadata)\n",
     "print(\n",
     "    \"Database-grounded FDR control confidence cutoff at 5% FDR using calibrated confidence:\",\n",
     "    database_grounded_fdr_control.get_confidence_cutoff(threshold=0.05),\n",
@@ -1536,7 +1515,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1583,7 +1562,7 @@
    "source": [
     "# -- Load the raw, unlabelled data\n",
     "logger.info(\"Loading raw dataset.\")\n",
-    "dataset = InstaNovoDatasetLoader().load(\n",
+    "dataset = data_loader.load(\n",
     "    data_path=\"winnow-ms-datasets/helaqc_raw_less_train.parquet\",\n",
     "    predictions_path=\"winnow-ms-datasets/helaqc_raw_less_train_beams.csv\",\n",
     ")\n",
@@ -1884,8 +1863,10 @@
    ],
    "source": [
     "# Minimal feature set: no Prosit dependency\n",
-    "cal_min = ProbabilityCalibrator(SEED)\n",
-    "cal_min.add_feature(MassErrorFeature(residue_masses=RESIDUE_MASSES))\n",
+    "cal_min = ProbabilityCalibrator(seed=cfg.calibrator.seed)\n",
+    "cal_min.add_feature(\n",
+    "    MassErrorFeature(residue_masses=cfg.calibrator.features.mass_error.residue_masses)\n",
+    ")\n",
     "cal_min.add_feature(BeamFeatures())\n",
     "\n",
     "cal_min.fit(train_dataset)\n",
@@ -1968,7 +1949,7 @@
    "source": [
     "# -- Load data\n",
     "logger.info(\"Loading dataset.\")\n",
-    "celegans_dataset = InstaNovoDatasetLoader().load(\n",
+    "celegans_dataset = data_loader.load(\n",
     "    data_path=\"winnow-ms-datasets/celegans_labelled.parquet\",\n",
     "    predictions_path=\"winnow-ms-datasets/celegans_labelled_beams.csv\",\n",
     ")"
@@ -2158,8 +2139,9 @@
     "database_grounded_fdr_control = DatabaseGroundedFDRControl(\n",
-    "    confidence_feature=\"calibrated_confidence\"\n",
+    "    confidence_feature=\"calibrated_confidence\",\n",
+    "    residue_masses=cfg.residue_masses,\n",
     ")\n",
     "database_grounded_fdr_control.fit(\n",
-    "    dataset=celegans_filtered_dataset.metadata, residue_masses=RESIDUE_MASSES\n",
+    "    dataset=celegans_filtered_dataset.metadata\n",
     ")\n",
     "confidence_cutoff_dbg = database_grounded_fdr_control.get_confidence_cutoff(\n",
     "    threshold=0.05\n",

From 00a006b3802310f82c37219bbf2383fbe013e55f Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Thu, 4 Dec 2025 12:54:49 +0000
Subject: [PATCH 16/17] ci: migrate coverage badge to Gist-based dynamic system

---
 .github/workflows/tests.yml      | 24 +++++++++++++------
 .gitignore                       |  2 ++
 Makefile                         |  2 +-
 README.md                        |  2 +-
 scripts/commit_coverage_badge.sh | 31 -------------------------
 scripts/update_coverage_badge.sh | 40 --------------------------------
 6 files changed, 21 insertions(+), 80 deletions(-)
 delete mode 100755 scripts/commit_coverage_badge.sh
 delete mode 100755 scripts/update_coverage_badge.sh

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2ac0620..403c13c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,8 +7,6 @@ on:
 jobs:
   tests:
     runs-on: ubuntu-latest
-    permissions:
-      contents: write
     steps:
       - name: Check out code
         uses: actions/checkout@v5
@@ -19,8 +17,20 @@ jobs:
       - name: Install dependencies
         run: uv sync
       - name: Run tests with coverage
-        run: uv run pytest tests --cov=winnow --cov-report=html --cov-fail-under=0
-      - name: Update coverage badge
-        run: ./scripts/update_coverage_badge.sh
-      - name: Commit updated badge
-        run: ./scripts/commit_coverage_badge.sh
+        run: uv run pytest tests --verbose --cov=winnow --cov-report xml:coverage.xml --cov-report term-missing --junitxml=pytest.xml --cov-fail-under=0
+      - name: pytest coverage comment
+        id: coverageComment
+        uses: MishaKav/pytest-coverage-comment@main
+        with:
+          pytest-xml-coverage-path: ./coverage.xml
+          junitxml-path: ./pytest.xml
+      - name: Create the badge
+        uses: schneegans/dynamic-badges-action@v1.7.0
+        with:
+          auth: ${{ secrets.BOT_ACCESS_TOKEN }}
+          gistID: f6df3d7ac249eb608e631192d2efb25e
+          filename: pytest-coverage-comment.json
+          label: Test Coverage
+          message: ${{ steps.coverageComment.outputs.coverage }}
+          color: ${{ steps.coverageComment.outputs.color }}
+          namedLogo: python
diff --git a/.gitignore b/.gitignore
index 9bdfb1a..5c94db1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,5 @@ build/
 # Coverage reports
 htmlcov/
 .coverage
+coverage.xml
+pytest.xml
diff --git a/Makefile b/Makefile
index 657fef8..86a8f81 100644
--- a/Makefile
+++ b/Makefile
@@ -35,7 +35,7 @@ DOCKER_RUN_FLAGS_VOLUME_MOUNT_HOME = $(DOCKER_RUN_FLAGS) --volume $(PWD):$(DOCKE
 DOCKER_RUN_FLAGS_VOLUME_MOUNT_RUNS = $(DOCKER_RUN_FLAGS) --volume $(PWD)/runs:$(DOCKER_RUNS_DIRECTORY)
 DOCKER_RUN = docker run $(DOCKER_RUN_FLAGS) $(DOCKER_IMAGE_NAME)
 
-PYTEST = uv run pytest tests --cov=winnow --cov-report=html --cov-fail-under=0
+PYTEST = uv run pytest tests --verbose --cov=winnow --cov-report xml:coverage.xml --cov-report term-missing --junitxml=pytest.xml --cov-fail-under=0
 
 #################################################################################
 ## Docker build commands																#
diff --git a/README.md b/README.md
index 170a587..a1ba910 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@
     </a>
     <a href="https://github.com/instadeepai/winnow/actions">
         <img
-            src="https://img.shields.io/badge/coverage-55%25-red?logo=coverage"
+            src="https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/winnowbot/f6df3d7ac249eb608e631192d2efb25e/raw/pytest-coverage-comment.json"
             alt="Test Coverage"
             style="max-width:100%;"
         >
diff --git a/scripts/commit_coverage_badge.sh b/scripts/commit_coverage_badge.sh
deleted file mode 100755
index acc5a76..0000000
--- a/scripts/commit_coverage_badge.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-# Script to commit updated coverage badge
-# Used by GitHub Actions to commit badge changes
-
-set -e  # Exit on any error
-
-echo "🔧 Configuring git..."
-
-# Configure git for GitHub Actions
-git config --local user.email "action@github.com"
-git config --local user.name "GitHub Action"
-
-echo "📝 Staging README.md..."
-git add README.md
-
-echo "🔍 Checking for changes..."
-if ! git diff --staged --quiet; then
-    # Extract coverage percentage for commit message
-    COVERAGE_BADGE=$(grep -o 'coverage-[0-9]*%25' README.md)
-    COMMIT_MSG="Update coverage badge: ${COVERAGE_BADGE}"
-
-    echo "💾 Committing changes: ${COMMIT_MSG}"
-    git commit -m "${COMMIT_MSG}"
-
-    echo "🚀 Pushing to repository..."
-    git push
-
-    echo "✅ Coverage badge updated and committed"
-else
-    echo "ℹ️ No changes to coverage badge"
-fi
diff --git a/scripts/update_coverage_badge.sh b/scripts/update_coverage_badge.sh
deleted file mode 100755
index 14afd5c..0000000
--- a/scripts/update_coverage_badge.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env bash
-# Script to update coverage badge in README
-# Used by GitHub Actions to automatically update coverage badge on push
-
-set -e  # Exit on any error
-
-echo "🔍 Extracting coverage percentage..."
-
-# Run tests with coverage and capture the percentage
-COVERAGE_OUTPUT=$(uv run pytest tests --cov=winnow --cov-report=term-missing --cov-fail-under=0 --quiet)
-COVERAGE_PERCENT=$(echo "$COVERAGE_OUTPUT" | grep -o "TOTAL.*[0-9]*%" | grep -o "[0-9]*%" | sed 's/%//')
-
-if [ -z "$COVERAGE_PERCENT" ]; then
-    echo "❌ Could not extract coverage percentage"
-    exit 1
-fi
-
-echo "📊 Coverage: ${COVERAGE_PERCENT}%"
-
-# Determine badge color based on coverage
-if [ "$COVERAGE_PERCENT" -ge 80 ]; then
-    COLOR="brightgreen"
-    EMOJI="🟢"
-elif [ "$COVERAGE_PERCENT" -ge 60 ]; then
-    COLOR="yellow"
-    EMOJI="🟡"
-else
-    COLOR="red"
-    EMOJI="🔴"
-fi
-
-# Generate badge URL
-BADGE_URL="https://img.shields.io/badge/coverage-${COVERAGE_PERCENT}%25-${COLOR}?logo=coverage"
-
-echo "${EMOJI} Badge URL: ${BADGE_URL}"
-
-# Update README with the new badge
-sed -i "s|https://img.shields.io/badge/coverage-[0-9]*%25-[a-z]*?logo=coverage|${BADGE_URL}|g" README.md
-
-echo "✅ README updated with coverage badge: ${COVERAGE_PERCENT}%"

From 571b3b35e69820df769d3b8bfa61d309d601b99f Mon Sep 17 00:00:00 2001
From: Jemma Daniel <134346753+JemmaLDaniel@users.noreply.github.com>
Date: Mon, 8 Dec 2025 12:14:56 +0000
Subject: [PATCH 17/17] chore: track new config position

---
 winnow/configs/calibrator.yaml                | 48 ++++++++++++++
 winnow/configs/data_loader/instanovo.yaml     | 23 +++++++
 winnow/configs/data_loader/mztab.yaml         | 20 ++++++
 winnow/configs/data_loader/pointnovo.yaml     |  5 ++
 winnow/configs/data_loader/winnow.yaml        |  7 ++
 .../configs/fdr_method/database_grounded.yaml |  8 +++
 winnow/configs/fdr_method/nonparametric.yaml  |  3 +
 winnow/configs/predict.yaml                   | 38 +++++++++++
 winnow/configs/residues.yaml                  | 64 +++++++++++++++++++
 winnow/configs/train.yaml                     | 21 ++++++
 10 files changed, 237 insertions(+)
 create mode 100644 winnow/configs/calibrator.yaml
 create mode 100644 winnow/configs/data_loader/instanovo.yaml
 create mode 100644 winnow/configs/data_loader/mztab.yaml
 create mode 100644 winnow/configs/data_loader/pointnovo.yaml
 create mode 100644 winnow/configs/data_loader/winnow.yaml
 create mode 100644 winnow/configs/fdr_method/database_grounded.yaml
 create mode 100644 winnow/configs/fdr_method/nonparametric.yaml
 create mode 100644 winnow/configs/predict.yaml
 create mode 100644 winnow/configs/residues.yaml
 create mode 100644 winnow/configs/train.yaml

diff --git a/winnow/configs/calibrator.yaml b/winnow/configs/calibrator.yaml
new file mode 100644
index 0000000..0816814
--- /dev/null
+++ b/winnow/configs/calibrator.yaml
@@ -0,0 +1,48 @@
+# --- Calibrator configuration ---
+
+calibrator:
+  _target_: winnow.calibration.calibrator.ProbabilityCalibrator
+
+  seed: 42
+  hidden_layer_sizes: [50, 50]  # The number of neurons in each hidden layer of the MLP classifier.
+  learning_rate_init: 0.001  # The initial learning rate for the MLP classifier.
+  alpha: 0.0001  # L2 regularisation parameter for the MLP classifier.
+  max_iter: 1000  # Maximum number of training iterations for the MLP classifier.
+  early_stopping: true  # Whether to use early stopping to terminate training.
+  validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
+
+  features:
+    mass_error:
+      _target_: winnow.calibration.calibration_features.MassErrorFeature
+      residue_masses: ${residue_masses}  # The residue masses to use for the mass error feature.
+
+    prosit_features:
+      _target_: winnow.calibration.calibration_features.PrositFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true  # Whether to learn from missing Prosit features. If False, errors will be raised when invalid spectra are encountered.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit features.
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
+
+    retention_time_feature:
+      _target_: winnow.calibration.calibration_features.RetentionTimeFeature
+      hidden_dim: 10  # The hidden dimension size for the MLP regressor used to predict iRT from observed retention times.
+      train_fraction: 0.1  # The fraction of the data to use for training the iRT predictor.
+      learn_from_missing: true  # Whether to learn from missing retention time features. If False, errors will be raised when invalid spectra are encountered.
+      seed: 42  # Random seed for the MLP regressor.
+      learning_rate_init: 0.001  # The initial learning rate for the MLP regressor.
+      alpha: 0.0001  # L2 regularisation parameter for the MLP regressor.
+      max_iter: 200  # Maximum number of training iterations for the MLP regressor.
+      early_stopping: false  # Whether to use early stopping for the MLP regressor.
+      validation_fraction: 0.1  # Proportion of training data to use for early stopping validation.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit iRT features.
+      prosit_irt_model_name: Prosit_2019_irt  # The name of the Prosit iRT model to use.
+
+    chimeric_features:
+      _target_: winnow.calibration.calibration_features.ChimericFeatures
+      mz_tolerance: 0.02
+      learn_from_missing: true  # Whether to learn from missing chimeric features. If False, errors will be raised when invalid spectra are encountered.
+      invalid_prosit_tokens: ${invalid_prosit_tokens}  # The tokens to consider as invalid for Prosit chimeric intensity features.
+      prosit_intensity_model_name: Prosit_2020_intensity_HCD  # The name of the Prosit intensity model to use.
+
+    beam_features:
+      _target_: winnow.calibration.calibration_features.BeamFeatures
diff --git a/winnow/configs/data_loader/instanovo.yaml b/winnow/configs/data_loader/instanovo.yaml
new file mode 100644
index 0000000..caf142e
--- /dev/null
+++ b/winnow/configs/data_loader/instanovo.yaml
@@ -0,0 +1,23 @@
+# --- InstaNovo data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.InstaNovoDatasetLoader
+
+residue_masses: ${residue_masses}
+residue_remapping:  # Used to map InstaNovo legacy notations to UNIMOD tokens.
+  "M(ox)": "M[UNIMOD:35]"  # Oxidation
+  "M(+15.99)": "M[UNIMOD:35]"  # Oxidation
+  "S(p)": "S[UNIMOD:21]"  # Phosphorylation
+  "T(p)": "T[UNIMOD:21]"  # Phosphorylation
+  "Y(p)": "Y[UNIMOD:21]"  # Phosphorylation
+  "S(+79.97)": "S[UNIMOD:21]"  # Phosphorylation
+  "T(+79.97)": "T[UNIMOD:21]"  # Phosphorylation
+  "Y(+79.97)": "Y[UNIMOD:21]"  # Phosphorylation
+  "Q(+0.98)": "Q[UNIMOD:7]"  # Deamidation
+  "N(+0.98)": "N[UNIMOD:7]"  # Deamidation
+  "Q(+.98)": "Q[UNIMOD:7]"  # Deamidation
+  "N(+.98)": "N[UNIMOD:7]"  # Deamidation
+  "C(+57.02)": "C[UNIMOD:4]"  # Carbamidomethylation
+  # N-terminal modifications.
+  "(+42.01)": "[UNIMOD:1]"  # Acetylation
+  "(+43.01)": "[UNIMOD:5]"  # Carbamylation
+  "(-17.03)": "[UNIMOD:385]"  # Ammonia loss
diff --git a/winnow/configs/data_loader/mztab.yaml b/winnow/configs/data_loader/mztab.yaml
new file mode 100644
index 0000000..dc184ea
--- /dev/null
+++ b/winnow/configs/data_loader/mztab.yaml
@@ -0,0 +1,20 @@
+# --- MZTab data loading configuration ---
+_target_: winnow.datasets.data_loaders.MZTabDatasetLoader
+
+residue_masses: ${residue_masses}
+residue_remapping:  # Used to map Casanovo-specific notations to UNIMOD tokens.
+  "M+15.995": "M[UNIMOD:35]"  # Oxidation
+  "Q+0.984": "Q[UNIMOD:7]"  # Deamidation
+  "N+0.984": "N[UNIMOD:7]"  # Deamidation
+  "+42.011": "[UNIMOD:1]"  # Acetylation
+  "+43.006": "[UNIMOD:5]"  # Carbamylation
+  "-17.027": "[UNIMOD:385]"  # Ammonia loss
+  "C+57.021": "C[UNIMOD:4]"  # Carbamidomethylation
+  "C[Carbamidomethyl]": "C[UNIMOD:4]"  # Carbamidomethylation
+  "M[Oxidation]": "M[UNIMOD:35]"  # Oxidation
+  "N[Deamidated]": "N[UNIMOD:7]"  # Deamidation
+  "Q[Deamidated]": "Q[UNIMOD:7]"  # Deamidation
+  # N-terminal modifications.
+  "[Acetyl]-": "[UNIMOD:1]"  # Acetylation
+  "[Carbamyl]-": "[UNIMOD:5]"  # Carbamylation
+  "[Ammonia-loss]-": "[UNIMOD:385]"  # Ammonia loss
diff --git a/winnow/configs/data_loader/pointnovo.yaml b/winnow/configs/data_loader/pointnovo.yaml
new file mode 100644
index 0000000..022691a
--- /dev/null
+++ b/winnow/configs/data_loader/pointnovo.yaml
@@ -0,0 +1,5 @@
+# --- PointNovo data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.PointNovoDatasetLoader
+
+residue_masses: ${residue_masses}
diff --git a/winnow/configs/data_loader/winnow.yaml b/winnow/configs/data_loader/winnow.yaml
new file mode 100644
index 0000000..dbfb632
--- /dev/null
+++ b/winnow/configs/data_loader/winnow.yaml
@@ -0,0 +1,7 @@
+# --- Winnow data loading configuration ---
+
+_target_: winnow.datasets.data_loaders.WinnowDatasetLoader
+
+residue_masses: ${residue_masses}
+# The internal Winnow dataset loader does not need a residue remapping
+# since it uses the UNIMOD tokens directly.
diff --git a/winnow/configs/fdr_method/database_grounded.yaml b/winnow/configs/fdr_method/database_grounded.yaml
new file mode 100644
index 0000000..41a6cc3
--- /dev/null
+++ b/winnow/configs/fdr_method/database_grounded.yaml
@@ -0,0 +1,8 @@
+# --- Database-grounded FDR control configuration ---
+
+_target_: winnow.fdr.database_grounded.DatabaseGroundedFDRControl
+
+confidence_feature: ${fdr_control.confidence_column}  # Name of the column with confidence scores to use for FDR estimation.
+residue_masses: ${residue_masses}  # The residue masses from global `residues` config
+isotope_error_range: [0, 1]  # The isotope error range for matching peptides
+drop: 10  # The number of top predictions to drop for stability
diff --git a/winnow/configs/fdr_method/nonparametric.yaml b/winnow/configs/fdr_method/nonparametric.yaml
new file mode 100644
index 0000000..2d8c5a3
--- /dev/null
+++ b/winnow/configs/fdr_method/nonparametric.yaml
@@ -0,0 +1,3 @@
+# --- Non-parametric FDR control configuration ---
+
+_target_: winnow.fdr.nonparametric.NonParametricFDRControl
diff --git a/winnow/configs/predict.yaml b/winnow/configs/predict.yaml
new file mode 100644
index 0000000..fa53a6a
--- /dev/null
+++ b/winnow/configs/predict.yaml
@@ -0,0 +1,38 @@
+# --- Predicting scores and applying FDR control ---
+defaults:
+  - _self_
+  - residues
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+  - fdr_method: nonparametric  # Options: nonparametric, database_grounded
+
+# --- Pipeline Execution Configuration ---
+
+dataset:
+  # Dataset paths:
+  # Path to the spectrum data file, or to the folder containing a saved internal Winnow dataset.
+  spectrum_path_or_directory: examples/example_data/spectra.ipc
+  # Path to the beam predictions file.
+  # Leave as `null` if data source is `winnow`, or loading will fail.
+  predictions_path: examples/example_data/predictions.csv
+  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
+
+calibrator:
+  # Model loading:
+  # Path to the local calibrator directory or the HuggingFace model identifier.
+  # If the path is a local directory path, it will be used directly. If it is a HuggingFace repository identifier, it will be downloaded from HuggingFace.
+  pretrained_model_name_or_path: InstaDeepAI/winnow-general-model
+  # Directory to cache the HuggingFace model.
+  cache_dir: null  # Set to `null` when using a local model, or to use HuggingFace's default cache directory.
+
+fdr_control:
+  # FDR settings:
+  # Target FDR threshold (e.g. 0.01 for 1%, 0.05 for 5% etc.).
+  fdr_threshold: 0.05
+  # Name of the column with confidence scores to use for FDR estimation.
+  confidence_column: calibrated_confidence
+
+# Folder path to write the outputs to.
+# This will create two CSV files in the output folder:
+# - metadata.csv: Contains all metadata and feature columns from the input dataset.
+# - preds_and_fdr_metrics.csv: Contains predictions and FDR metrics.
+output_folder: results/predictions
diff --git a/winnow/configs/residues.yaml b/winnow/configs/residues.yaml
new file mode 100644
index 0000000..d76d4a3
--- /dev/null
+++ b/winnow/configs/residues.yaml
@@ -0,0 +1,64 @@
+# --- Residues configuration ---
+
+# This is Winnow's internal residue representation.
+# We use this to calculate the mass error feature and during database-grounded FDR control.
+# We also use this to initialise the residue set for the Metrics class.
+residue_masses:
+  "G": 57.021464
+  "A": 71.037114
+  "S": 87.032028
+  "P": 97.052764
+  "V": 99.068414
+  "T": 101.047670
+  "C": 103.009185
+  "L": 113.084064
+  "I": 113.084064
+  "N": 114.042927
+  "D": 115.026943
+  "Q": 128.058578
+  "K": 128.094963
+  "E": 129.042593
+  "M": 131.040485
+  "H": 137.058912
+  "F": 147.068414
+  "R": 156.101111
+  "Y": 163.063329
+  "W": 186.079313
+  # Modifications
+  "M[UNIMOD:35]": 147.035400 # Oxidation
+  "C[UNIMOD:4]": 160.030649 # Carbamidomethylation
+  "N[UNIMOD:7]": 115.026943 # Deamidation
+  "Q[UNIMOD:7]": 129.042594 # Deamidation
+  "R[UNIMOD:7]": 157.085127 # Arginine citrullination
+  "P[UNIMOD:35]": 113.047679 # Proline hydroxylation
+  "S[UNIMOD:21]": 166.998028 # Phosphorylation + 79.966
+  "T[UNIMOD:21]": 181.01367 # Phosphorylation + 79.966
+  "Y[UNIMOD:21]": 243.029329 # Phosphorylation + 79.966
+  "C[UNIMOD:312]": 222.013284  # Cysteinylation
+  "E[UNIMOD:27]": 111.032028  # Glu -> pyro-Glu
+  "Q[UNIMOD:28]": 111.032029  # Gln -> pyro-Glu
+  # Terminal modifications
+  "[UNIMOD:1]": 42.010565 # Acetylation
+  "[UNIMOD:5]": 43.005814 # Carbamylation
+  "[UNIMOD:385]": -17.026549 # NH3 loss
+  "(+25.98)": 25.980265  # Carbamylation & NH3 loss (legacy notation)
+
+# The tokens to consider as invalid for Prosit features.
+# We also filter out non-carbamidomethylated cysteine in a separate step.
+invalid_prosit_tokens:
+  # InstaNovo
+  - "[UNIMOD:7]"
+  - "[UNIMOD:21]"
+  - "[UNIMOD:1]"
+  - "[UNIMOD:5]"
+  - "[UNIMOD:385]"
+  - "(+25.98)"  # (legacy notation)
+  # Casanovo
+  - "+0.984"
+  - "+42.011"
+  - "+43.006"
+  - "-17.027"
+  - "[Ammonia-loss]-"
+  - "[Carbamyl]-"
+  - "[Acetyl]-"
+  - "[Deamidated]"
diff --git a/winnow/configs/train.yaml b/winnow/configs/train.yaml
new file mode 100644
index 0000000..839d3f8
--- /dev/null
+++ b/winnow/configs/train.yaml
@@ -0,0 +1,21 @@
+# --- Training a calibrator ---
+defaults:
+  - _self_
+  - residues
+  - calibrator
+  - data_loader: instanovo  # Options: instanovo, mztab, pointnovo, winnow
+
+# --- Pipeline Execution Configuration ---
+
+dataset:
+  # Dataset paths:
+  # Path to the spectrum data file, or to the folder containing a saved internal Winnow dataset.
+  spectrum_path_or_directory: examples/example_data/spectra.ipc
+  # Path to the beam predictions file.
+  # Leave as `null` if data source is `winnow`, or loading will fail.
+  predictions_path: examples/example_data/predictions.csv
+  # NOTE: Make sure that the data loader type matches the data source type in this dataset section.
+
+# Output paths:
+model_output_dir: models/new_model
+dataset_output_path: results/calibrated_dataset.csv