Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions lir/algorithms/isotonic_regression.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from typing import Self

import numpy as np
import sklearn.isotonic
from numpy.typing import ArrayLike
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array, check_consistent_length

from lir.util import probability_to_logodds
from lir import Transformer
from lir.data.models import FeatureData, InstanceData, LLRData
from lir.util import check_type, probability_to_logodds


class IsotonicRegression(sklearn.isotonic.IsotonicRegression):
Expand Down Expand Up @@ -96,7 +99,7 @@ def transform(self, T: ArrayLike) -> np.ndarray:
return res


class IsotonicCalibrator(BaseEstimator, TransformerMixin):
class IsotonicCalibrator(Transformer):
"""Calculate LR from a score belonging to one of two distributions using isotonic regression.

Calculates a likelihood ratio of a score value, provided it is from one of
Expand All @@ -112,11 +115,13 @@ def __init__(self, add_misleading: int = 0):
self.add_misleading = add_misleading
self._ir = IsotonicRegression(out_of_bounds='clip')

def fit(self, X: np.ndarray, y: np.ndarray) -> 'IsotonicCalibrator':
def fit(self, instances: InstanceData) -> Self:
"""Allow fitting the estimator on the given data."""
assert np.all(np.unique(y) == np.arange(2)), 'y labels must be 0 and 1'
y = instances.check_both_labels()
instances = check_type(FeatureData, instances).replace_as(LLRData)

# prevent extreme LRs
X = instances.llrs
if self.add_misleading > 0:
X = np.concatenate(
[
Expand All @@ -133,8 +138,12 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'IsotonicCalibrator':

return self

def transform(self, X: np.ndarray) -> np.ndarray:
def apply(self, instances: InstanceData) -> LLRData:
    """Transform a given value, using the fitted Isotonic Regression model."""
    # Interpret the incoming feature data as log-likelihood ratios.
    llr_instances = check_type(FeatureData, instances).replace_as(LLRData)
    # The isotonic model yields probabilities; convert them back to log-odds.
    calibrated = probability_to_logodds(self._ir.transform(llr_instances.llrs))
    return llr_instances.replace_as(LLRData, features=calibrated.reshape(-1, 1))

def fit_apply(self, instances: InstanceData) -> LLRData:
    """Fit and apply the calibrator to the given data."""
    # Convenience shorthand: train on the instances, then calibrate them.
    self.fit(instances)
    return self.apply(instances)
13 changes: 13 additions & 0 deletions lir/data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,19 @@ def __getitem__(self, indexes: np.ndarray | int) -> Self:
def __add__(self, other: 'InstanceData') -> Self:
return self.concatenate(other)

def check_both_labels(self) -> np.ndarray:
    """
    Return labels or raise an error if they are missing or if they do not represent both hypotheses.

    :return: the labels
    :raise: ValueError if hypothesis labels are missing or either label is not represented.
    """
    if self.labels is None:
        raise ValueError('labels not set')
    found = np.unique(self.labels)
    # Require exactly the two class labels {0, 1}. Using `array_equal` instead of an
    # elementwise `==` against `np.arange(2)` avoids a broadcast shape mismatch when
    # the number of distinct labels is not 2 (e.g. only one class, or extra classes),
    # guaranteeing the intended ValueError is raised in every such case.
    if not np.array_equal(found, np.arange(2)):
        raise ValueError(f'not all classes are represented; labels found: {found}')
    return self.labels

@classmethod
def _concatenate_field(cls, field: str, values: list[Any]) -> Any:
if len(values) == 0:
Expand Down
6 changes: 2 additions & 4 deletions lir/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@ def cllr_min(llr_data: LLRData, weights: tuple[float, float] = (1, 1)) -> float:
:param weights: the relative weights of the classes
:return: CLLR_min, a measure of discrimination
"""
llrs, y = llr_data.llrs, llr_data.require_labels

cal = IsotonicCalibrator()
llrmin = cal.fit_transform(llrs, y)
llrmin = cal.fit_apply(llr_data)

return cllr(LLRData(features=llrmin, labels=y), weights)
return cllr(llrmin, weights)


def cllr_cal(llr_data: LLRData, weights: tuple[float, float] = (1, 1)) -> float:
Expand Down
12 changes: 6 additions & 6 deletions lir/metrics/devpav.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np

from lir.algorithms.isotonic_regression import IsotonicCalibrator
from lir.data.models import LLRData
from lir.util import Xy_to_Xn, logodds_to_odds


Expand Down Expand Up @@ -119,7 +120,7 @@ def _devpavcalculator(lrs: np.ndarray, pav_lrs: np.ndarray, y: np.ndarray) -> fl
return np.nan

if len(X) == 1:
return abs(X - Y)
return abs(X[0] - Y[0])

# Actual devPAV calculation
surface = sum(_calcsurface((X[i - 1], Y[i - 1]), (X[i], Y[i])) for i in range(1, len(X)))
Expand All @@ -128,10 +129,9 @@ def _devpavcalculator(lrs: np.ndarray, pav_lrs: np.ndarray, y: np.ndarray) -> fl
return surface / deltaX


def devpav(llrs: np.ndarray, y: np.ndarray) -> float:
def devpav(llrs: LLRData) -> float:
    """Calculates devPAV for LR data under H1 and H2."""
    # Validates that both hypothesis classes are present before calibrating.
    labels = llrs.check_both_labels()
    pav_llrs = IsotonicCalibrator().fit_apply(llrs)
    return _devpavcalculator(
        logodds_to_odds(llrs.llrs),
        logodds_to_odds(pav_llrs.llrs),
        labels,
    )
4 changes: 2 additions & 2 deletions lir/plotting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def pav(
y = llrdata.labels

pav = IsotonicCalibrator(add_misleading=add_misleading)
pav_llrs = pav.fit_transform(llrs, y)
pav_llrs = pav.fit_apply(llrdata).llrs

xrange = yrange = [
llrs[llrs != -np.inf].min() - 0.5,
Expand All @@ -150,7 +150,7 @@ def pav(

# line pre pav llrs x and post pav llrs y
line_x = np.arange(*xrange, 0.01)
line_y = pav.transform(line_x)
line_y = pav.apply(LLRData(features=line_x.reshape(-1, 1))).llrs

# filter nan values, happens when values are out of bound (x_values out of training domain for pav)
# see: https://scikit-learn.org/stable/modules/generated/sklearn.isotonic.IsotonicRegression.html
Expand Down
11 changes: 4 additions & 7 deletions lir/plotting/expected_calibration_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ def plot_ece(
- 'zoomed': starts at 0 and ends slightly (10%) above the maximum ECE value of the LRs. This may cut off part
of the 'non-informative' reference line.
"""
llrs = llrdata.llrs
labels = llrdata.labels
if labels is None:
raise ValueError('LLRData must contain labels to plot ECE.')
labels = llrdata.require_labels

log_prior_odds = np.arange(*log_prior_odds_range, 0.01)
prior_odds = np.power(10, log_prior_odds)
Expand All @@ -71,7 +68,7 @@ def plot_ece(
)

# plot LRs
ece_values = calculate_ece(logodds_to_odds(llrs), labels, odds_to_probability(prior_odds))
ece_values = calculate_ece(logodds_to_odds(llrdata.llrs), labels, odds_to_probability(prior_odds))
ax.plot(
log_prior_odds,
ece_values,
Expand All @@ -81,10 +78,10 @@ def plot_ece(

if show_pav:
# plot PAV LRs
pav_llrs = IsotonicCalibrator().fit_transform(llrs, labels)
pav_llrs = IsotonicCalibrator().fit_apply(llrdata)
ax.plot(
log_prior_odds,
calculate_ece(logodds_to_odds(pav_llrs), labels, odds_to_probability(prior_odds)),
calculate_ece(logodds_to_odds(pav_llrs.llrs), labels, odds_to_probability(prior_odds)),
linestyle='--',
label='PAV LRs',
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_4pl_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_pl_1_is_0(self):
four_pl_model.fit(instances)
logodds = four_pl_model.apply(instances)

np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.12029952948152635, decimal=5)
np.testing.assert_almost_equal(devpav(logodds), 0.12029952948152635, decimal=5)

def test_pl_0_is_1(self):
instances = self.get_instances()
Expand All @@ -59,7 +59,7 @@ def test_pl_0_is_1(self):
four_pl_model.fit(instances)
logodds = four_pl_model.apply(instances)

np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.15273304557837525, decimal=5)
np.testing.assert_almost_equal(devpav(logodds), 0.15273304557837525, decimal=5)

def test_pl_0_is_1_and_pl_1_is_0(self):
instances = self.get_instances()
Expand All @@ -72,7 +72,7 @@ def test_pl_0_is_1_and_pl_1_is_0(self):
four_pl_model.fit(instances)

logodds = four_pl_model.apply(instances)
np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.10475112893952891, decimal=5)
np.testing.assert_almost_equal(devpav(logodds), 0.10475112893952891, decimal=5)


if __name__ == '__main__':
Expand Down
12 changes: 6 additions & 6 deletions tests/test_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def test_lr_1(self):
score_class1 = np.arange(0, 1, 0.1)
X, y = Xn_to_Xy(score_class0, score_class1)
irc = IsotonicCalibrator()
llrs = irc.fit_transform(probability_to_logodds(X), y)
lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs), y)
llrs = irc.fit_apply(LLRData(features=probability_to_logodds(X).reshape(-1, 1), labels=y))
lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)
self.assertEqual(score_class0.shape, lr0.shape)
self.assertEqual(score_class1.shape, lr1.shape)
np.testing.assert_almost_equal(lr0, [1.0] * lr0.shape[0])
Expand All @@ -51,8 +51,8 @@ def run_cllrmin(self, lr0, lr1, places=7):
cllr = _cllr(lr0, lr1)

irc = IsotonicCalibrator()
llrs = irc.fit_transform(odds_to_probability(X), y)
lrmin0, lrmin1 = Xy_to_Xn(logodds_to_odds(llrs), y)
llrs = irc.fit_apply(LLRData(features=odds_to_probability(X).reshape(-1, 1), labels=y))
lrmin0, lrmin1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)

cllrmin = _cllr(lrmin0, lrmin1)

Expand All @@ -76,8 +76,8 @@ def test_lr_almost_1(self):
score_class1 = np.arange(0.05, 1.05, 0.1)
X, y = Xn_to_Xy(score_class0, score_class1)
irc = IsotonicCalibrator()
llrs = irc.fit_transform(X, y)
lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs), y)
llrs = irc.fit_apply(LLRData(features=X.reshape(-1, 1), labels=y))
lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)
self.assertEqual(score_class0.shape, lr0.shape)
self.assertEqual(score_class1.shape, lr1.shape)
np.testing.assert_almost_equal(lr0, np.concatenate([[0], [1.0] * (lr0.shape[0] - 1)]))
Expand Down
19 changes: 9 additions & 10 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,49 +68,48 @@ def test_illegal_cllr(h1_llrs, h2_llrs):

class TestDevPAV(unittest.TestCase):
def test_devpav_error(self):
lrs = np.ones(10)
lrs = np.ones((10, 1))
llrs = odds_to_logodds(lrs)
y = np.concatenate([np.ones(10)])
with self.assertRaises(ValueError):
devpav(llrs, y)
devpav(LLRData(features=llrs, labels=y))

def test_devpav(self):
# naive system
lrs = np.ones(10)
llrs = odds_to_logodds(lrs)
llrs = np.zeros((10, 1))
y = np.concatenate([np.ones(5), np.zeros(5)])
self.assertEqual(devpav(llrs, y), 0)
self.assertEqual(devpav(LLRData(features=llrs, labels=y)), 0)

# badly calibrated naive system
lrs = 2 * np.ones(10)
llrs = odds_to_logodds(lrs)
y = np.concatenate([np.ones(5), np.zeros(5)])
self.assertEqual(devpav(llrs, y), np.log10(2))
self.assertEqual(devpav(LLRData(features=llrs, labels=y)), np.log10(2))

# infinitely bad calibration
lrs = np.array([5, 5, 5, 0.2, 0.2, 0.2, np.inf])
llrs = odds_to_logodds(lrs)
y = np.concatenate([np.ones(3), np.zeros(4)])
self.assertEqual(devpav(llrs, y), np.inf)
self.assertEqual(devpav(LLRData(features=llrs, labels=y)), np.inf)

# binary system
lrs = np.array([5, 5, 5, 0.2, 5, 0.2, 0.2, 0.2])
llrs = odds_to_logodds(lrs)
y = np.concatenate([np.ones(4), np.zeros(4)])
self.assertAlmostEqual(devpav(llrs, y), (np.log10(5) - np.log10(3)) / 2)
self.assertAlmostEqual(devpav(LLRData(features=llrs, labels=y)), (np.log10(5) - np.log10(3)) / 2)

# somewhat normal
lrs = np.array([6, 5, 5, 0.2, 5, 0.2, 0.2, 0.1])
llrs = odds_to_logodds(lrs)
y = np.concatenate([np.ones(4), np.zeros(4)])
self.assertAlmostEqual(devpav(llrs, y), (np.log10(5) - np.log10(2)) / 2)
self.assertAlmostEqual(devpav(LLRData(features=llrs, labels=y)), (np.log10(5) - np.log10(2)) / 2)

# test on dummy data 3 #######################
lrs_same = (0.1, 100)
lrs_dif = (10**-2, 10)
lrs, y = Xn_to_Xy(lrs_dif, lrs_same)
llrs = odds_to_logodds(lrs)
self.assertEqual(devpav(llrs, y), 0.5)
self.assertEqual(devpav(LLRData(features=llrs, labels=y)), 0.5)


class TestDevpavcalculator(unittest.TestCase):
Expand Down