diff --git a/lir/algorithms/isotonic_regression.py b/lir/algorithms/isotonic_regression.py
index 576fed3a..84b32b70 100644
--- a/lir/algorithms/isotonic_regression.py
+++ b/lir/algorithms/isotonic_regression.py
@@ -1,10 +1,13 @@
+from typing import Self
+
 import numpy as np
 import sklearn.isotonic
 from numpy.typing import ArrayLike
-from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils import check_array, check_consistent_length

-from lir.util import probability_to_logodds
+from lir import Transformer
+from lir.data.models import FeatureData, InstanceData, LLRData
+from lir.util import check_type, probability_to_logodds


 class IsotonicRegression(sklearn.isotonic.IsotonicRegression):
@@ -96,7 +99,7 @@ def transform(self, T: ArrayLike) -> np.ndarray:
         return res


-class IsotonicCalibrator(BaseEstimator, TransformerMixin):
+class IsotonicCalibrator(Transformer):
     """Calculate LR from a score belonging to one of two distributions using isotonic regression.

     Calculates a likelihood ratio of a score value, provided it is from one of
@@ -112,11 +115,13 @@ def __init__(self, add_misleading: int = 0):
         self.add_misleading = add_misleading
         self._ir = IsotonicRegression(out_of_bounds='clip')

-    def fit(self, X: np.ndarray, y: np.ndarray) -> 'IsotonicCalibrator':
+    def fit(self, instances: InstanceData) -> Self:
         """Allow fitting the estimator on the given data."""
-        assert np.all(np.unique(y) == np.arange(2)), 'y labels must be 0 and 1'
+        y = instances.check_both_labels()
+        instances = check_type(FeatureData, instances).replace_as(LLRData)  # prevent extreme LRs
+        X = instances.llrs

         if self.add_misleading > 0:
             X = np.concatenate(
                 [
@@ -133,8 +138,12 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> 'IsotonicCalibrator':

         return self

-    def transform(self, X: np.ndarray) -> np.ndarray:
+    def apply(self, instances: InstanceData) -> LLRData:
         """Transform a given value, using the fitted Isotonic Regression model."""
-        self.p1 = self._ir.transform(X)
-        self.p0 = 1 - self.p1
-        return probability_to_logodds(self.p1)
+        instances = check_type(FeatureData, instances).replace_as(LLRData)
+        probs = self._ir.transform(instances.llrs)
+        return instances.replace_as(LLRData, features=probability_to_logodds(probs).reshape(-1, 1))
+
+    def fit_apply(self, instances: InstanceData) -> LLRData:
+        """Fit and apply the calibrator to the given data."""
+        return self.fit(instances).apply(instances)
diff --git a/lir/data/models.py b/lir/data/models.py
index 0a511b11..27d497d5 100644
--- a/lir/data/models.py
+++ b/lir/data/models.py
@@ -122,6 +122,19 @@ def __getitem__(self, indexes: np.ndarray | int) -> Self:
     def __add__(self, other: 'InstanceData') -> Self:
         return self.concatenate(other)

+    def check_both_labels(self) -> np.ndarray:
+        """
+        Return the labels, raising an error if they are missing or do not represent both hypotheses.
+
+        :return: the labels
+        :raises ValueError: if hypothesis labels are missing or either label is not represented.
+        """
+        if self.labels is None:
+            raise ValueError('labels not set')
+        if not np.array_equal(np.unique(self.labels), np.arange(2)):
+            raise ValueError(f'labels must be exactly 0 and 1; labels found: {np.unique(self.labels)}')
+        return self.labels
+
     @classmethod
     def _concatenate_field(cls, field: str, values: list[Any]) -> Any:
         if len(values) == 0:
diff --git a/lir/metrics/__init__.py b/lir/metrics/__init__.py
index 5cb9f117..06bbbece 100644
--- a/lir/metrics/__init__.py
+++ b/lir/metrics/__init__.py
@@ -36,12 +36,10 @@ def cllr_min(llr_data: LLRData, weights: tuple[float, float] = (1, 1)) -> float:
     :param weights: the relative weights of the classes
     :return: CLLR_min, a measure of discrimination
     """
-    llrs, y = llr_data.llrs, llr_data.require_labels
-
     cal = IsotonicCalibrator()
-    llrmin = cal.fit_transform(llrs, y)
+    llrmin = cal.fit_apply(llr_data)

-    return cllr(LLRData(features=llrmin, labels=y), weights)
+    return cllr(llrmin, weights)


 def cllr_cal(llr_data: LLRData, weights: tuple[float, float] = (1, 1)) -> float:
diff --git a/lir/metrics/devpav.py b/lir/metrics/devpav.py
index 604eac28..1e6ed24e 100644
--- a/lir/metrics/devpav.py
+++ b/lir/metrics/devpav.py
@@ -1,6 +1,7 @@
 import numpy as np

 from lir.algorithms.isotonic_regression import IsotonicCalibrator
+from lir.data.models import LLRData
 from lir.util import Xy_to_Xn, logodds_to_odds


@@ -119,7 +120,7 @@ def _devpavcalculator(lrs: np.ndarray, pav_lrs: np.ndarray, y: np.ndarray) -> float:
         return np.nan

     if len(X) == 1:
-        return abs(X - Y)
+        return abs(X[0] - Y[0])

     # Actual devPAV calculation
     surface = sum(_calcsurface((X[i - 1], Y[i - 1]), (X[i], Y[i])) for i in range(1, len(X)))
@@ -128,10 +129,9 @@ def _devpavcalculator(lrs: np.ndarray, pav_lrs: np.ndarray, y: np.ndarray) -> float:
     return surface / deltaX


-def devpav(llrs: np.ndarray, y: np.ndarray) -> float:
+def devpav(llr_data: LLRData) -> float:
     """Calculates devPAV for LR data under H1 and H2."""
-    if all(y) or not any(y):
-        raise ValueError('devpav: illegal input: at least one value is required for each class')
+    labels = llr_data.check_both_labels()
     cal = IsotonicCalibrator()
-    pavllrs = cal.fit_transform(llrs, y)
-    return _devpavcalculator(logodds_to_odds(llrs), logodds_to_odds(pavllrs), y)
+    pavllrs = cal.fit_apply(llr_data)
+    return _devpavcalculator(logodds_to_odds(llr_data.llrs), logodds_to_odds(pavllrs.llrs), labels)
diff --git a/lir/plotting/__init__.py b/lir/plotting/__init__.py
index 0e133b47..ed02e279 100644
--- a/lir/plotting/__init__.py
+++ b/lir/plotting/__init__.py
@@ -137,7 +137,7 @@ def pav(
     y = llrdata.labels

     pav = IsotonicCalibrator(add_misleading=add_misleading)
-    pav_llrs = pav.fit_transform(llrs, y)
+    pav_llrs = pav.fit_apply(llrdata).llrs

     xrange = yrange = [
         llrs[llrs != -np.inf].min() - 0.5,
@@ -150,7 +150,7 @@ def pav(

     # line pre pav llrs x and post pav llrs y
     line_x = np.arange(*xrange, 0.01)
-    line_y = pav.transform(line_x)
+    line_y = pav.apply(LLRData(features=line_x.reshape(-1, 1))).llrs

     # filter nan values, happens when values are out of bound (x_values out of training domain for pav)
     # see: https://scikit-learn.org/stable/modules/generated/sklearn.isotonic.IsotonicRegression.html
diff --git a/lir/plotting/expected_calibration_error.py b/lir/plotting/expected_calibration_error.py
index f70953b2..4dda8c1c 100644
--- a/lir/plotting/expected_calibration_error.py
+++ b/lir/plotting/expected_calibration_error.py
@@ -53,10 +53,7 @@ def plot_ece(
       - 'zoomed': starts at 0 and ends slightly (10%) above the maximum ECE value of the LRs.
        This may cut off part of the 'non-informative' reference line.
    """
-    llrs = llrdata.llrs
-    labels = llrdata.labels
-    if labels is None:
-        raise ValueError('LLRData must contain labels to plot ECE.')
+    labels = llrdata.require_labels

     log_prior_odds = np.arange(*log_prior_odds_range, 0.01)
     prior_odds = np.power(10, log_prior_odds)
@@ -71,7 +68,7 @@ def plot_ece(
     )

     # plot LRs
-    ece_values = calculate_ece(logodds_to_odds(llrs), labels, odds_to_probability(prior_odds))
+    ece_values = calculate_ece(logodds_to_odds(llrdata.llrs), labels, odds_to_probability(prior_odds))
     ax.plot(
         log_prior_odds,
         ece_values,
@@ -81,10 +78,10 @@ def plot_ece(

     if show_pav:
         # plot PAV LRs
-        pav_llrs = IsotonicCalibrator().fit_transform(llrs, labels)
+        pav_llrs = IsotonicCalibrator().fit_apply(llrdata)
         ax.plot(
             log_prior_odds,
-            calculate_ece(logodds_to_odds(pav_llrs), labels, odds_to_probability(prior_odds)),
+            calculate_ece(logodds_to_odds(pav_llrs.llrs), labels, odds_to_probability(prior_odds)),
             linestyle='--',
             label='PAV LRs',
         )
diff --git a/tests/test_4pl_model.py b/tests/test_4pl_model.py
index 0ed12773..74a9c808 100644
--- a/tests/test_4pl_model.py
+++ b/tests/test_4pl_model.py
@@ -48,7 +48,7 @@ def test_pl_1_is_0(self):
         four_pl_model.fit(instances)
         logodds = four_pl_model.apply(instances)

-        np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.12029952948152635, decimal=5)
+        np.testing.assert_almost_equal(devpav(logodds), 0.12029952948152635, decimal=5)

     def test_pl_0_is_1(self):
         instances = self.get_instances()
@@ -59,7 +59,7 @@ def test_pl_0_is_1(self):
         four_pl_model.fit(instances)
         logodds = four_pl_model.apply(instances)

-        np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.15273304557837525, decimal=5)
+        np.testing.assert_almost_equal(devpav(logodds), 0.15273304557837525, decimal=5)

     def test_pl_0_is_1_and_pl_1_is_0(self):
         instances = self.get_instances()
@@ -72,7 +72,7 @@ def test_pl_0_is_1_and_pl_1_is_0(self):
         four_pl_model.fit(instances)
         logodds = four_pl_model.apply(instances)

-        np.testing.assert_almost_equal(devpav(logodds.llrs, logodds.labels), 0.10475112893952891, decimal=5)
+        np.testing.assert_almost_equal(devpav(logodds), 0.10475112893952891, decimal=5)


 if __name__ == '__main__':
diff --git a/tests/test_calibration.py b/tests/test_calibration.py
index f5c9ad01..3de46e0e 100644
--- a/tests/test_calibration.py
+++ b/tests/test_calibration.py
@@ -37,8 +37,8 @@ def test_lr_1(self):
         score_class1 = np.arange(0, 1, 0.1)
         X, y = Xn_to_Xy(score_class0, score_class1)
         irc = IsotonicCalibrator()
-        llrs = irc.fit_transform(probability_to_logodds(X), y)
-        lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs), y)
+        llrs = irc.fit_apply(LLRData(features=probability_to_logodds(X).reshape(-1, 1), labels=y))
+        lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)
         self.assertEqual(score_class0.shape, lr0.shape)
         self.assertEqual(score_class1.shape, lr1.shape)
         np.testing.assert_almost_equal(lr0, [1.0] * lr0.shape[0])
@@ -51,8 +51,8 @@ def run_cllrmin(self, lr0, lr1, places=7):
         cllr = _cllr(lr0, lr1)

         irc = IsotonicCalibrator()
-        llrs = irc.fit_transform(odds_to_probability(X), y)
-        lrmin0, lrmin1 = Xy_to_Xn(logodds_to_odds(llrs), y)
+        llrs = irc.fit_apply(LLRData(features=odds_to_probability(X).reshape(-1, 1), labels=y))
+        lrmin0, lrmin1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)

         cllrmin = _cllr(lrmin0, lrmin1)

@@ -76,8 +76,8 @@ def test_lr_almost_1(self):
         score_class1 = np.arange(0.05, 1.05, 0.1)
         X, y = Xn_to_Xy(score_class0, score_class1)
         irc = IsotonicCalibrator()
-        llrs = irc.fit_transform(X, y)
-        lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs), y)
+        llrs = irc.fit_apply(LLRData(features=X.reshape(-1, 1), labels=y))
+        lr0, lr1 = Xy_to_Xn(logodds_to_odds(llrs.llrs), y)
         self.assertEqual(score_class0.shape, lr0.shape)
         self.assertEqual(score_class1.shape, lr1.shape)
         np.testing.assert_almost_equal(lr0, np.concatenate([[0], [1.0] * (lr0.shape[0] - 1)]))
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 5ca57b35..7c1ac5bb 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -68,49 +68,48 @@ def test_illegal_cllr(h1_llrs, h2_llrs):

 class TestDevPAV(unittest.TestCase):
     def test_devpav_error(self):
-        lrs = np.ones(10)
+        lrs = np.ones((10, 1))
         llrs = odds_to_logodds(lrs)
         y = np.concatenate([np.ones(10)])
         with self.assertRaises(ValueError):
-            devpav(llrs, y)
+            devpav(LLRData(features=llrs, labels=y))

     def test_devpav(self):
         # naive system
-        lrs = np.ones(10)
-        llrs = odds_to_logodds(lrs)
+        llrs = np.zeros((10, 1))
         y = np.concatenate([np.ones(5), np.zeros(5)])
-        self.assertEqual(devpav(llrs, y), 0)
+        self.assertEqual(devpav(LLRData(features=llrs, labels=y)), 0)

         # badly calibrated naive system
         lrs = 2 * np.ones(10)
         llrs = odds_to_logodds(lrs)
         y = np.concatenate([np.ones(5), np.zeros(5)])
-        self.assertEqual(devpav(llrs, y), np.log10(2))
+        self.assertEqual(devpav(LLRData(features=llrs.reshape(-1, 1), labels=y)), np.log10(2))

         # infinitely bad calibration
         lrs = np.array([5, 5, 5, 0.2, 0.2, 0.2, np.inf])
         llrs = odds_to_logodds(lrs)
         y = np.concatenate([np.ones(3), np.zeros(4)])
-        self.assertEqual(devpav(llrs, y), np.inf)
+        self.assertEqual(devpav(LLRData(features=llrs.reshape(-1, 1), labels=y)), np.inf)

         # binary system
         lrs = np.array([5, 5, 5, 0.2, 5, 0.2, 0.2, 0.2])
         llrs = odds_to_logodds(lrs)
         y = np.concatenate([np.ones(4), np.zeros(4)])
-        self.assertAlmostEqual(devpav(llrs, y), (np.log10(5) - np.log10(3)) / 2)
+        self.assertAlmostEqual(devpav(LLRData(features=llrs.reshape(-1, 1), labels=y)), (np.log10(5) - np.log10(3)) / 2)

         # somewhat normal
         lrs = np.array([6, 5, 5, 0.2, 5, 0.2, 0.2, 0.1])
         llrs = odds_to_logodds(lrs)
         y = np.concatenate([np.ones(4), np.zeros(4)])
-        self.assertAlmostEqual(devpav(llrs, y), (np.log10(5) - np.log10(2)) / 2)
+        self.assertAlmostEqual(devpav(LLRData(features=llrs.reshape(-1, 1), labels=y)), (np.log10(5) - np.log10(2)) / 2)

         # test on dummy data 3 #######################
         lrs_same = (0.1, 100)
         lrs_dif = (10**-2, 10)
         lrs, y = Xn_to_Xy(lrs_dif, lrs_same)
         llrs = odds_to_logodds(lrs)
-        self.assertEqual(devpav(llrs, y), 0.5)
+        self.assertEqual(devpav(LLRData(features=llrs.reshape(-1, 1), labels=y)), 0.5)


 class TestDevpavcalculator(unittest.TestCase):
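
Usage note (illustrative, not part of the patch): with this change the calibrator exchanges InstanceData containers instead of bare arrays, so callers wrap scores in an LLRData with a 2-D features array, as the updated tests do. A minimal sketch under those assumptions; the scores below are made up:

import numpy as np

from lir.algorithms.isotonic_regression import IsotonicCalibrator
from lir.data.models import LLRData

# Made-up log-odds scores for H1 (label 0) and H2 (label 1); features are 2-D (n, 1).
scores = np.array([-1.5, -1.0, -0.8, -0.2, 0.1, -0.4, 0.3, 0.9, 1.2, 2.0]).reshape(-1, 1)
labels = np.concatenate([np.zeros(5), np.ones(5)])

# add_misleading=1 keeps the PAV extremes finite, as in the pav() plotting code.
cal = IsotonicCalibrator(add_misleading=1)

# fit_apply() replaces the old fit_transform(X, y); it returns an LLRData
# whose .llrs hold the PAV-calibrated log-odds of the fitted instances.
calibrated = cal.fit_apply(LLRData(features=scores, labels=labels))
print(calibrated.llrs)

# apply() transforms unseen scores with the fitted model; labels are not needed here,
# mirroring the pav() plot, which applies the calibrator to an unlabeled LLRData.
new_llrs = cal.apply(LLRData(features=np.array([[0.0], [0.5]]))).llrs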