From 2e40a95a7743e489a0ea58b3b529cfac5746d408 Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Tue, 25 Nov 2025 14:28:28 +0100
Subject: [PATCH 1/7] First do bounding per LR-system, then calculate
 percentiles.

---
 lrmodule/mcmc.py                          | 14 +++++++++++++-
 tests/mcmc_matlab_comparison/test_mcmc.py |  7 ++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 48db446..6e501f8 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -3,6 +3,7 @@
 import arviz as az
 import numpy as np
 import pymc as pm
+from lir.bounding import LLRBounder
 from lir.data.models import FeatureData, LLRData
 from lir.transform import Transformer
 from scipy.stats import betabinom, binom, norm
@@ -17,12 +18,13 @@ class McmcLLRModel(Transformer):
     determined.
     """
 
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         distribution_h1: str,
         parameters_h1: dict[str, dict[str, int]] | None,
         distribution_h2: str,
         parameters_h2: dict[str, dict[str, int]] | None,
+        bounder: LLRBounder | None,
         interval: tuple[float, float] = (0.05, 0.95),
         **mcmc_kwargs,
     ):
@@ -33,17 +35,25 @@ def __init__(
         :param parameters_h1: definition of the parameters of distribution_h1, and their prior distributions
         :param distribution_h2: statistical distribution used to model H2, for example 'normal' or 'binomial'
         :param parameters_h2: definition of the parameters of distribution_h2, and their prior distributions
+        :param bounder: bounding method to apply to the unbound llrs, to prevent overextrapolation
         :param interval: lower and upper bounds of the credible interval in range 0..1; default: (0.05, 0.95)
         :param mcmc_kwargs: mcmc simulation settings, see `McmcModel.__init__` for more details.
         """
         self.model_h1 = McmcModel(distribution_h1, parameters_h1, **mcmc_kwargs)
         self.model_h2 = McmcModel(distribution_h2, parameters_h2, **mcmc_kwargs)
+        self.bounder = bounder
         self.interval = interval
 
     def fit(self, instances: FeatureData) -> Self:
         """Fit the defined model to the supplied instances."""
         self.model_h1.fit(instances.features[instances.labels == 1])
         self.model_h2.fit(instances.features[instances.labels == 0])
+        if self.bounder is not None:
+            # determine the bounds based on the LLRs of the training data
+            logp_h1 = self.model_h1.transform(instances.features)
+            logp_h2 = self.model_h2.transform(instances.features)
+            llrs = logp_h1 - logp_h2
+            self.bounder = self.bounder.fit(llrs, instances.labels)
         return self
 
     def transform(self, instances: FeatureData) -> LLRData:
@@ -51,6 +61,8 @@ def transform(self, instances: FeatureData) -> LLRData:
         logp_h1 = self.model_h1.transform(instances.features)
         logp_h2 = self.model_h2.transform(instances.features)
         llrs = logp_h1 - logp_h2
+        if self.bounder is not None:
+            llrs = self.bounder.transform(llrs)
         quantiles = np.quantile(llrs, [0.5] + list(self.interval), axis=1, method="midpoint")
         return instances.replace_as(LLRData, features=quantiles.transpose(1, 0))
 
diff --git a/tests/mcmc_matlab_comparison/test_mcmc.py b/tests/mcmc_matlab_comparison/test_mcmc.py
index f160da4..d063a25 100644
--- a/tests/mcmc_matlab_comparison/test_mcmc.py
+++ b/tests/mcmc_matlab_comparison/test_mcmc.py
@@ -88,7 +88,12 @@ def test_llr_dataset(dataset_name: str):
     labels = np.concatenate([np.ones(scores_km.shape[0]), np.zeros(scores_knm.shape[0])])
 
     model = McmcLLRModel(
-        cfg.distribution_h1, cfg.parameters_h1, cfg.distribution_h2, cfg.parameters_h2, random_seed=cfg.random_seed
+        cfg.distribution_h1,
+        cfg.parameters_h1,
+        cfg.distribution_h2,
+        cfg.parameters_h2,
+        bounder=None,
+        random_seed=cfg.random_seed,
     )
     model.fit(FeatureData(features=features, labels=labels))
     llrs = model.transform(FeatureData(features=scores_eval))

From 303cc34a130dcb50f74242ce0e4794c7b7c34de6 Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Wed, 26 Nov 2025 15:18:40 +0100
Subject: [PATCH 2/7] Store bounds, instead of a bounders with many bounds.

---
 lrmodule/mcmc.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 6e501f8..498d463 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -42,6 +42,8 @@ def __init__(  # noqa: PLR0913
         self.model_h1 = McmcModel(distribution_h1, parameters_h1, **mcmc_kwargs)
         self.model_h2 = McmcModel(distribution_h2, parameters_h2, **mcmc_kwargs)
         self.bounder = bounder
+        self.lower_llr_bounds = None
+        self.upper_llr_bounds = None
         self.interval = interval
 
     def fit(self, instances: FeatureData) -> Self:
@@ -49,11 +51,17 @@ def fit(self, instances: FeatureData) -> Self:
         self.model_h1.fit(instances.features[instances.labels == 1])
         self.model_h2.fit(instances.features[instances.labels == 0])
         if self.bounder is not None:
-            # determine the bounds based on the LLRs of the training data
+            # determine the bounds based on the LLRs of the training data, each sample results into an LR-system
             logp_h1 = self.model_h1.transform(instances.features)
             logp_h2 = self.model_h2.transform(instances.features)
             llrs = logp_h1 - logp_h2
-            self.bounder = self.bounder.fit(llrs, instances.labels)
+            # determine the bounds for each LR-system individually
+            self.lower_llr_bounds = np.empty(llrs.shape[1])
+            self.upper_llr_bounds = np.empty(llrs.shape[1])
+            for i_system in range(llrs.shape[1]):
+                bounder = self.bounder.fit(llrs[:, i_system], instances.labels)
+                self.lower_llr_bounds[i_system] = bounder.lower_llr_bound
+                self.upper_llr_bounds[i_system] = bounder.upper_llr_bound
         return self
 
     def transform(self, instances: FeatureData) -> LLRData:
@@ -62,7 +70,9 @@ def transform(self, instances: FeatureData) -> LLRData:
         logp_h2 = self.model_h2.transform(instances.features)
         llrs = logp_h1 - logp_h2
         if self.bounder is not None:
-            llrs = self.bounder.transform(llrs)
+            # apply the bounds to all the LR-systems in one go
+            llrs = np.where(self.lower_llr_bounds < llrs, llrs, self.lower_llr_bounds)
+            llrs = np.where(self.upper_llr_bounds > llrs, llrs, self.upper_llr_bounds)
         quantiles = np.quantile(llrs, [0.5] + list(self.interval), axis=1, method="midpoint")
         return instances.replace_as(LLRData, features=quantiles.transpose(1, 0))
 

From af12190b089c078299755b4b982d0fa4f39f3ca7 Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Wed, 26 Nov 2025 15:28:10 +0100
Subject: [PATCH 3/7] Fix invalid-argument-type error

---
 lrmodule/mcmc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 498d463..29ca1d7 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -48,6 +48,8 @@ def __init__(  # noqa: PLR0913
 
     def fit(self, instances: FeatureData) -> Self:
         """Fit the defined model to the supplied instances."""
+        if instances.labels is None:
+            raise ValueError("Labels are required to fit this model.")
         self.model_h1.fit(instances.features[instances.labels == 1])
         self.model_h2.fit(instances.features[instances.labels == 0])
         if self.bounder is not None:

From f47fda4371c8024ace882566ee71a8cc85386ae6 Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Thu, 27 Nov 2025 14:34:44 +0100
Subject: [PATCH 4/7] Added details on noqa: code

---
 lrmodule/mcmc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 29ca1d7..424c185 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -18,7 +18,7 @@ class McmcLLRModel(Transformer):
     determined.
     """
 
-    def __init__(  # noqa: PLR0913
+    def __init__(  # noqa: PLR0913 (more than five arguments are allowed)
         self,
         distribution_h1: str,
         parameters_h1: dict[str, dict[str, int]] | None,
@@ -80,7 +80,7 @@ def transform(self, instances: FeatureData) -> LLRData:
 
 
 class McmcModel:
-    def __init__(  # noqa: PLR0913
+    def __init__(  # noqa: PLR0913 (more than five arguments are allowed)
         self,
         distribution: str,
         parameters: dict[str, dict[str, int]] | None,

From b75d7b075c73da738bd15cb14bd0cf576ed3ac3e Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Thu, 27 Nov 2025 16:11:03 +0100
Subject: [PATCH 5/7] Store the bounders, instead of the bounds; transform the
 systems one by one, instead of all at once.

---
 lrmodule/mcmc.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 424c185..6d9fd45 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -24,7 +24,7 @@ def __init__(  # noqa: PLR0913 (more than five arguments are allowed)
         parameters_h1: dict[str, dict[str, int]] | None,
         distribution_h2: str,
         parameters_h2: dict[str, dict[str, int]] | None,
-        bounder: LLRBounder | None,
+        bounding: LLRBounder | None,
         interval: tuple[float, float] = (0.05, 0.95),
         **mcmc_kwargs,
     ):
@@ -41,9 +41,8 @@ def __init__(  # noqa: PLR0913 (more than five arguments are allowed)
         """
         self.model_h1 = McmcModel(distribution_h1, parameters_h1, **mcmc_kwargs)
         self.model_h2 = McmcModel(distribution_h2, parameters_h2, **mcmc_kwargs)
-        self.bounder = bounder
-        self.lower_llr_bounds = None
-        self.upper_llr_bounds = None
+        self.bounding = bounding
+        self.bounders = None
         self.interval = interval
 
     def fit(self, instances: FeatureData) -> Self:
@@ -52,18 +51,15 @@ def fit(self, instances: FeatureData) -> Self:
             raise ValueError("Labels are required to fit this model.")
         self.model_h1.fit(instances.features[instances.labels == 1])
         self.model_h2.fit(instances.features[instances.labels == 0])
-        if self.bounder is not None:
+        if self.bounding is not None:
             # determine the bounds based on the LLRs of the training data, each sample results into an LR-system
             logp_h1 = self.model_h1.transform(instances.features)
             logp_h2 = self.model_h2.transform(instances.features)
             llrs = logp_h1 - logp_h2
             # determine the bounds for each LR-system individually
-            self.lower_llr_bounds = np.empty(llrs.shape[1])
-            self.upper_llr_bounds = np.empty(llrs.shape[1])
+            self.bounders = [self.bounding.__class__() for _ in range(llrs.shape[1])]
             for i_system in range(llrs.shape[1]):
-                bounder = self.bounder.fit(llrs[:, i_system], instances.labels)
-                self.lower_llr_bounds[i_system] = bounder.lower_llr_bound
-                self.upper_llr_bounds[i_system] = bounder.upper_llr_bound
+                self.bounders[i_system] = self.bounders[i_system].fit(llrs[:, i_system], instances.labels)
         return self
 
     def transform(self, instances: FeatureData) -> LLRData:
@@ -71,10 +67,10 @@ def transform(self, instances: FeatureData) -> LLRData:
         logp_h1 = self.model_h1.transform(instances.features)
         logp_h2 = self.model_h2.transform(instances.features)
         llrs = logp_h1 - logp_h2
-        if self.bounder is not None:
-            # apply the bounds to all the LR-systems in one go
-            llrs = np.where(self.lower_llr_bounds < llrs, llrs, self.lower_llr_bounds)
-            llrs = np.where(self.upper_llr_bounds > llrs, llrs, self.upper_llr_bounds)
+        if self.bounding is not None:
+            # apply the bounders one by one
+            for i_system in range(llrs.shape[1]):
+                llrs[:, i_system] = self.bounders[i_system].transform(llrs[:, i_system])
         quantiles = np.quantile(llrs, [0.5] + list(self.interval), axis=1, method="midpoint")
         return instances.replace_as(LLRData, features=quantiles.transpose(1, 0))
 

From f42f66d88ee8a033f91abf21924409aae84681fd Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Mon, 1 Dec 2025 11:30:40 +0100
Subject: [PATCH 6/7] Small fixes; non-subscriptable error, test input
 argument.

---
 lrmodule/mcmc.py                          | 2 +-
 tests/mcmc_matlab_comparison/test_mcmc.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 6d9fd45..0aa08ce 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -67,7 +67,7 @@ def transform(self, instances: FeatureData) -> LLRData:
         logp_h1 = self.model_h1.transform(instances.features)
         logp_h2 = self.model_h2.transform(instances.features)
         llrs = logp_h1 - logp_h2
-        if self.bounding is not None:
+        if (self.bounding is not None) and (self.bounders is not None):
             # apply the bounders one by one
             for i_system in range(llrs.shape[1]):
                 llrs[:, i_system] = self.bounders[i_system].transform(llrs[:, i_system])
diff --git a/tests/mcmc_matlab_comparison/test_mcmc.py b/tests/mcmc_matlab_comparison/test_mcmc.py
index d063a25..83fd51e 100644
--- a/tests/mcmc_matlab_comparison/test_mcmc.py
+++ b/tests/mcmc_matlab_comparison/test_mcmc.py
@@ -92,7 +92,7 @@ def test_llr_dataset(dataset_name: str):
         cfg.parameters_h1,
         cfg.distribution_h2,
         cfg.parameters_h2,
-        bounder=None,
+        bounding=None,
         random_seed=cfg.random_seed,
     )
     model.fit(FeatureData(features=features, labels=labels))

From 537be38c02ee7f88c8d42653c72d614692b82465 Mon Sep 17 00:00:00 2001
From: Leen van der Ham <l.van.der.ham@nfi.nl>
Date: Mon, 1 Dec 2025 13:10:46 +0100
Subject: [PATCH 7/7] Use ELUB as default bounding method.

---
 lrmodule/mcmc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lrmodule/mcmc.py b/lrmodule/mcmc.py
index 0aa08ce..7a6b4c5 100644
--- a/lrmodule/mcmc.py
+++ b/lrmodule/mcmc.py
@@ -3,6 +3,7 @@
 import arviz as az
 import numpy as np
 import pymc as pm
+from lir.algorithms.bayeserror import ELUBBounder
 from lir.bounding import LLRBounder
 from lir.data.models import FeatureData, LLRData
 from lir.transform import Transformer
@@ -24,7 +25,7 @@ def __init__(  # noqa: PLR0913 (more than five arguments are allowed)
         parameters_h1: dict[str, dict[str, int]] | None,
         distribution_h2: str,
         parameters_h2: dict[str, dict[str, int]] | None,
-        bounding: LLRBounder | None,
+        bounding: LLRBounder | None = ELUBBounder(),
         interval: tuple[float, float] = (0.05, 0.95),
         **mcmc_kwargs,
     ):