-
Notifications
You must be signed in to change notification settings - Fork 48
First attempt at wrapping ABCpy - work-in-progress #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
eb7f8fb
e8d59a7
f81edba
9d5abef
055f74c
a409676
30b3a5a
c9c6469
b35211e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ sbi-logs/ | |
| # Editors | ||
| .tags | ||
| .vscode | ||
| .idea | ||
|
|
||
| # macOS | ||
| *.DS_Store | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| from typing import Callable | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from abcpy.continuousmodels import Continuous, InputConnector | ||
| from abcpy.distances import Euclidean, LogReg, PenLogReg | ||
| from abcpy.output import Journal | ||
| from abcpy.probabilisticmodels import ProbabilisticModel | ||
| from abcpy.statistics import Statistics | ||
|
|
||
|
|
||
class ABCpyPrior(ProbabilisticModel, Continuous):
    """Expose an sbibm task prior as an ABCpy root probabilistic model."""

    def __init__(self, task, name="ABCpy_prior"):
        # Callable returning `n` parameter draws from the task prior.
        self.prior_forward = task.get_prior()
        self.dim_parameters = task.dim_parameters
        self.name = name if task.name is None else task.name

        # A prior is a root model: it receives no inputs from parent models.
        super(ABCpyPrior, self).__init__(
            InputConnector.from_list([]), self.name + "_prior"
        )

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # ABCpy expects a list with one flat numpy array per simulation.
        draws = np.array(self.prior_forward(num_forward_simulations))
        return [np.array([draw]).reshape(-1) for draw in draws]

    def get_output_dimension(self):
        # Size of a single parameter vector produced by this prior.
        return self.dim_parameters

    def _check_input(self, input_values):
        # Validation is delegated to the wrapped task; accept everything.
        return True

    def _check_output(self, values):
        return True
||
|
|
||
class ABCpySimulator(ProbabilisticModel, Continuous):
    """Expose an sbibm task simulator as an ABCpy probabilistic model."""

    def __init__(self, parameters, task, max_calls, name="ABCpy_simulator"):
        # Budget-limited simulator handed out by the sbibm task.
        self.simulator = task.get_simulator(max_calls=max_calls)
        self.output_dim = task.dim_data
        self.name = name if task.name is None else task.name

        super(ABCpySimulator, self).__init__(
            InputConnector.from_list(parameters), self.name
        )

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # sbibm simulators consume torch tensors; convert once, then run one
        # simulation per requested forward draw.
        theta = torch.tensor(abcpy_input_values)
        simulations = [
            self.simulator(theta) for _ in range(num_forward_simulations)
        ]
        # ABCpy expects a list with one flat numpy array per simulation.
        return [np.array(sim).reshape(-1) for sim in simulations]

    def get_output_dimension(self):
        # Size of a single simulator output vector.
        return self.output_dim

    def _check_input(self, input_values):
        # Validation is delegated to the wrapped task; accept everything.
        return True

    def _check_output(self, values):
        return True
||
|
|
||
def get_distance(distance: str, statistics: "Statistics") -> Callable:
    """Return the ABCpy distance object for the given distance name.

    Args:
        distance: One of "l2" (Euclidean), "log_reg" (LogReg) or
            "pen_log_reg" (PenLogReg).
        statistics: ABCpy statistics object the distance operates on.

    Returns:
        An instantiated ABCpy distance.

    Raises:
        NotImplementedError: For "Wasserstein" (not supported yet, since only
            a single simulation per parameter value is used) or for any
            unknown distance name.
    """
    if distance == "l2":
        return Euclidean(statistics)
    if distance == "log_reg":
        return LogReg(statistics)
    if distance == "pen_log_reg":
        return PenLogReg(statistics)
    if distance == "Wasserstein":
        # Fixed typo in the original message ("distace" -> "distance").
        raise NotImplementedError(
            "Wasserstein distance not yet implemented as we are considering only one single "
            "simulation for parameter value"
        )
    raise NotImplementedError(f"Distance '{distance}' not implemented.")
||
|
|
||
def journal_cleanup_rejABC(journal, percentile=None, threshold=None):
    """Post-process a Rejection-ABC Journal, keeping only the best samples.

    The journal is typically produced by a Rejection ABC run with a very
    large epsilon value (so that all simulations are accepted); this function
    then keeps only the samples whose achieved distance is below either some
    percentile of the achieved distances or some explicit threshold. It is a
    very simple way to obtain a Rejection ABC which works on a percentile of
    the obtained distances.

    Args:
        journal: ABCpy ``Journal`` from a (large-epsilon) Rejection ABC run.
        percentile: Keep samples below this percentile of the distances.
        threshold: Keep samples with distance strictly below this value.

    Returns:
        A new ``Journal`` containing only the selected samples, with uniform
        weights and updated configuration.

    Raises:
        RuntimeError: If not exactly one of ``percentile``/``threshold`` is
            specified, or if no simulation falls below the cutoff.
    """
    if (threshold is None) == (percentile is None):
        # Fixed message: the parameter is named `threshold`, not `epsilon`.
        raise RuntimeError(
            "Exactly one of percentile or threshold needs to be specified."
        )

    if percentile is not None:
        distance_cutoff = np.percentile(journal.distances[-1], percentile)
    else:
        distance_cutoff = threshold
    picked_simulations = journal.distances[-1] < distance_cutoff
    new_distances = journal.distances[-1][picked_simulations]
    n_reduced_samples = np.sum(picked_simulations)
    # BUG FIX: the original tested `len(picked_simulations) == 0`, but that is
    # the length of the boolean mask (the total number of simulations) and is
    # therefore never zero; count the accepted simulations instead.
    if n_reduced_samples == 0:
        raise RuntimeError(
            "The specified value of threshold is too low, no simulations are selected."
        )
    new_journal = Journal(journal._type)
    new_journal.configuration["n_samples"] = n_reduced_samples
    new_journal.configuration["n_samples_per_param"] = journal.configuration[
        "n_samples_per_param"
    ]
    new_journal.configuration["epsilon"] = distance_cutoff

    # Hoist the journal accessors out of the loop so they are called once.
    accepted_parameters = journal.get_accepted_parameters()
    parameters = journal.get_parameters()
    param_names = parameters.keys()
    new_accepted_parameters = []
    new_names_and_parameters = {name: [] for name in param_names}
    # np.where already yields only the picked indices, so the original inner
    # `if picked_simulations[i]` check was redundant and has been dropped.
    for i in np.where(picked_simulations)[0]:
        new_accepted_parameters.append(accepted_parameters[i])
        for name in param_names:
            new_names_and_parameters[name].append(parameters[name][i])

    new_journal.add_accepted_parameters(new_accepted_parameters)
    new_journal.add_weights(np.ones((n_reduced_samples, 1)))
    new_journal.add_ESS_estimate(np.ones((n_reduced_samples, 1)))
    new_journal.add_distances(new_distances)
    new_journal.add_user_parameters(new_names_and_parameters)
    new_journal.number_of_simulations.append(journal.number_of_simulations[-1])

    return new_journal
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,166 @@ | ||
| from typing import Optional, Tuple | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from abcpy.backends import BackendDummy | ||
| from abcpy.inferences import RejectionABC | ||
| from abcpy.statistics import Identity | ||
|
|
||
| import sbibm | ||
| from sbibm.algorithms.abcpy.abcpy_utils import ( | ||
| ABCpyPrior, | ||
| ABCpySimulator, | ||
| get_distance, | ||
| journal_cleanup_rejABC, | ||
| ) | ||
| from sbibm.tasks.task import Task | ||
| from sbibm.utils.io import save_tensor_to_csv | ||
| from sbibm.utils.kde import get_kde | ||
| from sbibm.utils.torch import sample | ||
|
|
||
|
|
||
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_simulations_per_param: Optional[int] = 1,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    save_distances: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    kde_bandwidth: Optional[str] = None,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `ABCpy`.

    ABCpy does not implement LRA post processing. SASS is supported in ABCpy
    but not yet implemented here.

    Choose exactly one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_simulations_per_param: The number of the total simulations budget to produce
            for each parameter value; used to better estimate the ABC distance from the
            observation for each parameter value. Useful to investigate whether splitting
            the budget to do multi-simulations per parameter is helpful or not.
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use; can be "l2" (Euclidean Distance), "log_reg" (LogReg)
            or "pen_log_reg" (PenLogReg)
        save_distances: If True, stores distances of samples to disk
        sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012.
            Not yet implemented, left for compatibility with other algorithms.
        sass_fraction: Fraction of simulation budget to use for sass. Unused for now,
            left for compatibility with other algorithms.
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary
            statistics. Unused for now, only left for compatibility with other algorithms.
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection

    Returns:
        Samples from posterior, number of simulator calls, log probability of true
        params if computable

    Raises:
        NotImplementedError: If `sass` is requested (not implemented here).
        RuntimeError: If not exactly one of `num_top_samples`/`quantile`/`eps` is given.
    """
    if sass:
        raise NotImplementedError("SASS not yet implemented")

    # Exactly one way of specifying the observation must be used.
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    if (eps is not None) + (num_top_samples is not None) + (quantile is not None) != 1:
        raise RuntimeError(
            "Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified."
        )

    log = sbibm.get_logger(__name__)
    # Plain string: the original used an f-string with no placeholders.
    log.info("Running REJ-ABC")

    if observation is None:
        observation = task.get_observation(num_observation)

    # Translate `num_top_samples` into an acceptance quantile over the part of
    # the budget actually used for inference.
    if num_top_samples is not None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    # use the dummy backend only (for now)
    backend = BackendDummy()

    # As Rejection ABC works by directly drawing until you get below some
    # threshold -> here we set the threshold to a very large value and then we
    # select the best objects later (see journal_cleanup_rejABC).

    statistics = Identity(
        degree=1, cross=True
    )  # we assume statistics are already computed inside the task
    distance_calc = get_distance(distance, statistics)

    # define prior and model from the task simulator:
    parameters = ABCpyPrior(task)
    model = ABCpySimulator([parameters], task, max_calls=num_simulations)

    # inference
    sampler = RejectionABC(
        root_models=[model], distances=[distance_calc], backend=backend
    )
    journal_standard_ABC = sampler.sample(
        [[np.array(observation)]],
        n_samples=num_simulations // num_simulations_per_param,
        n_samples_per_param=num_simulations_per_param,
        epsilon=10 ** 50,
    )  # set epsilon to very large number so every simulation is accepted
    if (
        eps is None
    ):  # then we use quantile to select the posterior samples from the generated ones
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, percentile=quantile * 100
        )
    else:
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, threshold=eps
        )

    actual_n_simulations = (
        journal_standard_ABC.number_of_simulations[-1]
        * journal_standard_ABC.configuration["n_samples_per_param"]
    )
    # this takes into account the fact that num_simulations can be not divisible
    # by num_simulations_per_param
    expected_n_simulations = (
        num_simulations // num_simulations_per_param
    ) * num_simulations_per_param
    assert actual_n_simulations == expected_n_simulations

    if save_distances:
        distances = torch.tensor(journal_standard_ABC_reduced.distances[-1])
        save_tensor_to_csv("distances.csv", distances)

    samples = torch.tensor(
        journal_standard_ABC_reduced.get_accepted_parameters()
    ).squeeze()

    if kde_bandwidth is not None:
        # fit KDE on data to obtain samples
        log.info(
            f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        kde = get_kde(
            samples,
            bandwidth=kde_bandwidth,
        )
        samples = kde.sample(num_samples)
    else:
        # otherwise sample with replacement until you get the num_samples you want
        log.info(f"Sampling {num_samples} samples from trace")
        samples = sample(samples, num_samples=num_samples, replace=True)

    # The latter is None as we cannot compute posterior probabilities with ABCpy
    # (it would be the posterior probability of the true parameter in case that
    # was available)
    return samples, actual_n_simulations, None
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| import numpy as np | ||
| import pytest | ||
| import torch | ||
|
|
||
| import sbibm | ||
| from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "task_name",
    # Plain list: the original wrapped it in a pointless identity comprehension.
    ["gaussian_linear_uniform"],
)
def test_abcpy_rejection(
    task_name,
    num_observation=1,
    num_samples=10_000,
    num_simulations=1_000,
):
    """Smoke-test the ABCpy REJ-ABC wrapper on a cheap task.

    Exercises the three mutually-exclusive acceptance criteria (quantile,
    num_top_samples, eps) and checks that the reported simulation count and
    the number of returned posterior samples match the request.
    """
    # set random seeds for reproducibility:
    torch.manual_seed(1)
    np.random.seed(1)

    task = sbibm.get_task(task_name)

    # quantile-based selection
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations=num_simulations,
        quantile=0.1,
        num_top_samples=None,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # top-samples-based selection (default acceptance mode)
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations=num_simulations,
        num_top_samples=500,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # epsilon-based selection with multiple simulations per parameter
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations_per_param=2,
        num_simulations=num_simulations,
        eps=6,
        num_top_samples=None,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples
Uh oh!
There was an error while loading. Please reload this page.