From eb7f8fb435de35e89ce286a93162272277ef66f0 Mon Sep 17 00:00:00 2001 From: LoryPack Date: Sat, 30 Jan 2021 18:04:33 +0100 Subject: [PATCH 1/9] First attempt at wrapping ABCpy --- .gitignore | 1 + sbibm/algorithms/abcpy/__init__.py | 0 sbibm/algorithms/abcpy/abcpy_utils.py | 115 ++++++++++++++++++++++ sbibm/algorithms/abcpy/rejection_abc.py | 124 ++++++++++++++++++++++++ try_ABCpy.py | 32 ++++++ 5 files changed, 272 insertions(+) create mode 100644 sbibm/algorithms/abcpy/__init__.py create mode 100644 sbibm/algorithms/abcpy/abcpy_utils.py create mode 100644 sbibm/algorithms/abcpy/rejection_abc.py create mode 100644 try_ABCpy.py diff --git a/.gitignore b/.gitignore index f26de6aa..537a9828 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ sbi-logs/ # Editors .tags .vscode +.idea # macOS *.DS_Store diff --git a/sbibm/algorithms/abcpy/__init__.py b/sbibm/algorithms/abcpy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py new file mode 100644 index 00000000..88bc86d3 --- /dev/null +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ -0,0 +1,115 @@ +from typing import Callable +import torch +import numpy as np +from abcpy.continuousmodels import Continuous, InputConnector +from abcpy.distances import Euclidean, LogReg, PenLogReg +from abcpy.output import Journal +from abcpy.probabilisticmodels import ProbabilisticModel +from abcpy.statistics import Statistics + + +class ABCpyPrior(ProbabilisticModel, Continuous): + def __init__(self, task, name='ABCpy_prior'): + self.prior_forward = task.get_prior() + self.dim_parameters = task.dim_parameters + self.name = task.name if task.name is not None else name + + input_parameters = InputConnector.from_list([]) + super(ABCpyPrior, self).__init__(input_parameters, self.name + "_prior") + + def forward_simulate(self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()): + result = 
np.array(self.prior_forward(num_forward_simulations)) + # print(num_forward_simulations, result.shape) + return [np.array([x]).reshape(-1, ) for x in result] + + def get_output_dimension(self): + return self.dim_parameters + + def _check_input(self, input_values): + return True + + def _check_output(self, values): + return True + + +class ABCpySimulator(ProbabilisticModel, Continuous): + def __init__(self, parameters, task, num_simulations, name='ABCpy_simulator'): + self.simulator = task.get_simulator(max_calls=num_simulations) + self.output_dim = task.dim_data + self.name = task.name if task.name is not None else name + + input_parameters = InputConnector.from_list(parameters) + super(ABCpySimulator, self).__init__(input_parameters, self.name) + + def forward_simulate(self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()): + tensor_param = torch.tensor(abcpy_input_values) + tensor_res = [self.simulator(tensor_param) for k in range(num_forward_simulations)] + # print(tensor_res) + # print([np.array(x).reshape(-1, ) for x in tensor_res]) + return [np.array(x).reshape(-1, ) for x in tensor_res] + + def get_output_dimension(self): + return self.output_dim + + def _check_input(self, input_values): + return True + + def _check_output(self, values): + return True + + +def get_distance(distance: str, statistics: Statistics) -> Callable: + """Return distance function for ABCpy.""" + + if distance == "l2": + distance_calc = Euclidean(statistics) + + elif distance == "log_reg": + distance_calc = LogReg(statistics) + + elif distance == "pen_log_reg": + distance_calc = PenLogReg(statistics) + + elif distance == "Wasserstein": + raise NotImplementedError("Wasserstein distace not yet implemented as we are considering only one single " + "simulation for parameter value") + + else: + raise NotImplementedError(f"Distance '{distance}' not implemented.") + + return distance_calc + + +def journal_cleanup_rejABC(journal, percentile): + """This function takes a 
Journal file (typically produced by an Rejection ABC run with very large epsilon value) + and the keeps only the samples which achieve performance less than some percentile of the achieved distances. It is + a very simple way to obtain a Rejection ABC which works on a percentile of the obtained distances. """ + + distance_cutoff = np.percentile(journal.distances[-1], percentile) + picked_simulations = journal.distances[-1] < distance_cutoff + new_distances = journal.distances[-1][picked_simulations] + + new_journal = Journal(journal._type) + new_journal.configuration["n_samples"] = journal.configuration["n_samples"] + new_journal.configuration["n_samples_per_param"] = journal.configuration["n_samples_per_param"] + new_journal.configuration["epsilon"] = journal.configuration["epsilon"] + + n_reduced_samples = np.sum(picked_simulations) + + new_accepted_parameters = [] + param_names = journal.get_parameters().keys() + new_names_and_parameters = {name: [] for name in param_names} + for i in range(len(picked_simulations)): + if picked_simulations[i]: + new_accepted_parameters.append(journal.get_accepted_parameters()[i]) + for name in param_names: + new_names_and_parameters[name].append(journal.get_parameters()[name][i]) + + new_journal.add_accepted_parameters(new_accepted_parameters) + new_journal.add_weights(np.ones((n_reduced_samples, 1))) + new_journal.add_ESS_estimate(np.ones((n_reduced_samples, 1))) + new_journal.add_distances(new_distances) + new_journal.add_user_parameters(new_names_and_parameters) + new_journal.number_of_simulations.append(journal.number_of_simulations[-1]) + + return new_journal diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py new file mode 100644 index 00000000..e10818ca --- /dev/null +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -0,0 +1,124 @@ +from typing import Optional, Tuple +import numpy as np +import torch +from abcpy.backends import BackendDummy +from abcpy.inferences import 
RejectionABC +from abcpy.statistics import Identity + +import sbibm +from sbibm.algorithms.abcpy.abcpy_utils import ABCpyPrior, get_distance, ABCpySimulator, journal_cleanup_rejABC +from sbibm.tasks.task import Task +from sbibm.utils.io import save_tensor_to_csv + + +def run( + task: Task, + num_samples: int, + num_simulations: int, + num_observation: Optional[int] = None, + observation: Optional[torch.Tensor] = None, + num_top_samples: Optional[int] = 100, + quantile: Optional[float] = None, + eps: Optional[float] = None, + distance: str = "l2", + save_distances: bool = False, + sass: bool = False, + sass_fraction: float = 0.5, + sass_feature_expansion_degree: int = 3, +) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: + """Runs REJ-ABC from `ABCpy` + + For now, we do not include generating more than one simulation for parameter value; can do that in the future. + + Moreover, we are not using a KDE on the ABC posterior samples in order to obtain a density estimate, as done in the + pyABC wrap; for this reason we are assuming here that num_samples is the same as num_simulations. + + I believe however we are not returning the expected number of samples, as that would be num_samples, but instead we + take the given quantile of num_simulations. + + We are also not doing SASS for now, and LRA after sampling. + + Choose one of `num_top_samples`, `quantile`, `eps`. We are actually not using eps for now, as I don't know how to + combine that with the required simulation budget. 
+ + Args: + task: Task instance + num_samples: Number of samples to generate from posterior + num_simulations: Simulation budget + num_observation: Observation number to load, alternative to `observation` + observation: Observation, alternative to `num_observation` + num_top_samples: If given, will use `top=True` with num_top_samples + quantile: Quantile to use + eps: Epsilon threshold to use + distance: Distance to use; can be "l2" (Euclidean Distance), "log_reg" (LogReg) or "pen_log_reg" (PenLogReg) + save_distances: If True, stores distances of samples to disk + sass: If True, summary statistics are learned as in + Fearnhead & Prangle 2012. + sass_fraction: Fraction of simulation budget to use for sass. + sass_feature_expansion_degree: Degree of polynomial expansion of the summary + statistics. + Returns: + Samples from posterior, number of simulator calls, log probability of true params if computable + """ + if sass: + raise NotImplementedError("SASS not yet implemented") + if eps is not None: + raise NotImplementedError + + assert num_simulations == num_samples + + assert not (num_observation is None and observation is None) + assert not (num_observation is not None and observation is not None) + + assert not (num_top_samples is None and quantile is None and eps is None) + + log = sbibm.get_logger(__name__) + log.info(f"Running REJ-ABC") + + if observation is None: + observation = task.get_observation(num_observation) + + if num_top_samples is not None and quantile is None: + if sass: + quantile = num_top_samples / ( + num_simulations - int(sass_fraction * num_simulations) + ) + else: + quantile = num_top_samples / num_simulations + + # use the dummy backend only (for now) + backend = BackendDummy() + + # as Rejection ABC works by directly drawing until you get below some threshold -> here we set the threshold to + # very large value and then we select the best objects later. 
+ + statistics = Identity(degree=1, cross=True) # we assume statistics are already computed inside the task + distance_calc = get_distance(distance, statistics) + + # define prior and model from the task simulator: + parameters = ABCpyPrior(task) + model = ABCpySimulator([parameters], task, num_simulations=num_simulations) + + # inference; not sure how to use random seed + sampler = RejectionABC(root_models=[model], distances=[distance_calc], backend=backend, seed=None) + # print("obs", [observation]) + # print("obs", [np.array(observation)]) + journal_standard_ABC = sampler.sample([[np.array(observation)]], n_samples=num_simulations, + n_samples_per_param=1, epsilon=10 ** 50) # set epsilon to very large number + journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, percentile=quantile * 100) + + assert journal_standard_ABC.number_of_simulations[-1] == num_simulations + + if save_distances: + distances = torch.tensor(journal_standard_ABC_reduced.distances[-1]) + save_tensor_to_csv("/home/lorenzo/Scrivania/OxWaSP/ABC-project/Code/sbibm/distances.csv", distances) + + samples = torch.tensor(journal_standard_ABC_reduced.get_accepted_parameters()).squeeze() # that should work + + if num_observation is not None: + # true_parameters = task.get_true_parameters(num_observation=num_observation) + # log_prob_true_parameters = posterior.log_prob(true_parameters) + # we can't compute posterior probability with ABCpy + return samples, journal_standard_ABC.number_of_simulations[-1], None + else: + return samples, journal_standard_ABC.number_of_simulations[-1], None diff --git a/try_ABCpy.py b/try_ABCpy.py new file mode 100644 index 00000000..5fc82bd2 --- /dev/null +++ b/try_ABCpy.py @@ -0,0 +1,32 @@ +import sbibm +task_name = "gaussian_linear" +task_name = "two_moons" + +task = sbibm.get_task(task_name) # See sbibm.get_available_tasks() for all tasks +prior = task.get_prior() +simulator = task.get_simulator() +observation = 
task.get_observation(num_observation=1) # 10 per task + +# Alternatively, we can import existing algorithms, e.g: +from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc # See help(rej_abc) for keywords +num_samples = 10000 +posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, num_simulations=num_samples, quantile=0.03) +# posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, num_simulations=num_samples, num_top_samples=100) + +print("Posterior samples shape", posterior_samples.shape) + +# Once we got samples from an approximate posterior, compare them to the reference: +# from sbibm.metrics import c2st +# reference_samples = task.get_reference_posterior_samples(num_observation=1) +# c2st_accuracy = c2st(reference_samples, posterior_samples) + +# Visualise both posteriors: +from sbibm.visualisation import fig_posterior +fig = fig_posterior(task_name=task_name, observation=1, samples_tensor=posterior_samples, num_samples=100, prior=False) +fig.show() +# Note: Use fig.show() or fig.save() to show or save the figure + +# Get results from other algorithms for comparison: +# from sbibm.visualisation import fig_metric +# results_df = sbibm.get_results(dataset="main_paper.csv") +# fig = fig_metric(results_df.query("task == 'two_moons'"), metric="C2ST") \ No newline at end of file From e8d59a75ad83ee8ddc0da621fa7e3af41fb6c528 Mon Sep 17 00:00:00 2001 From: LoryPack Date: Fri, 5 Mar 2021 16:46:57 +0100 Subject: [PATCH 2/9] Add KDE for generating the correct number of posterior samples from simulations --- sbibm/algorithms/abcpy/rejection_abc.py | 36 +++++++++++++++---------- try_ABCpy.py | 12 ++++++--- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index e10818ca..639ef92d 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -9,6 +9,8 @@ from 
sbibm.algorithms.abcpy.abcpy_utils import ABCpyPrior, get_distance, ABCpySimulator, journal_cleanup_rejABC from sbibm.tasks.task import Task from sbibm.utils.io import save_tensor_to_csv +from sbibm.utils.kde import get_kde +from sbibm.utils.torch import sample def run( @@ -25,17 +27,12 @@ def run( sass: bool = False, sass_fraction: float = 0.5, sass_feature_expansion_degree: int = 3, + kde_bandwidth: Optional[str] = None, ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` For now, we do not include generating more than one simulation for parameter value; can do that in the future. - Moreover, we are not using a KDE on the ABC posterior samples in order to obtain a density estimate, as done in the - pyABC wrap; for this reason we are assuming here that num_samples is the same as num_simulations. - - I believe however we are not returning the expected number of samples, as that would be num_samples, but instead we - take the given quantile of num_simulations. - We are also not doing SASS for now, and LRA after sampling. Choose one of `num_top_samples`, `quantile`, `eps`. We are actually not using eps for now, as I don't know how to @@ -57,6 +54,8 @@ def run( sass_fraction: Fraction of simulation budget to use for sass. sass_feature_expansion_degree: Degree of polynomial expansion of the summary statistics. + kde_bandwidth: If not None, will resample using KDE when necessary, set + e.g. 
to "cv" for cross-validated bandwidth selection Returns: Samples from posterior, number of simulator calls, log probability of true params if computable """ @@ -65,8 +64,6 @@ def run( if eps is not None: raise NotImplementedError - assert num_simulations == num_samples - assert not (num_observation is None and observation is None) assert not (num_observation is not None and observation is not None) @@ -115,10 +112,21 @@ def run( samples = torch.tensor(journal_standard_ABC_reduced.get_accepted_parameters()).squeeze() # that should work - if num_observation is not None: - # true_parameters = task.get_true_parameters(num_observation=num_observation) - # log_prob_true_parameters = posterior.log_prob(true_parameters) - # we can't compute posterior probability with ABCpy - return samples, journal_standard_ABC.number_of_simulations[-1], None + if kde_bandwidth is not None: + # fit KDE on data to obtain samples + log.info( + f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}" + ) + kde = get_kde( + samples, + bandwidth=kde_bandwidth, + ) + samples = kde.sample(num_samples) else: - return samples, journal_standard_ABC.number_of_simulations[-1], None + # otherwise sample with replacement until you get the num_samples you want + log.info(f"Sampling {num_samples} samples from trace") + samples = sample(samples, num_samples=num_samples, replace=True) + + # The latter is None as we cannot compute posterior probabilities with ABCpy (it would be the posterior probability + # of the true parameter space in case that was available) + return samples, journal_standard_ABC.number_of_simulations[-1], None diff --git a/try_ABCpy.py b/try_ABCpy.py index 5fc82bd2..f488890d 100644 --- a/try_ABCpy.py +++ b/try_ABCpy.py @@ -1,4 +1,5 @@ import sbibm + task_name = "gaussian_linear" task_name = "two_moons" @@ -9,9 +10,13 @@ # Alternatively, we can import existing algorithms, e.g: from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc # See help(rej_abc) for 
keywords + +num_simulations = 10000 num_samples = 10000 -posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, num_simulations=num_samples, quantile=0.03) -# posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, num_simulations=num_samples, num_top_samples=100) +posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, + num_simulations=num_simulations, quantile=0.03, kde_bandwidth="cv") +# posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, +# num_simulations=num_simulations, num_top_samples=300, kde_bandwidth="cv") print("Posterior samples shape", posterior_samples.shape) @@ -22,6 +27,7 @@ # Visualise both posteriors: from sbibm.visualisation import fig_posterior + fig = fig_posterior(task_name=task_name, observation=1, samples_tensor=posterior_samples, num_samples=100, prior=False) fig.show() # Note: Use fig.show() or fig.save() to show or save the figure @@ -29,4 +35,4 @@ # Get results from other algorithms for comparison: # from sbibm.visualisation import fig_metric # results_df = sbibm.get_results(dataset="main_paper.csv") -# fig = fig_metric(results_df.query("task == 'two_moons'"), metric="C2ST") \ No newline at end of file +# fig = fig_metric(results_df.query("task == 'two_moons'"), metric="C2ST") From f81edba1730dec4fcc86b3c8888c7761c68af73a Mon Sep 17 00:00:00 2001 From: LoryPack Date: Fri, 5 Mar 2021 16:58:01 +0100 Subject: [PATCH 3/9] Add option of choosing epsilon directly instead of quantile or num_top_samples --- sbibm/algorithms/abcpy/abcpy_utils.py | 15 ++++++++++++--- sbibm/algorithms/abcpy/rejection_abc.py | 15 ++++++++------- try_ABCpy.py | 6 ++++-- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py index 88bc86d3..310628c8 100644 --- a/sbibm/algorithms/abcpy/abcpy_utils.py +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ 
-80,14 +80,23 @@ def get_distance(distance: str, statistics: Statistics) -> Callable: return distance_calc -def journal_cleanup_rejABC(journal, percentile): +def journal_cleanup_rejABC(journal, percentile=None, threshold=None): """This function takes a Journal file (typically produced by an Rejection ABC run with very large epsilon value) - and the keeps only the samples which achieve performance less than some percentile of the achieved distances. It is + and the keeps only the samples which achieve performance less than either some percentile of the achieved distances, + or either some specified threshold. It is a very simple way to obtain a Rejection ABC which works on a percentile of the obtained distances. """ - distance_cutoff = np.percentile(journal.distances[-1], percentile) + if (threshold is None) == (percentile is None): + raise RuntimeError("Exactly one of percentile or epsilon needs to be specified.") + + if percentile is not None: + distance_cutoff = np.percentile(journal.distances[-1], percentile) + else: + distance_cutoff = threshold picked_simulations = journal.distances[-1] < distance_cutoff new_distances = journal.distances[-1][picked_simulations] + if len(picked_simulations) == 0: + raise RuntimeError("The specified value of threshold is too low, no simulations are selected.") new_journal = Journal(journal._type) new_journal.configuration["n_samples"] = journal.configuration["n_samples"] diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index 639ef92d..cda2799c 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -35,8 +35,7 @@ def run( We are also not doing SASS for now, and LRA after sampling. - Choose one of `num_top_samples`, `quantile`, `eps`. We are actually not using eps for now, as I don't know how to - combine that with the required simulation budget. + Choose one of `num_top_samples`, `quantile`, `eps`. 
Args: task: Task instance @@ -61,13 +60,12 @@ def run( """ if sass: raise NotImplementedError("SASS not yet implemented") - if eps is not None: - raise NotImplementedError assert not (num_observation is None and observation is None) assert not (num_observation is not None and observation is not None) - assert not (num_top_samples is None and quantile is None and eps is None) + if (eps is not None) + (num_top_samples is not None) + (quantile is not None) != 1: + raise RuntimeError("Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified.") log = sbibm.get_logger(__name__) log.info(f"Running REJ-ABC") @@ -75,7 +73,7 @@ def run( if observation is None: observation = task.get_observation(num_observation) - if num_top_samples is not None and quantile is None: + if num_top_samples is not None: if sass: quantile = num_top_samples / ( num_simulations - int(sass_fraction * num_simulations) @@ -102,7 +100,10 @@ def run( # print("obs", [np.array(observation)]) journal_standard_ABC = sampler.sample([[np.array(observation)]], n_samples=num_simulations, n_samples_per_param=1, epsilon=10 ** 50) # set epsilon to very large number - journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, percentile=quantile * 100) + if eps is None: # then we use quantile to select the posterior samples from the generated ones + journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, percentile=quantile * 100) + else: + journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, threshold=eps) assert journal_standard_ABC.number_of_simulations[-1] == num_simulations diff --git a/try_ABCpy.py b/try_ABCpy.py index f488890d..19a09357 100644 --- a/try_ABCpy.py +++ b/try_ABCpy.py @@ -13,10 +13,12 @@ num_simulations = 10000 num_samples = 10000 -posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, - num_simulations=num_simulations, quantile=0.03, kde_bandwidth="cv") +# posterior_samples, _, _ = 
rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", +# num_simulations=num_simulations, quantile=0.03, num_top_samples=None) # posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, # num_simulations=num_simulations, num_top_samples=300, kde_bandwidth="cv") +posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", + num_simulations=num_simulations, eps=0.15, num_top_samples=None) print("Posterior samples shape", posterior_samples.shape) From 9d5abef3a1f33b3952830ddb0a39fd977d6fee39 Mon Sep 17 00:00:00 2001 From: LoryPack Date: Fri, 5 Mar 2021 18:34:15 +0100 Subject: [PATCH 4/9] Add option of using num_samples_per_param > 1 --- sbibm/algorithms/abcpy/abcpy_utils.py | 4 ++-- sbibm/algorithms/abcpy/rejection_abc.py | 22 +++++++++++++++------- try_ABCpy.py | 5 +++-- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py index 310628c8..8bae90d9 100644 --- a/sbibm/algorithms/abcpy/abcpy_utils.py +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ -33,8 +33,8 @@ def _check_output(self, values): class ABCpySimulator(ProbabilisticModel, Continuous): - def __init__(self, parameters, task, num_simulations, name='ABCpy_simulator'): - self.simulator = task.get_simulator(max_calls=num_simulations) + def __init__(self, parameters, task, max_calls, name='ABCpy_simulator'): + self.simulator = task.get_simulator(max_calls=max_calls) self.output_dim = task.dim_data self.name = task.name if task.name is not None else name diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index cda2799c..49975bb2 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -17,6 +17,7 @@ def run( task: Task, num_samples: int, num_simulations: int, + num_simulations_per_param: Optional[int] = 1, num_observation: 
Optional[int] = None, observation: Optional[torch.Tensor] = None, num_top_samples: Optional[int] = 100, @@ -31,9 +32,7 @@ def run( ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` - For now, we do not include generating more than one simulation for parameter value; can do that in the future. - - We are also not doing SASS for now, and LRA after sampling. + We do not implement SASS for now, and LRA after sampling. Choose one of `num_top_samples`, `quantile`, `eps`. @@ -41,6 +40,9 @@ def run( task: Task instance num_samples: Number of samples to generate from posterior num_simulations: Simulation budget + num_simulations_per_param: The number of the total simulations budget to produce for each parameter value; + used to better estimate the ABC distance from the observation for each parameter value. Useful to + investigate whether splitting the budget to do multi-simulations per parameter is helpful or not. num_observation: Observation number to load, alternative to `observation` observation: Observation, alternative to `num_observation` num_top_samples: If given, will use `top=True` with num_top_samples @@ -92,20 +94,26 @@ def run( # define prior and model from the task simulator: parameters = ABCpyPrior(task) - model = ABCpySimulator([parameters], task, num_simulations=num_simulations) + model = ABCpySimulator([parameters], task, max_calls=num_simulations) # inference; not sure how to use random seed sampler = RejectionABC(root_models=[model], distances=[distance_calc], backend=backend, seed=None) # print("obs", [observation]) # print("obs", [np.array(observation)]) - journal_standard_ABC = sampler.sample([[np.array(observation)]], n_samples=num_simulations, - n_samples_per_param=1, epsilon=10 ** 50) # set epsilon to very large number + journal_standard_ABC = sampler.sample([[np.array(observation)]], + n_samples=num_simulations // num_simulations_per_param, + n_samples_per_param=num_simulations_per_param, + epsilon=10 ** 50) # set 
epsilon to very large number if eps is None: # then we use quantile to select the posterior samples from the generated ones journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, percentile=quantile * 100) else: journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, threshold=eps) - assert journal_standard_ABC.number_of_simulations[-1] == num_simulations + actual_n_simulations = journal_standard_ABC.number_of_simulations[-1] * journal_standard_ABC.configuration[ + "n_samples_per_param"] + # this takes into account the fact that num_simulations can be not divisible by num_simulations_per_param + expected_n_simulations = (num_simulations // num_simulations_per_param) * num_simulations_per_param + assert actual_n_simulations == expected_n_simulations if save_distances: distances = torch.tensor(journal_standard_ABC_reduced.distances[-1]) diff --git a/try_ABCpy.py b/try_ABCpy.py index 19a09357..9392fe4c 100644 --- a/try_ABCpy.py +++ b/try_ABCpy.py @@ -11,14 +11,15 @@ # Alternatively, we can import existing algorithms, e.g: from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc # See help(rej_abc) for keywords -num_simulations = 10000 +num_simulations = 1000 num_samples = 10000 # posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", # num_simulations=num_simulations, quantile=0.03, num_top_samples=None) # posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, # num_simulations=num_simulations, num_top_samples=300, kde_bandwidth="cv") posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", - num_simulations=num_simulations, eps=0.15, num_top_samples=None) + num_simulations_per_param=3, + num_simulations=num_simulations, eps=0.3, num_top_samples=None) print("Posterior samples shape", posterior_samples.shape) From 055f74c72842e3409081fd4afbf49a46573038c9 Mon Sep 17 00:00:00 2001 From: 
LoryPack Date: Sun, 14 Mar 2021 14:59:54 +0100 Subject: [PATCH 5/9] Some small fixes --- sbibm/algorithms/abcpy/abcpy_utils.py | 1 - sbibm/algorithms/abcpy/rejection_abc.py | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py index 8bae90d9..e1c62506 100644 --- a/sbibm/algorithms/abcpy/abcpy_utils.py +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ -19,7 +19,6 @@ def __init__(self, task, name='ABCpy_prior'): def forward_simulate(self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()): result = np.array(self.prior_forward(num_forward_simulations)) - # print(num_forward_simulations, result.shape) return [np.array([x]).reshape(-1, ) for x in result] def get_output_dimension(self): diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index 49975bb2..b737f5cd 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -29,6 +29,7 @@ def run( sass_fraction: float = 0.5, sass_feature_expansion_degree: int = 3, kde_bandwidth: Optional[str] = None, + seed: Optional[int] = None, ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` @@ -50,11 +51,12 @@ def run( eps: Epsilon threshold to use distance: Distance to use; can be "l2" (Euclidean Distance), "log_reg" (LogReg) or "pen_log_reg" (PenLogReg) save_distances: If True, stores distances of samples to disk - sass: If True, summary statistics are learned as in - Fearnhead & Prangle 2012. - sass_fraction: Fraction of simulation budget to use for sass. - sass_feature_expansion_degree: Degree of polynomial expansion of the summary - statistics. + sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012. + Not yet implemented, left for compatibility with other algorithms. + sass_fraction: Fraction of simulation budget to use for sass. 
Unused for now, left for compatibility + with other algorithms. + sass_feature_expansion_degree: Degree of polynomial expansion of the summary statistics. Unused for now, only + left for compatibility with other algorithms. kde_bandwidth: If not None, will resample using KDE when necessary, set e.g. to "cv" for cross-validated bandwidth selection Returns: @@ -96,10 +98,8 @@ def run( parameters = ABCpyPrior(task) model = ABCpySimulator([parameters], task, max_calls=num_simulations) - # inference; not sure how to use random seed - sampler = RejectionABC(root_models=[model], distances=[distance_calc], backend=backend, seed=None) - # print("obs", [observation]) - # print("obs", [np.array(observation)]) + # inference + sampler = RejectionABC(root_models=[model], distances=[distance_calc], backend=backend, seed=seed) journal_standard_ABC = sampler.sample([[np.array(observation)]], n_samples=num_simulations // num_simulations_per_param, n_samples_per_param=num_simulations_per_param, @@ -117,7 +117,7 @@ def run( if save_distances: distances = torch.tensor(journal_standard_ABC_reduced.distances[-1]) - save_tensor_to_csv("/home/lorenzo/Scrivania/OxWaSP/ABC-project/Code/sbibm/distances.csv", distances) + save_tensor_to_csv("distances.csv", distances) samples = torch.tensor(journal_standard_ABC_reduced.get_accepted_parameters()).squeeze() # that should work From a409676c5b20534f7f62f6cfbc87c985b2a2711c Mon Sep 17 00:00:00 2001 From: LoryPack Date: Sun, 14 Mar 2021 15:06:15 +0100 Subject: [PATCH 6/9] isort and black --- sbibm/algorithms/abcpy/abcpy_utils.py | 53 +++++++++++---- sbibm/algorithms/abcpy/rejection_abc.py | 90 ++++++++++++++++--------- 2 files changed, 97 insertions(+), 46 deletions(-) diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py index e1c62506..4bee280a 100644 --- a/sbibm/algorithms/abcpy/abcpy_utils.py +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ -1,6 +1,7 @@ from typing import Callable -import torch + import numpy 
as np +import torch from abcpy.continuousmodels import Continuous, InputConnector from abcpy.distances import Euclidean, LogReg, PenLogReg from abcpy.output import Journal @@ -9,7 +10,7 @@ class ABCpyPrior(ProbabilisticModel, Continuous): - def __init__(self, task, name='ABCpy_prior'): + def __init__(self, task, name="ABCpy_prior"): self.prior_forward = task.get_prior() self.dim_parameters = task.dim_parameters self.name = task.name if task.name is not None else name @@ -17,9 +18,16 @@ def __init__(self, task, name='ABCpy_prior'): input_parameters = InputConnector.from_list([]) super(ABCpyPrior, self).__init__(input_parameters, self.name + "_prior") - def forward_simulate(self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()): + def forward_simulate( + self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState() + ): result = np.array(self.prior_forward(num_forward_simulations)) - return [np.array([x]).reshape(-1, ) for x in result] + return [ + np.array([x]).reshape( + -1, + ) + for x in result + ] def get_output_dimension(self): return self.dim_parameters @@ -32,7 +40,7 @@ def _check_output(self, values): class ABCpySimulator(ProbabilisticModel, Continuous): - def __init__(self, parameters, task, max_calls, name='ABCpy_simulator'): + def __init__(self, parameters, task, max_calls, name="ABCpy_simulator"): self.simulator = task.get_simulator(max_calls=max_calls) self.output_dim = task.dim_data self.name = task.name if task.name is not None else name @@ -40,12 +48,21 @@ def __init__(self, parameters, task, max_calls, name='ABCpy_simulator'): input_parameters = InputConnector.from_list(parameters) super(ABCpySimulator, self).__init__(input_parameters, self.name) - def forward_simulate(self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()): + def forward_simulate( + self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState() + ): tensor_param = torch.tensor(abcpy_input_values) - 
tensor_res = [self.simulator(tensor_param) for k in range(num_forward_simulations)] + tensor_res = [ + self.simulator(tensor_param) for k in range(num_forward_simulations) + ] # print(tensor_res) # print([np.array(x).reshape(-1, ) for x in tensor_res]) - return [np.array(x).reshape(-1, ) for x in tensor_res] + return [ + np.array(x).reshape( + -1, + ) + for x in tensor_res + ] def get_output_dimension(self): return self.output_dim @@ -70,8 +87,10 @@ def get_distance(distance: str, statistics: Statistics) -> Callable: distance_calc = PenLogReg(statistics) elif distance == "Wasserstein": - raise NotImplementedError("Wasserstein distace not yet implemented as we are considering only one single " - "simulation for parameter value") + raise NotImplementedError( + "Wasserstein distance not yet implemented as we are considering only one single " + "simulation for parameter value" + ) else: raise NotImplementedError(f"Distance '{distance}' not implemented.") @@ -83,10 +102,12 @@ def journal_cleanup_rejABC(journal, percentile=None, threshold=None): """This function takes a Journal file (typically produced by an Rejection ABC run with very large epsilon value) and the keeps only the samples which achieve performance less than either some percentile of the achieved distances, or either some specified threshold. It is - a very simple way to obtain a Rejection ABC which works on a percentile of the obtained distances. """ + a very simple way to obtain a Rejection ABC which works on a percentile of the obtained distances.""" if (threshold is None) == (percentile is None): - raise RuntimeError("Exactly one of percentile or epsilon needs to be specified.") + raise RuntimeError( + "Exactly one of percentile or epsilon needs to be specified." 
+ ) if percentile is not None: distance_cutoff = np.percentile(journal.distances[-1], percentile) @@ -95,11 +116,15 @@ def journal_cleanup_rejABC(journal, percentile=None, threshold=None): picked_simulations = journal.distances[-1] < distance_cutoff new_distances = journal.distances[-1][picked_simulations] if len(picked_simulations) == 0: - raise RuntimeError("The specified value of threshold is too low, no simulations are selected.") + raise RuntimeError( + "The specified value of threshold is too low, no simulations are selected." + ) new_journal = Journal(journal._type) new_journal.configuration["n_samples"] = journal.configuration["n_samples"] - new_journal.configuration["n_samples_per_param"] = journal.configuration["n_samples_per_param"] + new_journal.configuration["n_samples_per_param"] = journal.configuration[ + "n_samples_per_param" + ] new_journal.configuration["epsilon"] = journal.configuration["epsilon"] n_reduced_samples = np.sum(picked_simulations) diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index b737f5cd..fdcca8dd 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -1,4 +1,5 @@ from typing import Optional, Tuple + import numpy as np import torch from abcpy.backends import BackendDummy @@ -6,7 +7,12 @@ from abcpy.statistics import Identity import sbibm -from sbibm.algorithms.abcpy.abcpy_utils import ABCpyPrior, get_distance, ABCpySimulator, journal_cleanup_rejABC +from sbibm.algorithms.abcpy.abcpy_utils import ( + ABCpyPrior, + ABCpySimulator, + get_distance, + journal_cleanup_rejABC, +) from sbibm.tasks.task import Task from sbibm.utils.io import save_tensor_to_csv from sbibm.utils.kde import get_kde @@ -14,22 +20,22 @@ def run( - task: Task, - num_samples: int, - num_simulations: int, - num_simulations_per_param: Optional[int] = 1, - num_observation: Optional[int] = None, - observation: Optional[torch.Tensor] = None, - num_top_samples: Optional[int] 
= 100, - quantile: Optional[float] = None, - eps: Optional[float] = None, - distance: str = "l2", - save_distances: bool = False, - sass: bool = False, - sass_fraction: float = 0.5, - sass_feature_expansion_degree: int = 3, - kde_bandwidth: Optional[str] = None, - seed: Optional[int] = None, + task: Task, + num_samples: int, + num_simulations: int, + num_simulations_per_param: Optional[int] = 1, + num_observation: Optional[int] = None, + observation: Optional[torch.Tensor] = None, + num_top_samples: Optional[int] = 100, + quantile: Optional[float] = None, + eps: Optional[float] = None, + distance: str = "l2", + save_distances: bool = False, + sass: bool = False, + sass_fraction: float = 0.5, + sass_feature_expansion_degree: int = 3, + kde_bandwidth: Optional[str] = None, + seed: Optional[int] = None, ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` @@ -69,7 +75,9 @@ def run( assert not (num_observation is not None and observation is not None) if (eps is not None) + (num_top_samples is not None) + (quantile is not None) != 1: - raise RuntimeError("Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified.") + raise RuntimeError( + "Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified." + ) log = sbibm.get_logger(__name__) log.info(f"Running REJ-ABC") @@ -80,7 +88,7 @@ def run( if num_top_samples is not None: if sass: quantile = num_top_samples / ( - num_simulations - int(sass_fraction * num_simulations) + num_simulations - int(sass_fraction * num_simulations) ) else: quantile = num_top_samples / num_simulations @@ -91,7 +99,9 @@ def run( # as Rejection ABC works by directly drawing until you get below some threshold -> here we set the threshold to # very large value and then we select the best objects later. 
- statistics = Identity(degree=1, cross=True) # we assume statistics are already computed inside the task + statistics = Identity( + degree=1, cross=True + ) # we assume statistics are already computed inside the task distance_calc = get_distance(distance, statistics) # define prior and model from the task simulator: @@ -99,27 +109,43 @@ def run( model = ABCpySimulator([parameters], task, max_calls=num_simulations) # inference - sampler = RejectionABC(root_models=[model], distances=[distance_calc], backend=backend, seed=seed) - journal_standard_ABC = sampler.sample([[np.array(observation)]], - n_samples=num_simulations // num_simulations_per_param, - n_samples_per_param=num_simulations_per_param, - epsilon=10 ** 50) # set epsilon to very large number - if eps is None: # then we use quantile to select the posterior samples from the generated ones - journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, percentile=quantile * 100) + sampler = RejectionABC( + root_models=[model], distances=[distance_calc], backend=backend, seed=seed + ) + journal_standard_ABC = sampler.sample( + [[np.array(observation)]], + n_samples=num_simulations // num_simulations_per_param, + n_samples_per_param=num_simulations_per_param, + epsilon=10 ** 50, + ) # set epsilon to very large number + if ( + eps is None + ): # then we use quantile to select the posterior samples from the generated ones + journal_standard_ABC_reduced = journal_cleanup_rejABC( + journal_standard_ABC, percentile=quantile * 100 + ) else: - journal_standard_ABC_reduced = journal_cleanup_rejABC(journal_standard_ABC, threshold=eps) + journal_standard_ABC_reduced = journal_cleanup_rejABC( + journal_standard_ABC, threshold=eps + ) - actual_n_simulations = journal_standard_ABC.number_of_simulations[-1] * journal_standard_ABC.configuration[ - "n_samples_per_param"] + actual_n_simulations = ( + journal_standard_ABC.number_of_simulations[-1] + * journal_standard_ABC.configuration["n_samples_per_param"] + ) # 
this takes into account the fact that num_simulations can be not divisible by num_simulations_per_param - expected_n_simulations = (num_simulations // num_simulations_per_param) * num_simulations_per_param + expected_n_simulations = ( + num_simulations // num_simulations_per_param + ) * num_simulations_per_param assert actual_n_simulations == expected_n_simulations if save_distances: distances = torch.tensor(journal_standard_ABC_reduced.distances[-1]) save_tensor_to_csv("distances.csv", distances) - samples = torch.tensor(journal_standard_ABC_reduced.get_accepted_parameters()).squeeze() # that should work + samples = torch.tensor( + journal_standard_ABC_reduced.get_accepted_parameters() + ).squeeze() # that should work if kde_bandwidth is not None: # fit KDE on data to obtain samples From 30b3a5a25f88cdd9579df4e945a91219419f92f1 Mon Sep 17 00:00:00 2001 From: LoryPack Date: Sun, 14 Mar 2021 15:56:03 +0100 Subject: [PATCH 7/9] Remove random seed and small fix Remove random seed and small fix --- sbibm/algorithms/abcpy/abcpy_utils.py | 10 ++++------ sbibm/algorithms/abcpy/rejection_abc.py | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/sbibm/algorithms/abcpy/abcpy_utils.py b/sbibm/algorithms/abcpy/abcpy_utils.py index 4bee280a..c6cc7458 100644 --- a/sbibm/algorithms/abcpy/abcpy_utils.py +++ b/sbibm/algorithms/abcpy/abcpy_utils.py @@ -119,20 +119,18 @@ def journal_cleanup_rejABC(journal, percentile=None, threshold=None): raise RuntimeError( "The specified value of threshold is too low, no simulations are selected." 
) - + n_reduced_samples = np.sum(picked_simulations) new_journal = Journal(journal._type) - new_journal.configuration["n_samples"] = journal.configuration["n_samples"] + new_journal.configuration["n_samples"] = n_reduced_samples new_journal.configuration["n_samples_per_param"] = journal.configuration[ "n_samples_per_param" ] - new_journal.configuration["epsilon"] = journal.configuration["epsilon"] - - n_reduced_samples = np.sum(picked_simulations) + new_journal.configuration["epsilon"] = distance_cutoff new_accepted_parameters = [] param_names = journal.get_parameters().keys() new_names_and_parameters = {name: [] for name in param_names} - for i in range(len(picked_simulations)): + for i in np.where(picked_simulations)[0]: if picked_simulations[i]: new_accepted_parameters.append(journal.get_accepted_parameters()[i]) for name in param_names: diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index fdcca8dd..5461a8b2 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -35,7 +35,6 @@ def run( sass_fraction: float = 0.5, sass_feature_expansion_degree: int = 3, kde_bandwidth: Optional[str] = None, - seed: Optional[int] = None, ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` @@ -110,7 +109,7 @@ def run( # inference sampler = RejectionABC( - root_models=[model], distances=[distance_calc], backend=backend, seed=seed + root_models=[model], distances=[distance_calc], backend=backend ) journal_standard_ABC = sampler.sample( [[np.array(observation)]], @@ -164,4 +163,4 @@ def run( # The latter is None as we cannot compute posterior probabilities with ABCpy (it would be the posterior probability # of the true parameter space in case that was available) - return samples, journal_standard_ABC.number_of_simulations[-1], None + return samples, actual_n_simulations, None From c9c6469db1c0601e456e6bd4c44665661b75bb6e Mon Sep 17 00:00:00 2001 From: LoryPack 
Date: Sun, 14 Mar 2021 15:57:32 +0100 Subject: [PATCH 8/9] Create tests --- tests/algorithms/test_abcpy.py | 60 ++++++++++++++++++++++++++++++++++ try_ABCpy.py | 41 ----------------------- 2 files changed, 60 insertions(+), 41 deletions(-) create mode 100644 tests/algorithms/test_abcpy.py delete mode 100644 try_ABCpy.py diff --git a/tests/algorithms/test_abcpy.py b/tests/algorithms/test_abcpy.py new file mode 100644 index 00000000..988c7b57 --- /dev/null +++ b/tests/algorithms/test_abcpy.py @@ -0,0 +1,60 @@ +import numpy as np +import pytest +import torch + +import sbibm +from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc + + +@pytest.mark.parametrize( + "task_name", + [(task_name) for task_name in ["gaussian_linear_uniform"]], +) +def test_abcpy_rejection( + task_name, + num_observation=1, + num_samples=10_000, + num_simulations=1_000, +): + # set random seeds: + torch.manual_seed(1) + np.random.seed(1) + + task = sbibm.get_task(task_name) + + posterior_samples, actual_num_simulations, _ = rej_abc( + task=task, + num_samples=num_samples, + num_observation=num_observation, + num_simulations=num_simulations, + quantile=0.1, + num_top_samples=None, + kde_bandwidth="cv", + ) + assert actual_num_simulations == num_simulations + assert posterior_samples.shape[0] == num_samples + + posterior_samples, actual_num_simulations, _ = rej_abc( + task=task, + num_samples=num_samples, + num_observation=num_observation, + num_simulations=num_simulations, + num_top_samples=500, + kde_bandwidth="cv", + ) + assert actual_num_simulations == num_simulations + assert posterior_samples.shape[0] == num_samples + + # test multiple simulations per parameter + posterior_samples, actual_num_simulations, _ = rej_abc( + task=task, + num_samples=num_samples, + num_observation=num_observation, + num_simulations_per_param=2, + num_simulations=num_simulations, + eps=6, + num_top_samples=None, + kde_bandwidth="cv", + ) + assert actual_num_simulations == num_simulations + assert 
posterior_samples.shape[0] == num_samples diff --git a/try_ABCpy.py b/try_ABCpy.py deleted file mode 100644 index 9392fe4c..00000000 --- a/try_ABCpy.py +++ /dev/null @@ -1,41 +0,0 @@ -import sbibm - -task_name = "gaussian_linear" -task_name = "two_moons" - -task = sbibm.get_task(task_name) # See sbibm.get_available_tasks() for all tasks -prior = task.get_prior() -simulator = task.get_simulator() -observation = task.get_observation(num_observation=1) # 10 per task - -# Alternatively, we can import existing algorithms, e.g: -from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc # See help(rej_abc) for keywords - -num_simulations = 1000 -num_samples = 10000 -# posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", -# num_simulations=num_simulations, quantile=0.03, num_top_samples=None) -# posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, -# num_simulations=num_simulations, num_top_samples=300, kde_bandwidth="cv") -posterior_samples, _, _ = rej_abc(task=task, num_samples=num_samples, num_observation=1, kde_bandwidth="cv", - num_simulations_per_param=3, - num_simulations=num_simulations, eps=0.3, num_top_samples=None) - -print("Posterior samples shape", posterior_samples.shape) - -# Once we got samples from an approximate posterior, compare them to the reference: -# from sbibm.metrics import c2st -# reference_samples = task.get_reference_posterior_samples(num_observation=1) -# c2st_accuracy = c2st(reference_samples, posterior_samples) - -# Visualise both posteriors: -from sbibm.visualisation import fig_posterior - -fig = fig_posterior(task_name=task_name, observation=1, samples_tensor=posterior_samples, num_samples=100, prior=False) -fig.show() -# Note: Use fig.show() or fig.save() to show or save the figure - -# Get results from other algorithms for comparison: -# from sbibm.visualisation import fig_metric -# results_df = sbibm.get_results(dataset="main_paper.csv") -# 
fig = fig_metric(results_df.query("task == 'two_moons'"), metric="C2ST") From b35211ecbf08cb715fbdaa494f2f4ddd29f3e7d5 Mon Sep 17 00:00:00 2001 From: LoryPack Date: Thu, 18 Mar 2021 17:32:34 +0100 Subject: [PATCH 9/9] Fix docstring --- sbibm/algorithms/abcpy/rejection_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbibm/algorithms/abcpy/rejection_abc.py b/sbibm/algorithms/abcpy/rejection_abc.py index 5461a8b2..52e7faa0 100644 --- a/sbibm/algorithms/abcpy/rejection_abc.py +++ b/sbibm/algorithms/abcpy/rejection_abc.py @@ -38,7 +38,7 @@ def run( ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: """Runs REJ-ABC from `ABCpy` - We do not implement SASS for now, and LRA after sampling. + ABCpy does not implement LRA post processing. SASS is supported in ABCpy but not yet implemented here. Choose one of `num_top_samples`, `quantile`, `eps`.