Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ sbi-logs/
# Editors
.tags
.vscode
.idea

# macOS
*.DS_Store
Expand Down
Empty file.
146 changes: 146 additions & 0 deletions sbibm/algorithms/abcpy/abcpy_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from typing import Callable

import numpy as np
import torch
from abcpy.continuousmodels import Continuous, InputConnector
from abcpy.distances import Euclidean, LogReg, PenLogReg
from abcpy.output import Journal
from abcpy.probabilisticmodels import ProbabilisticModel
from abcpy.statistics import Statistics


class ABCpyPrior(ProbabilisticModel, Continuous):
    """Expose an sbibm task prior as an ABCpy probabilistic model."""

    def __init__(self, task, name="ABCpy_prior"):
        # Callable that draws samples from the task prior.
        self.prior_forward = task.get_prior()
        self.dim_parameters = task.dim_parameters
        self.name = name if task.name is None else task.name

        # The prior has no parent models, hence an empty input connector.
        super().__init__(InputConnector.from_list([]), self.name + "_prior")

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # Draw all samples at once, then return one flat array per draw,
        # which is the output format ABCpy expects.
        draws = np.array(self.prior_forward(num_forward_simulations))
        return [np.reshape(np.array([draw]), (-1,)) for draw in draws]

    def get_output_dimension(self):
        return self.dim_parameters

    def _check_input(self, input_values):
        # No validation: the wrapped prior handles its own domain.
        return True

    def _check_output(self, values):
        return True


class ABCpySimulator(ProbabilisticModel, Continuous):
    """Expose an sbibm task simulator as an ABCpy probabilistic model."""

    def __init__(self, parameters, task, max_calls, name="ABCpy_simulator"):
        # Budget-limited simulator provided by the task.
        self.simulator = task.get_simulator(max_calls=max_calls)
        self.output_dim = task.dim_data
        self.name = name if task.name is None else task.name

        super().__init__(InputConnector.from_list(parameters), self.name)

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # Run the torch simulator once per requested simulation and return
        # flat numpy arrays, which is the output format ABCpy expects.
        theta = torch.tensor(abcpy_input_values)
        outputs = []
        for _ in range(num_forward_simulations):
            outputs.append(np.reshape(np.array(self.simulator(theta)), (-1,)))
        return outputs

    def get_output_dimension(self):
        return self.output_dim

    def _check_input(self, input_values):
        return True

    def _check_output(self, values):
        return True


def get_distance(distance: str, statistics: Statistics) -> Callable:
    """Return the ABCpy distance object for a given distance name.

    Args:
        distance: One of "l2" (Euclidean distance), "log_reg" (LogReg) or
            "pen_log_reg" (PenLogReg).
        statistics: ABCpy statistics object the distance operates on.

    Returns:
        Instantiated ABCpy distance.

    Raises:
        NotImplementedError: For "Wasserstein" (not applicable with a single
            simulation per parameter value) or any unknown distance name.
    """
    if distance == "l2":
        return Euclidean(statistics)

    if distance == "log_reg":
        return LogReg(statistics)

    if distance == "pen_log_reg":
        return PenLogReg(statistics)

    if distance == "Wasserstein":
        raise NotImplementedError(
            "Wasserstein distance not yet implemented as we are considering only one single "
            "simulation for parameter value"
        )

    raise NotImplementedError(f"Distance '{distance}' not implemented.")


def journal_cleanup_rejABC(journal, percentile=None, threshold=None):
    """Filter a Journal, keeping only samples below a distance cutoff.

    This function takes a Journal file (typically produced by a Rejection ABC
    run with a very large epsilon value) and keeps only the samples whose
    distance is below either some percentile of the achieved distances or some
    specified threshold. It is a very simple way to obtain a Rejection ABC
    which works on a percentile of the obtained distances.

    Args:
        journal: ABCpy Journal to filter (its last iteration is used).
        percentile: Percentile (0-100) of the distances to keep; mutually
            exclusive with `threshold`.
        threshold: Absolute distance cutoff; mutually exclusive with
            `percentile`.

    Returns:
        A new Journal containing only the selected samples, with uniform
        weights.

    Raises:
        RuntimeError: If neither or both of `percentile` and `threshold` are
            given, or if no simulation falls below the cutoff.
    """
    if (threshold is None) == (percentile is None):
        raise RuntimeError(
            "Exactly one of percentile or threshold needs to be specified."
        )

    if percentile is not None:
        distance_cutoff = np.percentile(journal.distances[-1], percentile)
    else:
        distance_cutoff = threshold
    picked_simulations = journal.distances[-1] < distance_cutoff
    # `picked_simulations` is a boolean mask over ALL samples, so its length
    # is always the total sample count; emptiness must be checked via the
    # number of True entries, not `len()`.
    n_reduced_samples = int(np.sum(picked_simulations))
    if n_reduced_samples == 0:
        raise RuntimeError(
            "The specified value of threshold is too low, no simulations are selected."
        )
    new_distances = journal.distances[-1][picked_simulations]

    new_journal = Journal(journal._type)
    new_journal.configuration["n_samples"] = n_reduced_samples
    new_journal.configuration["n_samples_per_param"] = journal.configuration[
        "n_samples_per_param"
    ]
    new_journal.configuration["epsilon"] = distance_cutoff

    new_accepted_parameters = []
    param_names = journal.get_parameters().keys()
    new_names_and_parameters = {name: [] for name in param_names}
    # Iterate only over the selected indices; `np.where` already restricts
    # the loop to entries where the mask is True.
    for i in np.where(picked_simulations)[0]:
        new_accepted_parameters.append(journal.get_accepted_parameters()[i])
        for name in param_names:
            new_names_and_parameters[name].append(journal.get_parameters()[name][i])

    new_journal.add_accepted_parameters(new_accepted_parameters)
    new_journal.add_weights(np.ones((n_reduced_samples, 1)))
    new_journal.add_ESS_estimate(np.ones((n_reduced_samples, 1)))
    new_journal.add_distances(new_distances)
    new_journal.add_user_parameters(new_names_and_parameters)
    new_journal.number_of_simulations.append(journal.number_of_simulations[-1])

    return new_journal
166 changes: 166 additions & 0 deletions sbibm/algorithms/abcpy/rejection_abc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from typing import Optional, Tuple

import numpy as np
import torch
from abcpy.backends import BackendDummy
from abcpy.inferences import RejectionABC
from abcpy.statistics import Identity

import sbibm
from sbibm.algorithms.abcpy.abcpy_utils import (
ABCpyPrior,
ABCpySimulator,
get_distance,
journal_cleanup_rejABC,
)
from sbibm.tasks.task import Task
from sbibm.utils.io import save_tensor_to_csv
from sbibm.utils.kde import get_kde
from sbibm.utils.torch import sample


def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_simulations_per_param: Optional[int] = 1,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    save_distances: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    kde_bandwidth: Optional[str] = None,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `ABCpy`

    ABCpy does not implement LRA post processing. SASS is supported in ABCpy but not yet implemented here.

    Choose one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_simulations_per_param: The number of the total simulations budget to produce for each parameter value;
            used to better estimate the ABC distance from the observation for each parameter value. Useful to
            investigate whether splitting the budget to do multi-simulations per parameter is helpful or not.
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use; can be "l2" (Euclidean Distance), "log_reg" (LogReg) or "pen_log_reg" (PenLogReg)
        save_distances: If True, stores distances of samples to disk
        sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012.
            Not yet implemented, left for compatibility with other algorithms.
        sass_fraction: Fraction of simulation budget to use for sass. Unused for now, left for compatibility
            with other algorithms.
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary statistics. Unused for now, only
            left for compatibility with other algorithms.
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection
    Returns:
        Samples from posterior, number of simulator calls, log probability of true params if computable
    """
    if sass:
        raise NotImplementedError("SASS not yet implemented")

    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    if (eps is not None) + (num_top_samples is not None) + (quantile is not None) != 1:
        raise RuntimeError(
            "Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified."
        )

    log = sbibm.get_logger(__name__)
    log.info("Running REJ-ABC")

    if observation is None:
        observation = task.get_observation(num_observation)

    if num_top_samples is not None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    # use the dummy backend only (for now)
    backend = BackendDummy()

    # as Rejection ABC works by directly drawing until you get below some threshold -> here we set the threshold to
    # very large value and then we select the best objects later.

    statistics = Identity(
        degree=1, cross=True
    )  # we assume statistics are already computed inside the task
    distance_calc = get_distance(distance, statistics)

    # define prior and model from the task simulator:
    parameters = ABCpyPrior(task)
    model = ABCpySimulator([parameters], task, max_calls=num_simulations)

    # inference
    sampler = RejectionABC(
        root_models=[model], distances=[distance_calc], backend=backend
    )
    # NOTE: ABCpy's RejectionABC simulates until `n_samples` draws are
    # accepted below `epsilon`; with an effectively infinite epsilon every
    # draw is accepted, which turns `n_samples` into a fixed simulation
    # budget (also enforced by `max_calls` above). The accepted samples are
    # then post-processed below to keep only the closest ones.
    journal_standard_ABC = sampler.sample(
        [[np.array(observation)]],
        n_samples=num_simulations // num_simulations_per_param,
        n_samples_per_param=num_simulations_per_param,
        epsilon=10 ** 50,
    )  # set epsilon to very large number
    if (
        eps is None
    ):  # then we use quantile to select the posterior samples from the generated ones
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, percentile=quantile * 100
        )
    else:
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, threshold=eps
        )

    actual_n_simulations = (
        journal_standard_ABC.number_of_simulations[-1]
        * journal_standard_ABC.configuration["n_samples_per_param"]
    )
    # this takes into account the fact that num_simulations can be not divisible by num_simulations_per_param
    expected_n_simulations = (
        num_simulations // num_simulations_per_param
    ) * num_simulations_per_param
    assert actual_n_simulations == expected_n_simulations

    if save_distances:
        distances = torch.tensor(journal_standard_ABC_reduced.distances[-1])
        save_tensor_to_csv("distances.csv", distances)

    samples = torch.tensor(
        journal_standard_ABC_reduced.get_accepted_parameters()
    ).squeeze()

    if kde_bandwidth is not None:
        # fit KDE on data to obtain samples
        log.info(
            f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        kde = get_kde(
            samples,
            bandwidth=kde_bandwidth,
        )
        samples = kde.sample(num_samples)
    else:
        # otherwise sample with replacement until you get the num_samples you want
        log.info(f"Sampling {num_samples} samples from trace")
        samples = sample(samples, num_samples=num_samples, replace=True)

    # The latter is None as we cannot compute posterior probabilities with ABCpy (it would be the posterior probability
    # of the true parameter space in case that was available)
    return samples, actual_n_simulations, None
60 changes: 60 additions & 0 deletions tests/algorithms/test_abcpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import numpy as np
import pytest
import torch

import sbibm
from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc


@pytest.mark.parametrize("task_name", ["gaussian_linear_uniform"])
def test_abcpy_rejection(
    task_name,
    num_observation=1,
    num_samples=10_000,
    num_simulations=1_000,
):
    """Smoke-test ABCpy rejection ABC for the three cutoff-selection modes:
    quantile, num_top_samples, and explicit eps."""
    # Fix seeds for reproducibility.
    torch.manual_seed(1)
    np.random.seed(1)

    task = sbibm.get_task(task_name)

    # Keyword arguments shared by all three runs.
    common = dict(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations=num_simulations,
        kde_bandwidth="cv",
    )

    # Cutoff selected via quantile.
    posterior_samples, actual_num_simulations, _ = rej_abc(
        quantile=0.1, num_top_samples=None, **common
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # Cutoff selected via number of top samples.
    posterior_samples, actual_num_simulations, _ = rej_abc(
        num_top_samples=500, **common
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # Cutoff selected via explicit epsilon, with multiple simulations per
    # parameter value.
    posterior_samples, actual_num_simulations, _ = rej_abc(
        num_simulations_per_param=2, eps=6, num_top_samples=None, **common
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples