-
Notifications
You must be signed in to change notification settings - Fork 48
First attempt at wrapping ABCpy - work-in-progress #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
eb7f8fb
e8d59a7
f81edba
9d5abef
055f74c
a409676
30b3a5a
c9c6469
b35211e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ sbi-logs/ | |
| # Editors | ||
| .tags | ||
| .vscode | ||
| .idea | ||
|
|
||
| # macOS | ||
| *.DS_Store | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| from typing import Callable | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from abcpy.continuousmodels import Continuous, InputConnector | ||
| from abcpy.distances import Euclidean, LogReg, PenLogReg | ||
| from abcpy.output import Journal | ||
| from abcpy.probabilisticmodels import ProbabilisticModel | ||
| from abcpy.statistics import Statistics | ||
|
|
||
|
|
||
class ABCpyPrior(ProbabilisticModel, Continuous):
    """Expose an sbibm task prior as an ABCpy root probabilistic model."""

    def __init__(self, task, name="ABCpy_prior"):
        # Callable returning `n` parameter draws from the task prior.
        self.prior_forward = task.get_prior()
        self.dim_parameters = task.dim_parameters
        self.name = name if task.name is None else task.name

        # A prior is a root model: it receives no inputs from parent models.
        super(ABCpyPrior, self).__init__(
            InputConnector.from_list([]), self.name + "_prior"
        )

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # ABCpy expects a list with one flat numpy array per simulation.
        draws = np.array(self.prior_forward(num_forward_simulations))
        return [np.array([draw]).reshape(-1) for draw in draws]

    def get_output_dimension(self):
        # Size of a single parameter vector produced by this prior.
        return self.dim_parameters

    def _check_input(self, input_values):
        # Validation is delegated to the wrapped task; accept everything.
        return True

    def _check_output(self, values):
        return True
||
|
|
||
class ABCpySimulator(ProbabilisticModel, Continuous):
    """Expose an sbibm task simulator as an ABCpy probabilistic model."""

    def __init__(self, parameters, task, max_calls, name="ABCpy_simulator"):
        # Budget-limited simulator handed out by the sbibm task.
        self.simulator = task.get_simulator(max_calls=max_calls)
        self.output_dim = task.dim_data
        self.name = name if task.name is None else task.name

        super(ABCpySimulator, self).__init__(
            InputConnector.from_list(parameters), self.name
        )

    def forward_simulate(
        self, abcpy_input_values, num_forward_simulations, rng=np.random.RandomState()
    ):
        # sbibm simulators consume torch tensors; convert once, then run one
        # simulation per requested forward draw.
        theta = torch.tensor(abcpy_input_values)
        simulations = [
            self.simulator(theta) for _ in range(num_forward_simulations)
        ]
        # ABCpy expects a list with one flat numpy array per simulation.
        return [np.array(sim).reshape(-1) for sim in simulations]

    def get_output_dimension(self):
        # Size of a single simulator output vector.
        return self.output_dim

    def _check_input(self, input_values):
        # Validation is delegated to the wrapped task; accept everything.
        return True

    def _check_output(self, values):
        return True
||
|
|
||
def get_distance(distance: str, statistics: "Statistics") -> Callable:
    """Return the ABCpy distance object for the given distance name.

    Args:
        distance: One of "l2" (Euclidean), "log_reg" (LogReg) or
            "pen_log_reg" (PenLogReg).
        statistics: ABCpy statistics object the distance operates on.

    Returns:
        An instantiated ABCpy distance.

    Raises:
        NotImplementedError: For "Wasserstein" (not supported yet, since only
            a single simulation per parameter value is used) or for any
            unknown distance name.
    """
    if distance == "l2":
        return Euclidean(statistics)
    if distance == "log_reg":
        return LogReg(statistics)
    if distance == "pen_log_reg":
        return PenLogReg(statistics)
    if distance == "Wasserstein":
        # Fixed typo in the original message ("distace" -> "distance").
        raise NotImplementedError(
            "Wasserstein distance not yet implemented as we are considering only one single "
            "simulation for parameter value"
        )
    raise NotImplementedError(f"Distance '{distance}' not implemented.")
||
|
|
||
def journal_cleanup_rejABC(journal, percentile=None, threshold=None):
    """Post-process a Rejection-ABC Journal, keeping only the best samples.

    The journal is typically produced by a Rejection ABC run with a very
    large epsilon value (so that all simulations are accepted); this function
    then keeps only the samples whose achieved distance is below either some
    percentile of the achieved distances or some explicit threshold. It is a
    very simple way to obtain a Rejection ABC which works on a percentile of
    the obtained distances.

    Args:
        journal: ABCpy ``Journal`` from a (large-epsilon) Rejection ABC run.
        percentile: Keep samples below this percentile of the distances.
        threshold: Keep samples with distance strictly below this value.

    Returns:
        A new ``Journal`` containing only the selected samples, with uniform
        weights and updated configuration.

    Raises:
        RuntimeError: If not exactly one of ``percentile``/``threshold`` is
            specified, or if no simulation falls below the cutoff.
    """
    if (threshold is None) == (percentile is None):
        # Fixed message: the parameter is named `threshold`, not `epsilon`.
        raise RuntimeError(
            "Exactly one of percentile or threshold needs to be specified."
        )

    if percentile is not None:
        distance_cutoff = np.percentile(journal.distances[-1], percentile)
    else:
        distance_cutoff = threshold
    picked_simulations = journal.distances[-1] < distance_cutoff
    new_distances = journal.distances[-1][picked_simulations]
    n_reduced_samples = np.sum(picked_simulations)
    # BUG FIX: the original tested `len(picked_simulations) == 0`, but that is
    # the length of the boolean mask (the total number of simulations) and is
    # therefore never zero; count the accepted simulations instead.
    if n_reduced_samples == 0:
        raise RuntimeError(
            "The specified value of threshold is too low, no simulations are selected."
        )
    new_journal = Journal(journal._type)
    new_journal.configuration["n_samples"] = n_reduced_samples
    new_journal.configuration["n_samples_per_param"] = journal.configuration[
        "n_samples_per_param"
    ]
    new_journal.configuration["epsilon"] = distance_cutoff

    # Hoist the journal accessors out of the loop so they are called once.
    accepted_parameters = journal.get_accepted_parameters()
    parameters = journal.get_parameters()
    param_names = parameters.keys()
    new_accepted_parameters = []
    new_names_and_parameters = {name: [] for name in param_names}
    # np.where already yields only the picked indices, so the original inner
    # `if picked_simulations[i]` check was redundant and has been dropped.
    for i in np.where(picked_simulations)[0]:
        new_accepted_parameters.append(accepted_parameters[i])
        for name in param_names:
            new_names_and_parameters[name].append(parameters[name][i])

    new_journal.add_accepted_parameters(new_accepted_parameters)
    new_journal.add_weights(np.ones((n_reduced_samples, 1)))
    new_journal.add_ESS_estimate(np.ones((n_reduced_samples, 1)))
    new_journal.add_distances(new_distances)
    new_journal.add_user_parameters(new_names_and_parameters)
    new_journal.number_of_simulations.append(journal.number_of_simulations[-1])

    return new_journal
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,166 @@ | ||
| from typing import Optional, Tuple | ||
|
|
||
| import numpy as np | ||
| import torch | ||
| from abcpy.backends import BackendDummy | ||
| from abcpy.inferences import RejectionABC | ||
| from abcpy.statistics import Identity | ||
|
|
||
| import sbibm | ||
| from sbibm.algorithms.abcpy.abcpy_utils import ( | ||
| ABCpyPrior, | ||
| ABCpySimulator, | ||
| get_distance, | ||
| journal_cleanup_rejABC, | ||
| ) | ||
| from sbibm.tasks.task import Task | ||
| from sbibm.utils.io import save_tensor_to_csv | ||
| from sbibm.utils.kde import get_kde | ||
| from sbibm.utils.torch import sample | ||
|
|
||
|
|
||
def run(
    task: Task,
    num_samples: int,
    num_simulations: int,
    num_simulations_per_param: Optional[int] = 1,
    num_observation: Optional[int] = None,
    observation: Optional[torch.Tensor] = None,
    num_top_samples: Optional[int] = 100,
    quantile: Optional[float] = None,
    eps: Optional[float] = None,
    distance: str = "l2",
    save_distances: bool = False,
    sass: bool = False,
    sass_fraction: float = 0.5,
    sass_feature_expansion_degree: int = 3,
    kde_bandwidth: Optional[str] = None,
) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
    """Runs REJ-ABC from `ABCpy`.

    ABCpy does not implement LRA post processing. SASS is supported in ABCpy
    but not yet implemented here.

    Choose exactly one of `num_top_samples`, `quantile`, `eps`.

    Args:
        task: Task instance
        num_samples: Number of samples to generate from posterior
        num_simulations: Simulation budget
        num_simulations_per_param: The number of the total simulations budget to produce
            for each parameter value; used to better estimate the ABC distance from the
            observation for each parameter value. Useful to investigate whether splitting
            the budget to do multi-simulations per parameter is helpful or not.
        num_observation: Observation number to load, alternative to `observation`
        observation: Observation, alternative to `num_observation`
        num_top_samples: If given, will use `top=True` with num_top_samples
        quantile: Quantile to use
        eps: Epsilon threshold to use
        distance: Distance to use; can be "l2" (Euclidean Distance), "log_reg" (LogReg)
            or "pen_log_reg" (PenLogReg)
        save_distances: If True, stores distances of samples to disk
        sass: If True, summary statistics are learned as in Fearnhead & Prangle 2012.
            Not yet implemented, left for compatibility with other algorithms.
        sass_fraction: Fraction of simulation budget to use for sass. Unused for now,
            left for compatibility with other algorithms.
        sass_feature_expansion_degree: Degree of polynomial expansion of the summary
            statistics. Unused for now, only left for compatibility with other algorithms.
        kde_bandwidth: If not None, will resample using KDE when necessary, set
            e.g. to "cv" for cross-validated bandwidth selection

    Returns:
        Samples from posterior, number of simulator calls, log probability of true
        params if computable

    Raises:
        NotImplementedError: If `sass` is requested (not implemented here).
        RuntimeError: If not exactly one of `num_top_samples`/`quantile`/`eps` is given.
    """
    if sass:
        raise NotImplementedError("SASS not yet implemented")

    # Exactly one way of specifying the observation must be used.
    assert not (num_observation is None and observation is None)
    assert not (num_observation is not None and observation is not None)

    if (eps is not None) + (num_top_samples is not None) + (quantile is not None) != 1:
        raise RuntimeError(
            "Exactly one of `num_top_samples`, `quantile`, `eps` needs to be specified."
        )

    log = sbibm.get_logger(__name__)
    # Plain string: the original used an f-string with no placeholders.
    log.info("Running REJ-ABC")

    if observation is None:
        observation = task.get_observation(num_observation)

    # Translate `num_top_samples` into an acceptance quantile over the part of
    # the budget actually used for inference.
    if num_top_samples is not None:
        if sass:
            quantile = num_top_samples / (
                num_simulations - int(sass_fraction * num_simulations)
            )
        else:
            quantile = num_top_samples / num_simulations

    # use the dummy backend only (for now)
    backend = BackendDummy()

    # As Rejection ABC works by directly drawing until you get below some
    # threshold -> here we set the threshold to a very large value and then we
    # select the best objects later (see journal_cleanup_rejABC).

    statistics = Identity(
        degree=1, cross=True
    )  # we assume statistics are already computed inside the task
    distance_calc = get_distance(distance, statistics)

    # define prior and model from the task simulator:
    parameters = ABCpyPrior(task)
    model = ABCpySimulator([parameters], task, max_calls=num_simulations)

    # inference
    sampler = RejectionABC(
        root_models=[model], distances=[distance_calc], backend=backend
    )
    journal_standard_ABC = sampler.sample(
        [[np.array(observation)]],
        n_samples=num_simulations // num_simulations_per_param,
        n_samples_per_param=num_simulations_per_param,
        epsilon=10 ** 50,
    )  # set epsilon to very large number so every simulation is accepted
    if (
        eps is None
    ):  # then we use quantile to select the posterior samples from the generated ones
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, percentile=quantile * 100
        )
    else:
        journal_standard_ABC_reduced = journal_cleanup_rejABC(
            journal_standard_ABC, threshold=eps
        )

    actual_n_simulations = (
        journal_standard_ABC.number_of_simulations[-1]
        * journal_standard_ABC.configuration["n_samples_per_param"]
    )
    # this takes into account the fact that num_simulations can be not divisible
    # by num_simulations_per_param
    expected_n_simulations = (
        num_simulations // num_simulations_per_param
    ) * num_simulations_per_param
    assert actual_n_simulations == expected_n_simulations

    if save_distances:
        distances = torch.tensor(journal_standard_ABC_reduced.distances[-1])
        save_tensor_to_csv("distances.csv", distances)

    samples = torch.tensor(
        journal_standard_ABC_reduced.get_accepted_parameters()
    ).squeeze()

    if kde_bandwidth is not None:
        # fit KDE on data to obtain samples
        log.info(
            f"KDE on {samples.shape[0]} samples with bandwidth option {kde_bandwidth}"
        )
        kde = get_kde(
            samples,
            bandwidth=kde_bandwidth,
        )
        samples = kde.sample(num_samples)
    else:
        # otherwise sample with replacement until you get the num_samples you want
        log.info(f"Sampling {num_samples} samples from trace")
        samples = sample(samples, num_samples=num_samples, replace=True)

    # The latter is None as we cannot compute posterior probabilities with ABCpy
    # (it would be the posterior probability of the true parameter in case that
    # was available)
    return samples, actual_n_simulations, None
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| import numpy as np | ||
| import pytest | ||
| import torch | ||
|
|
||
| import sbibm | ||
| from sbibm.algorithms.abcpy.rejection_abc import run as rej_abc | ||
|
|
||
|
|
||
@pytest.mark.parametrize(
    "task_name",
    # Plain list: the original wrapped it in a pointless identity comprehension.
    ["gaussian_linear_uniform"],
)
def test_abcpy_rejection(
    task_name,
    num_observation=1,
    num_samples=10_000,
    num_simulations=1_000,
):
    """Smoke-test the ABCpy REJ-ABC wrapper on a cheap task.

    Exercises the three mutually-exclusive acceptance criteria (quantile,
    num_top_samples, eps) and checks that the reported simulation count and
    the number of returned posterior samples match the request.
    """
    # set random seeds for reproducibility:
    torch.manual_seed(1)
    np.random.seed(1)

    task = sbibm.get_task(task_name)

    # quantile-based selection
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations=num_simulations,
        quantile=0.1,
        num_top_samples=None,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # top-samples-based selection (default acceptance mode)
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations=num_simulations,
        num_top_samples=500,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples

    # epsilon-based selection with multiple simulations per parameter
    posterior_samples, actual_num_simulations, _ = rej_abc(
        task=task,
        num_samples=num_samples,
        num_observation=num_observation,
        num_simulations_per_param=2,
        num_simulations=num_simulations,
        eps=6,
        num_top_samples=None,
        kde_bandwidth="cv",
    )
    assert actual_num_simulations == num_simulations
    assert posterior_samples.shape[0] == num_samples
Uh oh!
There was an error while loading. Please reload this page.