diff --git a/CHANGELOG.md b/CHANGELOG.md index 25fcb68..65dd13b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,13 @@ All notable changes to this project are documented here. - +## [Unreleased] + +### Added +- `BootstrapDistribution.summarize` has a new parameter `precision: Optional[Union[int, Literal["auto"]]] = None` to control rounding behaviour. + +### Changed +- `BootstrapSummary.round` parameter `precision` now defaults to `"auto"` (previously `None`). ## [0.1.1] - 2025-11-03 diff --git a/README.md b/README.md index 4371b72..93c930a 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ This package implements the core logic of Bayesian bootstrapping in Python, alon ## Why use this package? -Learn and experiment with Bayesian bootstrap inference in Python -Quickly compute posterior-like uncertainty intervals for arbitrary statistics -Extend easily with your own weighted statistic functions +- Learn and experiment with Bayesian bootstrap inference in Python +- Quickly compute posterior-like uncertainty intervals for arbitrary statistics +- Extend easily with your own weighted statistic functions ## Installation @@ -90,7 +90,8 @@ Performs Bayesian bootstrapping on `data` using the given statistic. 
A `BootstrapDistribution` object with: - `.estimates`: array of bootstrapped statistic values -- `.summarize(level)`: returns a `BootstrapSummary` with `mean`, `ci_low`, `ci_high`, and `level` +- `.summarize(level, precision)`: returns a `BootstrapSummary` with `mean`, `ci_low`, `ci_high`, + and `level`; rounded if `precision` is integer-valued or `"auto"` ### Weighted statistic functions included diff --git a/bbstat/evaluate.py b/bbstat/evaluate.py index d43479f..fcc7af3 100644 --- a/bbstat/evaluate.py +++ b/bbstat/evaluate.py @@ -25,7 +25,7 @@ """ from dataclasses import dataclass -from typing import Optional +from typing import Literal, Optional, Union import numpy as np @@ -106,22 +106,32 @@ def ci_width(self) -> float: """Returns the width of the credible interval.""" return self.ci_high - self.ci_low - def round(self, precision: Optional[int] = None) -> "BootstrapSummary": + def round( + self, + precision: Union[int, Literal["auto"]] = "auto", + ) -> "BootstrapSummary": """ Returns a new version of the summary with rounded values. When `precision` is given, the mean and credible interval bounds are rounded - to this number of digits. If `precision=None` (default), the precision is - computed form the width of the credible interval. + to this number of digits. If `precision="auto"` (default), the precision is + computed from the width of the credible interval. Args: - precision (int, optional): The desired precision for rounding. + precision (int or "auto"): The desired precision for rounding. Default is "auto". Returns: BootstrapSummary: The summary of a Bayesian bootstrap procedure's result. + + Raises: + ValueError: If `precision` is not integer or "auto". """ - if precision is None: + if precision == "auto": precision = get_precision_for_rounding(self.ci_width) + elif not isinstance(precision, int): + raise ValueError( + f"Invalid parameter {precision=:}: must be integer or 'auto'." 
+ ) return self.__class__( mean=round(self.mean, precision), ci_low=round(self.ci_low, precision), @@ -226,20 +236,32 @@ def __str__(self) -> str: size = len(self) return f"BootstrapDistribution({mean=:}, {size=:})" - def summarize(self, level: float = 0.87) -> BootstrapSummary: + def summarize( + self, + level: float = 0.87, + precision: Optional[Union[int, Literal["auto"]]] = None, + ) -> BootstrapSummary: """ Returns a `BootstrapSummary` object. - This method is a wrapper for `BootstrapSummary.from_estimates`. + This method is a wrapper for `BootstrapSummary.from_estimates`. If `precision=None` + (default), the summary is returned without rounding. If `precision="auto"` + (or integer-valued), the summary is rounded. Args: level (float): The desired level for the credible interval (must be between 0 and 1). + precision (int or "auto" or None): The desired precision for rounding. + Default is None. Returns: BootstrapSummary: the summary object. Raises: ValueError: If the `level` is not between 0 and 1. + ValueError: If `precision` is not integer or "auto" or None. 
""" - return BootstrapSummary.from_estimates(self.estimates, level=level) + summary = BootstrapSummary.from_estimates(self.estimates, level=level) + if precision is None: + return summary + return summary.round(precision) diff --git a/bbstat/plot.py b/bbstat/plot.py index 9da0ffb..53856c5 100644 --- a/bbstat/plot.py +++ b/bbstat/plot.py @@ -66,10 +66,7 @@ def plot( summary = bootstrap_distribution.summarize(level) if precision is not None: - if precision == "auto": - summary = summary.round() - else: - summary = summary.round(precision) + summary = summary.round(precision) param_str = f"{summary.mean} ({summary.ci_low}, {summary.ci_high})" diff --git a/docs/getting_started.md b/docs/getting_started.md index 5720265..bf3c385 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -43,6 +43,14 @@ print(summary.round()) # BootstrapSummary(mean=2.6, ci_low=2.1, ci_high=3.2, level=0.95) ``` +Additionally, you can specify the precision in `BootstrapDistribution.summarize`: + +```python +summary = distribution.summarize(level=0.95, precision="auto") +print(summary) +# BootstrapSummary(mean=2.6, ci_low=2.1, ci_high=3.2, level=0.95) +``` + Here the mean estimate is about 2.6 cups per day, with a 95% credible interval of roughly [2.1, 3.2]. The uncertainty reflects variation in the weights each sample could have in the population, not in resampled data points. 
## Bootstrapping a quantile @@ -79,7 +87,7 @@ data = (study_hours, exam_scores) # Weighted Pearson correlation via Bayesian bootstrapping distribution = bootstrap(data=data, statistic_fn="pearson_dependency", n_boot=2000, seed=1) -summary = distribution.summarize(level=0.95).round() +summary = distribution.summarize(level=0.95, precision="auto") print(summary) # BootstrapSummary(mean=0.9969, ci_low=0.9911, ci_high=0.9992, level=0.95) ``` @@ -133,7 +141,7 @@ def weighted_geometric_mean(data, weights): data = np.array([1.2, 1.5, 2.0, 2.8, 3.1]) distribution = bootstrap(data=data, statistic_fn=weighted_geometric_mean, n_boot=1500, seed=1) -summary = distribution.summarize().round() +summary = distribution.summarize(precision="auto") print(summary) # BootstrapSummary(mean=2.01, ci_low=1.58, ci_high=2.48, level=0.87) ``` diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 092eb34..8adf2d4 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Literal, Optional, Union import numpy as np import pytest @@ -65,11 +65,11 @@ def test_bootstrap_summary_ci_width( pytest.param(1, 0.5, 0.1, 0.9), pytest.param(2, 0.51, 0.11, 0.91), pytest.param(3, 0.511, 0.111, 0.911), - pytest.param(None, 0.51, 0.11, 0.91), + pytest.param("auto", 0.51, 0.11, 0.91), ], ) def test_bootstrap_summary_round( - precision: Optional[int], + precision: Union[int, Literal["auto"]], expected_mean: float, expected_ci_low: float, expected_ci_high: float, @@ -90,6 +90,12 @@ def test_bootstrap_summary_round( np.testing.assert_allclose(bootstrap_summary_rounded.ci_high, expected_ci_high) +def test_bootstrap_summary_round_fail() -> None: + bootstrap_summary = BootstrapSummary(mean=0.5, ci_low=0.1, ci_high=0.9, level=0.87) + with pytest.raises(ValueError): + _ = bootstrap_summary.round(precision="not-auto") # type: ignore[arg-type] + + @pytest.mark.parametrize( "level, expected_ci_low, expected_ci_high", [ @@ -145,7 +151,7 @@ 
def test_bootstrap_distribution(estimates: NDArray[np.floating]) -> None: pytest.param(0.8), ], ) -def test_bootstrap_distribution_summarize( +def test_bootstrap_distribution_summarize_level( estimates: NDArray[np.floating], level: float, ) -> None: @@ -156,6 +162,26 @@ def test_bootstrap_distribution_summarize( np.testing.assert_allclose(bootstrap_summary.level, level) +@pytest.mark.parametrize( + "precision", + [ + pytest.param(0), + pytest.param(1), + pytest.param("auto"), + ], +) +def test_bootstrap_distribution_summarize_precision( + estimates: NDArray[np.floating], + precision: Optional[Union[int, Literal["auto"]]], +) -> None: + bootstrap_distribution = BootstrapDistribution(estimates) + summary0 = bootstrap_distribution.summarize() + summary1 = bootstrap_distribution.summarize(precision=precision) + if precision is not None: + summary0 = summary0.round(precision) + assert summary0 == summary1 + + @pytest.mark.parametrize( "estimates", [