Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

All notable changes to this project are documented here.

<!-- ## [Unreleased] -->
## [Unreleased]

### Added
- `BootstrapDistribution.summarize` has a new parameter `precision: Optional[Union[int, Literal["auto"]]] = None` to control rounding behaviour; with the default `None`, the summary is returned unrounded.

### Changed
- `BootstrapSummary.round` parameter `precision` is now typed `Union[int, Literal["auto"]]` and defaults to `"auto"` (previously `None`); any other value raises `ValueError`.

## [0.1.1] - 2025-11-03

Expand Down
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ This package implements the core logic of Bayesian bootstrapping in Python, alon

## Why use this package?

Learn and experiment with Bayesian bootstrap inference in Python
Quickly compute posterior-like uncertainty intervals for arbitrary statistics
Extend easily with your own weighted statistic functions
- Learn and experiment with Bayesian bootstrap inference in Python
- Quickly compute posterior-like uncertainty intervals for arbitrary statistics
- Extend easily with your own weighted statistic functions

## Installation

Expand Down Expand Up @@ -90,7 +90,8 @@ Performs Bayesian bootstrapping on `data` using the given statistic.
A `BootstrapDistribution` object with:

- `.estimates`: array of bootstrapped statistic values
- `.summarize(level)`: returns a `BootstrapSummary` with `mean`, `ci_low`, `ci_high`, and `level`
- `.summarize(level, precision)`: returns a `BootstrapSummary` with `mean`, `ci_low`, `ci_high`,
and `level`; the values are rounded if `precision` is an integer or `"auto"`, and left unrounded if it is `None` (default)

### Weighted statistic functions included

Expand Down
40 changes: 31 additions & 9 deletions bbstat/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"""

from dataclasses import dataclass
from typing import Optional
from typing import Literal, Optional, Union

import numpy as np

Expand Down Expand Up @@ -106,22 +106,32 @@ def ci_width(self) -> float:
"""Returns the width of the credible interval."""
return self.ci_high - self.ci_low

def round(self, precision: Optional[int] = None) -> "BootstrapSummary":
def round(
self,
precision: Union[int, Literal["auto"]] = "auto",
) -> "BootstrapSummary":
"""
Returns a new version of the summary with rounded values.

When `precision` is given, the mean and credible interval bounds are rounded
to this number of digits. If `precision=None` (default), the precision is
computed form the width of the credible interval.
to this number of digits. If `precision="auto"` (default), the precision is
computed from the width of the credible interval.

Args:
precision (int, optional): The desired precision for rounding.
precision (int or "auto"): The desired precision for rounding. Default is "auto".

Returns:
BootstrapSummary: The summary of a Bayesian bootstrap procedure's result.

Raises:
ValueError: If `precision` is not integer or "auto".
"""
if precision is None:
if precision == "auto":
precision = get_precision_for_rounding(self.ci_width)
elif not isinstance(precision, int):
raise ValueError(
f"Invalid parameter {precision=:}: must be integer or 'auto'."
)
return self.__class__(
mean=round(self.mean, precision),
ci_low=round(self.ci_low, precision),
Expand Down Expand Up @@ -226,20 +236,32 @@ def __str__(self) -> str:
size = len(self)
return f"BootstrapDistribution({mean=:}, {size=:})"

def summarize(self, level: float = 0.87) -> BootstrapSummary:
def summarize(
self,
level: float = 0.87,
precision: Optional[Union[int, Literal["auto"]]] = None,
) -> BootstrapSummary:
"""
Returns a `BootstrapSummary` object.

This method is a wrapper for `BootstrapSummary.from_estimates`.
This method is a wrapper for `BootstrapSummary.from_estimates`. If `precision=None`
(default), the summary is returned without rounding. If `precision="auto"`
(or integer-valued), the summary is rounded.

Args:
level (float): The desired level for the credible interval
(must be between 0 and 1).
precision (int or "auto" or None): The desired precision for rounding.
Default is None.

Returns:
BootstrapSummary: the summary object.

Raises:
ValueError: If the `level` is not between 0 and 1.
ValueError: If `precision` is not integer or "auto" or None.
"""
return BootstrapSummary.from_estimates(self.estimates, level=level)
summary = BootstrapSummary.from_estimates(self.estimates, level=level)
if precision is None:
return summary
return summary.round(precision)
5 changes: 1 addition & 4 deletions bbstat/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,7 @@ def plot(
summary = bootstrap_distribution.summarize(level)

if precision is not None:
if precision == "auto":
summary = summary.round()
else:
summary = summary.round(precision)
summary = summary.round(precision)

param_str = f"{summary.mean} ({summary.ci_low}, {summary.ci_high})"

Expand Down
12 changes: 10 additions & 2 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ print(summary.round())
# BootstrapSummary(mean=2.6, ci_low=2.1, ci_high=3.2, level=0.95)
```

Alternatively, you can pass `precision` directly to `BootstrapDistribution.summarize` to get the rounded summary in one step:

```python
summary = distribution.summarize(level=0.95, precision="auto")
print(summary)
# BootstrapSummary(mean=2.6, ci_low=2.1, ci_high=3.2, level=0.95)
```

Here the mean estimate is about 2.6 cups per day, with a 95% credible interval of roughly [2.1, 3.2]. The uncertainty reflects variation in the weights each sample could have in the population, not in resampled data points.

## Bootstrapping a quantile
Expand Down Expand Up @@ -79,7 +87,7 @@ data = (study_hours, exam_scores)

# Weighted Pearson correlation via Bayesian bootstrapping
distribution = bootstrap(data=data, statistic_fn="pearson_dependency", n_boot=2000, seed=1)
summary = distribution.summarize(level=0.95).round()
summary = distribution.summarize(level=0.95, precision="auto")
print(summary)
# BootstrapSummary(mean=0.9969, ci_low=0.9911, ci_high=0.9992, level=0.95)
```
Expand Down Expand Up @@ -133,7 +141,7 @@ def weighted_geometric_mean(data, weights):

data = np.array([1.2, 1.5, 2.0, 2.8, 3.1])
distribution = bootstrap(data=data, statistic_fn=weighted_geometric_mean, n_boot=1500, seed=1)
summary = distribution.summarize().round()
summary = distribution.summarize(precision="auto")
print(summary)
# BootstrapSummary(mean=2.01, ci_low=1.58, ci_high=2.48, level=0.87)
```
Expand Down
34 changes: 30 additions & 4 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Literal, Optional, Union

import numpy as np
import pytest
Expand Down Expand Up @@ -65,11 +65,11 @@ def test_bootstrap_summary_ci_width(
pytest.param(1, 0.5, 0.1, 0.9),
pytest.param(2, 0.51, 0.11, 0.91),
pytest.param(3, 0.511, 0.111, 0.911),
pytest.param(None, 0.51, 0.11, 0.91),
pytest.param("auto", 0.51, 0.11, 0.91),
],
)
def test_bootstrap_summary_round(
precision: Optional[int],
precision: Union[int, Literal["auto"]],
expected_mean: float,
expected_ci_low: float,
expected_ci_high: float,
Expand All @@ -90,6 +90,12 @@ def test_bootstrap_summary_round(
np.testing.assert_allclose(bootstrap_summary_rounded.ci_high, expected_ci_high)


def test_bootstrap_summary_round_fail() -> None:
    """`BootstrapSummary.round` rejects a precision that is neither an int nor "auto"."""
    bootstrap_summary = BootstrapSummary(mean=0.5, ci_low=0.1, ci_high=0.9, level=0.87)
    # Pin the message so that an unrelated ValueError raised elsewhere in the
    # call cannot satisfy this test by accident.
    with pytest.raises(ValueError, match="must be integer or 'auto'"):
        _ = bootstrap_summary.round(precision="not-auto")  # type: ignore[arg-type]


@pytest.mark.parametrize(
"level, expected_ci_low, expected_ci_high",
[
Expand Down Expand Up @@ -145,7 +151,7 @@ def test_bootstrap_distribution(estimates: NDArray[np.floating]) -> None:
pytest.param(0.8),
],
)
def test_bootstrap_distribution_summarize(
def test_bootstrap_distribution_summarize_level(
estimates: NDArray[np.floating],
level: float,
) -> None:
Expand All @@ -156,6 +162,26 @@ def test_bootstrap_distribution_summarize(
np.testing.assert_allclose(bootstrap_summary.level, level)


@pytest.mark.parametrize(
    "precision",
    [
        # None is the default of `summarize`: the summary must come back unrounded.
        pytest.param(None),
        pytest.param(0),
        pytest.param(1),
        pytest.param("auto"),
    ],
)
def test_bootstrap_distribution_summarize_precision(
    estimates: NDArray[np.floating],
    precision: Optional[Union[int, Literal["auto"]]],
) -> None:
    """
    `summarize(precision=...)` matches summarizing first and rounding afterwards.

    For `precision=None` no rounding is applied, so the result must equal the
    plain `summarize()` output.
    """
    bootstrap_distribution = BootstrapDistribution(estimates)
    expected = bootstrap_distribution.summarize()
    if precision is not None:
        expected = expected.round(precision)
    actual = bootstrap_distribution.summarize(precision=precision)
    assert actual == expected


@pytest.mark.parametrize(
"estimates",
[
Expand Down