Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## v0.29

* Fixes a bug where `max_gain` and `modeled_gain` were incorrect in kelly output.
* Added `invlognorm` as a new distribution.
* Added `bucket_percentages` to more easily get the percentage of values within a bucket.
* Added `third_kelly` as an alias for `kelly` with deference = 0.66. (TODO: Fix tests)
* Allows Bernoulli distributions to be defined with p=0 or p=1
* Added a `Makefile` to help simplify testing and linting workflows

Expand Down
20 changes: 20 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Build/Test/Lint Commands
- Install: `poetry install --with dev`
- Run all tests: `make test` or `pytest && pip3 install . && python3 tests/integration.py`
- Run single test: `pytest tests/test_file.py::test_function_name -v`
- Format code: `make format` or `black . && ruff check . --fix`
- Lint code: `make lint` or `ruff check .`

## Style Guidelines
- Line length: 99 characters (configured for both Black and Ruff)
- Imports: stdlib first, third-party next, local imports last
- Naming: CamelCase for classes, snake_case for functions/vars, UPPER_CASE for constants
- Documentation: NumPy-style docstrings with examples, parameters, returns
- Type hints: Use throughout codebase
- Error handling: Validate inputs, use ValueError with descriptive messages
- Use operator overloading (`__add__`, `__mul__`, etc.) and custom operators (`@` for sampling)
- Tests: Descriptive names, unit tests match module structure, use hypothesis for property testing
160 changes: 134 additions & 26 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "squigglepy"
version = "0.29-dev2"
version = "0.29-dev4"
description = "Squiggle programming language for intuitive probabilistic estimation features in Python"
authors = ["Peter Wildeford <peter@rethinkpriorities.org>"]
license = "MIT"
Expand All @@ -16,6 +16,7 @@ repository = "https://github.com/rethinkpriorities/squigglepy"

[tool.poetry.dependencies]
python = ">=3.9,<3.12"
setuptools = "^69.0.0"
numpy = "^1.24.3"
scipy = "^1.10.1"
tqdm = "^4.65.0"
Expand Down
162 changes: 162 additions & 0 deletions squigglepy/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,100 @@ def __str__(self):
return out


class InverseLognormalDistribution(ContinuousDistribution):
def __init__(
self,
x=None,
y=None,
norm_mean=None,
norm_sd=None,
lognorm_mean=None,
lognorm_sd=None,
credibility=90,
lclip=None,
rclip=None,
):
super().__init__()
self.x = x
self.y = y
self.credibility = credibility
self.norm_mean = norm_mean
self.norm_sd = norm_sd
self.lognorm_mean = lognorm_mean
self.lognorm_sd = lognorm_sd
self.lclip = lclip
self.rclip = rclip

if self.x is not None and self.y is not None and self.x > self.y:
raise ValueError("`high value` cannot be lower than `low value`")
if self.x is not None and self.x <= 0:
raise ValueError("inverse lognormal distribution must have values > 0")

if (self.x is None or self.y is None) and self.norm_sd is None and self.lognorm_sd is None:
raise ValueError(
("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
)
elif (self.x is not None or self.y is not None) and (
self.norm_sd is not None or self.lognorm_sd is not None
):
raise ValueError(
("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
)
elif (self.norm_sd is not None or self.norm_mean is not None) and (
self.lognorm_sd is not None or self.lognorm_mean is not None
):
raise ValueError(
("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
)
elif self.norm_sd is not None and self.norm_mean is None:
self.norm_mean = 0
elif self.lognorm_sd is not None and self.lognorm_mean is None:
self.lognorm_mean = 1

if self.x is not None:
inv_x, inv_y = 1 / self.y, 1 / self.x # Note: x and y are swapped when inverted
self.norm_mean = (np.log(inv_x) + np.log(inv_y)) / 2
cdf_value = 0.5 + 0.5 * (self.credibility / 100)
normed_sigma = scipy.stats.norm.ppf(cdf_value)
self.norm_sd = (np.log(inv_y) - self.norm_mean) / normed_sigma

if self.lognorm_sd is None:
lognorm_mean = np.exp(self.norm_mean + self.norm_sd**2 / 2)
lognorm_var = (np.exp(self.norm_sd**2) - 1) * np.exp(
2 * self.norm_mean + self.norm_sd**2
)

self.lognorm_mean = (
1
/ np.sqrt(lognorm_var + lognorm_mean**2)
* np.exp(-self.norm_mean + self.norm_sd**2 / 2)
)
self.lognorm_sd = self.lognorm_mean * np.sqrt(np.exp(self.norm_sd**2) - 1)

elif self.norm_sd is None:
# Start with the relationship between lognormal and its inverse
cv_squared = (
self.lognorm_sd**2 / self.lognorm_mean**2
) # coefficient of variation squared
self.norm_sd = np.sqrt(np.log(1 + cv_squared))
self.norm_mean = -np.log(self.lognorm_mean) - self.norm_sd**2 / 2

def __str__(self):
out = "<Distribution> invlognorm(lognorm_mean={}, lognorm_sd={}, norm_mean={}, norm_sd={}"
out = out.format(
round(self.lognorm_mean, 2),
round(self.lognorm_sd, 2),
round(self.norm_mean, 2),
round(self.norm_sd, 2),
)
if self.lclip is not None:
out += ", lclip={}".format(self.lclip)
if self.rclip is not None:
out += ", rclip={}".format(self.rclip)
out += ")"
return out


def lognorm(
x=None,
y=None,
Expand Down Expand Up @@ -913,6 +1007,74 @@ def lognorm(
)


def invlognorm(
x=None,
y=None,
credibility=90,
norm_mean=None,
norm_sd=None,
lognorm_mean=None,
lognorm_sd=None,
lclip=None,
rclip=None,
):
"""
Initialize an inverse lognormal distribution.

This is a left-skewed distribution created by taking the reciprocal of a lognormal distribution.

Can be defined either via a credible interval from ``x`` to ``y`` (use ``credibility`` or
it will default to being a 90% CI) or defined via underlying normal distribution parameters
or via the inverse lognormal mean and standard deviation.

Parameters
----------
x : float
The low value of a credible interval defined by ``credibility``. Defaults to a 90% CI.
Must be a value greater than 0.
y : float
The high value of a credible interval defined by ``credibility``. Defaults to a 90% CI.
Must be a value greater than 0.
credibility : float
The range of the credibility interval. Defaults to 90. Ignored if the distribution is
defined instead by ``mean`` and ``sd``.
norm_mean : float or None
The mean of the underlying normal distribution. If not defined, defaults to 0.
norm_sd : float
The standard deviation of the underlying normal distribution.
lognorm_mean : float or None
The mean of the inverse lognormal distribution. If not defined, defaults to 1.
lognorm_sd : float
The standard deviation of the inverse lognormal distribution.
lclip : float or None
If not None, any value below ``lclip`` will be coerced to ``lclip``.
rclip : float or None
If not None, any value below ``rclip`` will be coerced to ``rclip``.

Returns
-------
InverseLognormalDistribution

Examples
--------
>>> invlognorm(1, 10)
<Distribution> invlognorm(lognorm_mean=0.3, lognorm_sd=0.13, norm_mean=-1.15, norm_sd=0.7)
>>> invlognorm(norm_mean=-1, norm_sd=2)
<Distribution> invlognorm(lognorm_mean=7.39, lognorm_sd=54.1, norm_mean=-1, norm_sd=2)
"""
return InverseLognormalDistribution(
x=x,
y=y,
credibility=credibility,
norm_mean=norm_mean,
norm_sd=norm_sd,
lognorm_mean=lognorm_mean,
lognorm_sd=lognorm_sd,
lclip=lclip,
rclip=rclip,
)


def to(
x, y, credibility=90, lclip=None, rclip=None
) -> Union[LognormalDistribution, NormalDistribution]:
Expand Down
36 changes: 36 additions & 0 deletions squigglepy/samplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
GeometricDistribution,
LogTDistribution,
LognormalDistribution,
InverseLognormalDistribution,
MixtureDistribution,
NormalDistribution,
ParetoDistribution,
Expand Down Expand Up @@ -113,6 +114,38 @@ def lognormal_sample(mean, sd, samples=1):
return _simplify(_get_rng().lognormal(mean, sd, samples))


def inverse_lognormal_sample(mean, sd, samples=1):
"""
Sample a random number according to an inverse lognormal distribution.

This is equivalent to taking the reciprocal of samples from a lognormal distribution,
producing a left-skewed distribution (rather than the right-skewed lognormal).

Parameters
----------
mean : float
The mean of the underlying normal distribution in log space.
sd : float
The standard deviation of the underlying normal distribution in log space.
samples : int
The number of samples to return.

Returns
-------
float
A random number sampled from an inverse lognormal distribution.

Examples
--------
>>> set_seed(42)
>>> inverse_lognormal_sample(0, 1)
0.7373871753169729
"""
# Generate lognormal samples and take their reciprocals
lognorm_samples = _get_rng().lognormal(mean, sd, samples)
return _simplify(1.0 / lognorm_samples)


def t_sample(low=None, high=None, t=20, samples=1, credibility=90):
"""
Sample a random number according to a t-distribution.
Expand Down Expand Up @@ -1048,6 +1081,9 @@ def run_dist(dist, pbar=None, tick=1):
elif isinstance(dist, LognormalDistribution):
samples = lognormal_sample(mean=dist.norm_mean, sd=dist.norm_sd, samples=n)

elif isinstance(dist, InverseLognormalDistribution):
samples = inverse_lognormal_sample(mean=dist.norm_mean, sd=dist.norm_sd, samples=n)

elif isinstance(dist, BinomialDistribution):
samples = binomial_sample(n=dist.n, p=dist.p, samples=n)

Expand Down
Loading
Loading