rethinkpriorities · peterhurford · Apr 16, 2025 · Nov 10, 2024 · Jan 25, 2025 · Jan 25, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,5 +1,9 @@
 ## v0.29
 
+* Fixes a bug where `max_gain` and `modeled_gain` were incorrect in kelly output.
+* Added `invlognorm` as a new distribution.
+* Added `bucket_percentages` to more easily get the percentage of values within a bucket.
+* Added `third_kelly` as an alias for `kelly` with deference = 0.66. (TODO: Fix tests)
 * Allows Bernoulli distributions to be defined with p=0 or p=1
 * Added a `Makefile` to help simplify testing and linting workflows
 

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,20 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Build/Test/Lint Commands
+- Install: `poetry install --with dev`
+- Run all tests: `make test` or `pytest && pip3 install . && python3 tests/integration.py`
+- Run single test: `pytest tests/test_file.py::test_function_name -v`
+- Format code: `make format` or `black . && ruff check . --fix`
+- Lint code: `make lint` or `ruff check .`
+
+## Style Guidelines
+- Line length: 99 characters (configured for both Black and Ruff)
+- Imports: stdlib first, third-party next, local imports last
+- Naming: CamelCase for classes, snake_case for functions/vars, UPPER_CASE for constants
+- Documentation: NumPy-style docstrings with examples, parameters, returns
+- Type hints: Use throughout codebase
+- Error handling: Validate inputs, use ValueError with descriptive messages
+- Use operator overloading (`__add__`, `__mul__`, etc.) and custom operators (`@` for sampling)
+- Tests: Descriptive names, unit tests match module structure, use hypothesis for property testing
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "squigglepy"
-version = "0.29-dev2"
+version = "0.29-dev4"
 description = "Squiggle programming language for intuitive probabilistic estimation features in Python"
 authors = ["Peter Wildeford <peter@rethinkpriorities.org>"]
 license = "MIT"
@@ -16,6 +16,7 @@ repository = "https://github.com/rethinkpriorities/squigglepy"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.12"
+setuptools = "^69.0.0"
 numpy = "^1.24.3"
 scipy = "^1.10.1"
 tqdm = "^4.65.0"

diff --git a/squigglepy/distributions.py b/squigglepy/distributions.py
@@ -846,6 +846,100 @@ def __str__(self):
         return out
 
 
+class InverseLognormalDistribution(ContinuousDistribution):
+    def __init__(
+        self,
+        x=None,
+        y=None,
+        norm_mean=None,
+        norm_sd=None,
+        lognorm_mean=None,
+        lognorm_sd=None,
+        credibility=90,
+        lclip=None,
+        rclip=None,
+    ):
+        super().__init__()
+        self.x = x
+        self.y = y
+        self.credibility = credibility
+        self.norm_mean = norm_mean
+        self.norm_sd = norm_sd
+        self.lognorm_mean = lognorm_mean
+        self.lognorm_sd = lognorm_sd
+        self.lclip = lclip
+        self.rclip = rclip
+
+        if self.x is not None and self.y is not None and self.x > self.y:
+            raise ValueError("`high value` cannot be lower than `low value`")
+        if self.x is not None and self.x <= 0:
+            raise ValueError("inverse lognormal distribution must have values > 0")
+
+        if (self.x is None or self.y is None) and self.norm_sd is None and self.lognorm_sd is None:
+            raise ValueError(
+                ("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
+            )
+        elif (self.x is not None or self.y is not None) and (
+            self.norm_sd is not None or self.lognorm_sd is not None
+        ):
+            raise ValueError(
+                ("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
+            )
+        elif (self.norm_sd is not None or self.norm_mean is not None) and (
+            self.lognorm_sd is not None or self.lognorm_mean is not None
+        ):
+            raise ValueError(
+                ("must define only one of x/y, norm_mean/norm_sd, " "or lognorm_mean/lognorm_sd")
+            )
+        elif self.norm_sd is not None and self.norm_mean is None:
+            self.norm_mean = 0
+        elif self.lognorm_sd is not None and self.lognorm_mean is None:
+            self.lognorm_mean = 1
+
+        if self.x is not None:
+            inv_x, inv_y = 1 / self.y, 1 / self.x  # Note: x and y are swapped when inverted
+            self.norm_mean = (np.log(inv_x) + np.log(inv_y)) / 2
+            cdf_value = 0.5 + 0.5 * (self.credibility / 100)
+            normed_sigma = scipy.stats.norm.ppf(cdf_value)
+            self.norm_sd = (np.log(inv_y) - self.norm_mean) / normed_sigma
+
+        if self.lognorm_sd is None:
+            lognorm_mean = np.exp(self.norm_mean + self.norm_sd**2 / 2)
+            lognorm_var = (np.exp(self.norm_sd**2) - 1) * np.exp(
+                2 * self.norm_mean + self.norm_sd**2
+            )
+
+            self.lognorm_mean = (
+                1
+                / np.sqrt(lognorm_var + lognorm_mean**2)
+                * np.exp(-self.norm_mean + self.norm_sd**2 / 2)
+            )
+            self.lognorm_sd = self.lognorm_mean * np.sqrt(np.exp(self.norm_sd**2) - 1)
+
+        elif self.norm_sd is None:
+            # Start with the relationship between lognormal and its inverse
+            cv_squared = (
+                self.lognorm_sd**2 / self.lognorm_mean**2
+            )  # coefficient of variation squared
+            self.norm_sd = np.sqrt(np.log(1 + cv_squared))
+            self.norm_mean = -np.log(self.lognorm_mean) - self.norm_sd**2 / 2
+
+    def __str__(self):
+        out = "<Distribution> invlognorm(lognorm_mean={}, lognorm_sd={}, norm_mean={}, norm_sd={}"
+        out = out.format(
+            round(self.lognorm_mean, 2),
+            round(self.lognorm_sd, 2),
+            round(self.norm_mean, 2),
+            round(self.norm_sd, 2),
+        )
+        if self.lclip is not None:
+            out += ", lclip={}".format(self.lclip)
+        if self.rclip is not None:
+            out += ", rclip={}".format(self.rclip)
+        out += ")"
+        return out
+
+
 def lognorm(
     x=None,
     y=None,
@@ -913,6 +1007,74 @@ def lognorm(
     )
 
 
+def invlognorm(
+    x=None,
+    y=None,
+    credibility=90,
+    norm_mean=None,
+    norm_sd=None,
+    lognorm_mean=None,
+    lognorm_sd=None,
+    lclip=None,
+    rclip=None,
+):
+    """
+    Initialize an inverse lognormal distribution.
+
+    This is a left-skewed distribution created by taking the reciprocal of a lognormal distribution.
+
+    Can be defined either via a credible interval from ``x`` to ``y`` (use ``credibility`` or
+    it will default to being a 90% CI) or defined via underlying normal distribution parameters
+    or via the inverse lognormal mean and standard deviation.
+
+    Parameters
+    ----------
+    x : float
+        The low value of a credible interval defined by ``credibility``. Defaults to a 90% CI.
+        Must be a value greater than 0.
+    y : float
+        The high value of a credible interval defined by ``credibility``. Defaults to a 90% CI.
+        Must be a value greater than 0.
+    credibility : float
+        The range of the credibility interval. Defaults to 90. Ignored if the distribution is
+        defined instead by ``mean`` and ``sd``.
+    norm_mean : float or None
+        The mean of the underlying normal distribution. If not defined, defaults to 0.
+    norm_sd : float
+        The standard deviation of the underlying normal distribution.
+    lognorm_mean : float or None
+        The mean of the inverse lognormal distribution. If not defined, defaults to 1.
+    lognorm_sd : float
+        The standard deviation of the inverse lognormal distribution.
+    lclip : float or None
+        If not None, any value below ``lclip`` will be coerced to ``lclip``.
+    rclip : float or None
+        If not None, any value below ``rclip`` will be coerced to ``rclip``.
+
+    Returns
+    -------
+    InverseLognormalDistribution
+
+    Examples
+    --------
+    >>> invlognorm(1, 10)
+    <Distribution> invlognorm(lognorm_mean=0.3, lognorm_sd=0.13, norm_mean=-1.15, norm_sd=0.7)
+    >>> invlognorm(norm_mean=-1, norm_sd=2)
+    <Distribution> invlognorm(lognorm_mean=7.39, lognorm_sd=54.1, norm_mean=-1, norm_sd=2)
+    """
+    return InverseLognormalDistribution(
+        x=x,
+        y=y,
+        credibility=credibility,
+        norm_mean=norm_mean,
+        norm_sd=norm_sd,
+        lognorm_mean=lognorm_mean,
+        lognorm_sd=lognorm_sd,
+        lclip=lclip,
+        rclip=rclip,
+    )
+
+
 def to(
     x, y, credibility=90, lclip=None, rclip=None
 ) -> Union[LognormalDistribution, NormalDistribution]:

diff --git a/squigglepy/samplers.py b/squigglepy/samplers.py
@@ -37,6 +37,7 @@
     GeometricDistribution,
     LogTDistribution,
     LognormalDistribution,
+    InverseLognormalDistribution,
     MixtureDistribution,
     NormalDistribution,
     ParetoDistribution,
@@ -113,6 +114,38 @@ def lognormal_sample(mean, sd, samples=1):
     return _simplify(_get_rng().lognormal(mean, sd, samples))
 
 
+def inverse_lognormal_sample(mean, sd, samples=1):
+    """
+    Sample a random number according to an inverse lognormal distribution.
+
+    This is equivalent to taking the reciprocal of samples from a lognormal distribution,
+    producing a left-skewed distribution (rather than the right-skewed lognormal).
+
+    Parameters
+    ----------
+    mean : float
+        The mean of the underlying normal distribution in log space.
+    sd : float
+        The standard deviation of the underlying normal distribution in log space.
+    samples : int
+        The number of samples to return.
+
+    Returns
+    -------
+    float
+        A random number sampled from an inverse lognormal distribution.
+
+    Examples
+    --------
+    >>> set_seed(42)
+    >>> inverse_lognormal_sample(0, 1)
+    0.7373871753169729
+    """
+    # Generate lognormal samples and take their reciprocals
+    lognorm_samples = _get_rng().lognormal(mean, sd, samples)
+    return _simplify(1.0 / lognorm_samples)
+
+
 def t_sample(low=None, high=None, t=20, samples=1, credibility=90):
     """
     Sample a random number according to a t-distribution.
@@ -1048,6 +1081,9 @@ def run_dist(dist, pbar=None, tick=1):
         elif isinstance(dist, LognormalDistribution):
             samples = lognormal_sample(mean=dist.norm_mean, sd=dist.norm_sd, samples=n)
 
+        elif isinstance(dist, InverseLognormalDistribution):
+            samples = inverse_lognormal_sample(mean=dist.norm_mean, sd=dist.norm_sd, samples=n)
+
         elif isinstance(dist, BinomialDistribution):
             samples = binomial_sample(n=dist.n, p=dist.p, samples=n)