Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ model = PyBBT(
)
```

#### Evaluating BBT when reporting errors

By default, BBT assumes that the goal of the evaluation is to maximize the metric (e.g. when reporting F1 score or AUROC). When the metrics reported in the dataframe should instead be minimized (e.g. RMSE), set the `maximize` parameter of `PyBBT` to `False`:

```python
model = PyBBT(
local_rope_value=0.01,
maximize=False, # Set to False if the metric should be minimized
).fit(
df,
dataset_col="dataset",
)
```

### Paired posterior fitting

The PyBBT model supports two variants of input data for the paired case: either a single dataframe with multiple rows per algorithm per dataset, or a pair of dataframes, one defining the mean performance per algorithm and the other the corresponding standard deviations.
Expand Down
2 changes: 1 addition & 1 deletion bbttest/bbt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""bbt module: Bayesian Bradley-Terry model implementation."""

from .const import HyperPrior, ReportedProperty, TieSolver
from .params import HyperPrior, ReportedProperty, TieSolver
from .py_bbt import PyBBT

__all__ = [
Expand Down
23 changes: 19 additions & 4 deletions bbttest/bbt/alg.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging as log
import warnings
from collections.abc import Generator, Iterable

import arviz as az
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from .const import TieSolver
from .const import UNNAMED_COLUMNS_WARNING_TEMPLATE
from .params import TieSolver

ALG1_COL = 2
ALG2_COL = 3
Expand Down Expand Up @@ -125,14 +127,27 @@ def _construct_win_table(
dataset_col: str | int | None,
local_rope_value: float | None,
tie_solver: TieSolver,
maximize: bool,
) -> tuple[np.ndarray, list[str]]:
# Extract algorithm names
algorithms_names = set(data.columns.tolist())
algorithms_names = data.columns.tolist()
if isinstance(dataset_col, int):
dataset_col = data.columns[dataset_col]
if dataset_col is not None:
algorithms_names.discard(dataset_col)
algorithms_names = list(algorithms_names)
algorithms_names.remove(dataset_col)

data = data.copy()
if not maximize:
data.loc[:, algorithms_names] = -1 * data[algorithms_names]

if any("Unnamed" in col for col in algorithms_names):
warnings.warn(
UNNAMED_COLUMNS_WARNING_TEMPLATE.format(
algorithms_names=algorithms_names,
dataset_col=dataset_col,
),
UserWarning,
)

if dataset_col is None or data.shape[0] == data[dataset_col].nunique():
table = _construct_no_paired(
Expand Down
76 changes: 4 additions & 72 deletions bbttest/bbt/const.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,4 @@
from enum import Enum

from pymc.distributions import Cauchy, LogNormal, Normal


class HyperPrior(str, Enum):
    """
    Supported hyper prior distributions for BBT MCMC sampling.

    Members are also ``str`` instances, so each one compares equal to its
    string value.
    """

    LOG_NORMAL = "logNormal"
    LOG_NORMAL_SCALED = "logNormalScaled"
    CAUCHY = "cauchy"
    NORMAL = "normal"

    def _get_pymc_dist(self, scale, name="sigma"):
        """
        Build the PyMC distribution that corresponds to this hyper prior.

        ``scale`` is used as the spread parameter of every distribution except
        ``LOG_NORMAL``, which always uses ``sigma=1``. ``name`` is the variable
        name handed to the PyMC distribution.

        Raises ``ValueError`` if the member matches none of the known priors.
        """
        # Guard-clause dispatch: the first matching prior returns immediately.
        if self == HyperPrior.LOG_NORMAL:
            return LogNormal(name, mu=0, sigma=1)
        if self == HyperPrior.LOG_NORMAL_SCALED:
            return LogNormal(name, mu=0, sigma=scale)
        if self == HyperPrior.CAUCHY:
            return Cauchy(name, alpha=0, beta=scale)
        if self == HyperPrior.NORMAL:
            return Normal(name, mu=0, sigma=scale)
        raise ValueError(f"Unsupported hyperprior: {self}")


class ReportedProperty(str, Enum):
    """
    Enum containing properties that can be reported from BBT results.

    Every member is also a ``str``; its value is the lower-case identifier of
    the property, so members compare equal to plain strings.
    """

    # NOTE(review): the meaning of each property is only implied by its name
    # here; confirm against the code that builds the report.
    LEFT_MODEL = "left_model"
    RIGHT_MODEL = "right_model"
    MEDIAN = "median"
    MEAN = "mean"
    HDI_LOW = "hdi_low"
    HDI_HIGH = "hdi_high"
    DELTA = "delta"
    ABOVE_50 = "above_50"
    IN_ROPE = "in_rope"
    WEAK_INTERPRETATION = "weak_interpretation"
    STRONG_INTERPRETATION = "strong_interpretation"


class TieSolver(str, Enum):
    """
    Enum containing tie solving strategies.

    ADD - Add 1 win to both players.
    SPREAD - Add 1/2 win to both players.
    FORGET - Ignore the tie.
    DAVIDSON - Use Davidson's method to handle ties.

    Every member is also a ``str``, so it compares equal to its string value.
    """

    ADD = "add"
    SPREAD = "spread"
    FORGET = "forget"
    DAVIDSON = "davidson"


# Fixed subset of ReportedProperty members.
# NOTE(review): judging by the name, this is the selection reported when the
# caller does not choose properties explicitly - confirm against the consumer.
DEFAULT_PROPERTIES = (
    ReportedProperty.MEAN,
    ReportedProperty.DELTA,
    ReportedProperty.ABOVE_50,
    ReportedProperty.IN_ROPE,
    ReportedProperty.WEAK_INTERPRETATION,
)

# Every ReportedProperty member, in definition order.
ALL_PROPERTIES = tuple(ReportedProperty)
# Warning message template with two named placeholders, `algorithms_names` and
# `dataset_col`, to be filled in with `str.format` before the warning is issued.
UNNAMED_COLUMNS_WARNING_TEMPLATE = """Some algorithm names are unnamed. This may lead to issues in the win table construction.
Algorithm names extracted: {algorithms_names}
Dataset column: {dataset_col}
"""
2 changes: 1 addition & 1 deletion bbttest/bbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pymc as pm
import pytensor.tensor as pt

from .const import HyperPrior
from .params import HyperPrior


def _build_bbt_model(
Expand Down
72 changes: 72 additions & 0 deletions bbttest/bbt/params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from enum import Enum

from pymc.distributions import Cauchy, LogNormal, Normal


class HyperPrior(str, Enum):
    """
    Supported hyper prior distributions for BBT MCMC sampling.

    Members are also ``str`` instances, so each one compares equal to its
    string value.
    """

    LOG_NORMAL = "logNormal"
    LOG_NORMAL_SCALED = "logNormalScaled"
    CAUCHY = "cauchy"
    NORMAL = "normal"

    def _get_pymc_dist(self, scale, name="sigma"):
        """
        Build the PyMC distribution that corresponds to this hyper prior.

        ``scale`` is used as the spread parameter of every distribution except
        ``LOG_NORMAL``, which always uses ``sigma=1``. ``name`` is the variable
        name handed to the PyMC distribution.

        Raises ``ValueError`` if the member matches none of the known priors.
        """
        # Guard-clause dispatch: the first matching prior returns immediately.
        if self == HyperPrior.LOG_NORMAL:
            return LogNormal(name, mu=0, sigma=1)
        if self == HyperPrior.LOG_NORMAL_SCALED:
            return LogNormal(name, mu=0, sigma=scale)
        if self == HyperPrior.CAUCHY:
            return Cauchy(name, alpha=0, beta=scale)
        if self == HyperPrior.NORMAL:
            return Normal(name, mu=0, sigma=scale)
        raise ValueError(f"Unsupported hyperprior: {self}")


class ReportedProperty(str, Enum):
    """
    Enum containing properties that can be reported from BBT results.

    Every member is also a ``str``; its value is the lower-case identifier of
    the property, so members compare equal to plain strings.
    """

    # NOTE(review): the meaning of each property is only implied by its name
    # here; confirm against the code that builds the report.
    LEFT_MODEL = "left_model"
    RIGHT_MODEL = "right_model"
    MEDIAN = "median"
    MEAN = "mean"
    HDI_LOW = "hdi_low"
    HDI_HIGH = "hdi_high"
    DELTA = "delta"
    ABOVE_50 = "above_50"
    IN_ROPE = "in_rope"
    WEAK_INTERPRETATION = "weak_interpretation"
    STRONG_INTERPRETATION = "strong_interpretation"


class TieSolver(str, Enum):
    """
    Enum containing tie solving strategies.

    ADD - Add 1 win to both players.
    SPREAD - Add 1/2 win to both players.
    FORGET - Ignore the tie.
    DAVIDSON - Use Davidson's method to handle ties.

    Every member is also a ``str``, so it compares equal to its string value.
    """

    ADD = "add"
    SPREAD = "spread"
    FORGET = "forget"
    DAVIDSON = "davidson"


# Fixed subset of ReportedProperty members.
# NOTE(review): judging by the name, this is the selection reported when the
# caller does not choose properties explicitly - confirm against the consumer.
DEFAULT_PROPERTIES = (
    ReportedProperty.MEAN,
    ReportedProperty.DELTA,
    ReportedProperty.ABOVE_50,
    ReportedProperty.IN_ROPE,
    ReportedProperty.WEAK_INTERPRETATION,
)

# Every ReportedProperty member, in definition order.
ALL_PROPERTIES = tuple(ReportedProperty)
13 changes: 10 additions & 3 deletions bbttest/bbt/py_bbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pandas as pd

from .alg import _construct_win_table, _get_pwin, _hdi
from .const import DEFAULT_PROPERTIES, HyperPrior, ReportedProperty, TieSolver
from .model import _mcmcbbt_pymc
from .params import DEFAULT_PROPERTIES, HyperPrior, ReportedProperty, TieSolver


class PyBBT:
Expand Down Expand Up @@ -42,13 +42,17 @@ class PyBBT:
scale: float, default 1.0
The scale parameter for the hyper prior distribution. Ignored if the HyperPrior is LOG_NORMAL.

maximize: bool, default True
Whether higher scores indicate better performance (e.g. accuracy/f1). If using a metric where the goal is to
minimize the score (e.g. RMSE) set this to False.

Attributes
----------
fitted: bool
Whether the model has been fitted.

Examlples
---------
Examples
--------
>>> import pandas as pd
>>> from bbttest import PyBBT, TieSolver
>>> data = pd.DataFrame({
Expand Down Expand Up @@ -78,6 +82,7 @@ def __init__(
local_rope_value: float | None = None,
tie_solver: TieSolver | str = TieSolver.SPREAD,
hyper_prior: HyperPrior | str = HyperPrior.LOG_NORMAL,
maximize: bool = True,
scale: float = 1.0,
):
self._local_rope_value = local_rope_value
Expand All @@ -88,6 +93,7 @@ def __init__(
self._hyper_prior = (
HyperPrior(hyper_prior) if isinstance(hyper_prior, str) else hyper_prior
)
self._maximize = maximize
self._scale = scale
self._fitted = False

Expand Down Expand Up @@ -127,6 +133,7 @@ def fit(
dataset_col=dataset_col,
local_rope_value=self._local_rope_value,
tie_solver=self._tie_solver,
maximize=self._maximize,
)

self._fit_posterior = _mcmcbbt_pymc(
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ test = [
"mypy",
]

[tool.pytest]
markers = [
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
]

[tool.mypy]
python_version = "3.11"
check_untyped_defs = true # check all functions, this fixes some tests
Expand Down
Loading