From b7c54002f316b31b67c56da0e9ff64b9be0f1a7a Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:49:13 +0100 Subject: [PATCH 1/7] Return performance when retraining --- src/superintendent/base.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/superintendent/base.py b/src/superintendent/base.py index eb351a3..8134c8e 100644 --- a/src/superintendent/base.py +++ b/src/superintendent/base.py @@ -142,9 +142,12 @@ def __init__( self.acquisition_function = acquisition_function if isinstance(acquisition_function, str): - self.acquisition_function = acquisition_functions.functions[ - acquisition_function - ] + self.acquisition_function = getattr( + acquisition_functions, acquisition_function + ) + # acquisition_functions.functions[ + # acquisition_function + # ] self.shuffle_prop = shuffle_prop self.model_preprocess = model_preprocess @@ -334,11 +337,11 @@ def retrain(self, button=None): _, labelled_X, labelled_y = self.queue.list_completed() - if len(labelled_y) < 10: + if len(labelled_y) < 2: self.model_performance.value = ( "Score: Not enough labels to retrain." 
) - return + return np.nan if self.model_preprocess is not None: labelled_X, labelled_y = self.model_preprocess( @@ -391,3 +394,4 @@ def retrain(self, button=None): # undo the previously popped item and pop the next one self.queue.undo() self._skip() + return self.performance From b39179a937b5a0fb7de893a472d6447268b9af78 Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:50:41 +0100 Subject: [PATCH 2/7] Decorators for testing point estimate/distribution --- .../acquisition_functions/decorators.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/superintendent/acquisition_functions/decorators.py b/src/superintendent/acquisition_functions/decorators.py index 877a2df..db83bad 100644 --- a/src/superintendent/acquisition_functions/decorators.py +++ b/src/superintendent/acquisition_functions/decorators.py @@ -25,6 +25,7 @@ def _get_indices(scores: np.ndarray, shuffle_prop: float) -> np.ndarray: def _is_multioutput( probabilities: typing.Union[np.ndarray, typing.List[np.ndarray]] ): + """Test whether predictions are for single- or multi-output""" if isinstance(probabilities, list) and ( isinstance(probabilities[0], np.ndarray) and probabilities[0].ndim == 2 @@ -37,6 +38,69 @@ def _is_multioutput( raise ValueError("Unknown probability format.") +def _is_distribution(probabilities: np.ndarray): + """ + Test whether predictions are single value per outcome, or a distribution. + """ + if _is_multioutput(probabilities): + return _is_distribution(probabilities[0]) + else: + return probabilities.ndim > 2 + + +multioutput_reduce_fns = {"mean": np.mean, "max": np.max} + + +def require_point_estimate(fn: typing.Callable) -> typing.Callable: + """ + Mark a function as requiring point estimate predictions. + + If distributions of predictions get passed, the distribution will be + averaged first. + + Parameters + ---------- + fn + The function to decorate. 
+ """ + + @functools.wraps(fn) + def wrapped_fn(probabilities: np.ndarray, *args, **kwargs): + if _is_distribution(probabilities): + if _is_multioutput(probabilities): + probabilities = [p.mean(axis=-1) for p in probabilities] + else: + probabilities = probabilities.mean(axis=-1) + return fn(probabilities, *args, **kwargs) + + return wrapped_fn + + +def require_distribution(fn: typing.Callable) -> typing.Callable: + """ + Mark a function as requiring distribution output. + + If non-distribution output gets passed, this function will now raise an + error. + + Parameters + ---------- + fn + The function to decorate. + """ + + @functools.wraps(fn) + def wrapped_fn(probabilities, *args, **kwargs): + if not _is_distribution(probabilities): + raise ValueError( + f"Acquisition function {fn.__name__} " + "requires distribution output." + ) + return fn(probabilities, *args, **kwargs) + + return wrapped_fn + + def make_acquisition_function(handle_multioutput="mean"): """Wrap an acquisition function. 
From c132cd5022ccd295b789c8c8508c5a8088c648a2 Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:52:24 +0100 Subject: [PATCH 3/7] Add shuffle_prop to function signature This uses a hacky library called merge-args --- pyproject.toml | 4 ++- .../acquisition_functions/decorators.py | 28 +++++++++++++++---- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cc242cd..ec1ab6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ description-file = "README.md" classifiers = [ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "License :: OSI Approved :: MIT License", "Intended Audience :: Science/Research", "Framework :: Jupyter", @@ -34,7 +35,8 @@ requires = [ "psycopg2-binary>=2.8", "flask>=1.0", "ipyevents>=0.6.0", - "typing-extensions" + "typing-extensions", + "merge-args", ] [tool.flit.metadata.requires-extra] diff --git a/src/superintendent/acquisition_functions/decorators.py b/src/superintendent/acquisition_functions/decorators.py index db83bad..2c87c36 100644 --- a/src/superintendent/acquisition_functions/decorators.py +++ b/src/superintendent/acquisition_functions/decorators.py @@ -2,6 +2,11 @@ import typing import numpy as np +from merge_args import merge_args + + +def _dummy_fn(shuffle_prop: float = 0.1): + ... def _shuffle_subset(data: np.ndarray, shuffle_prop: float) -> np.ndarray: @@ -114,23 +119,34 @@ def make_acquisition_function(handle_multioutput="mean"): comes as a list of binary classifier outputs. 
""" - def decorator(fn): - if handle_multioutput == "mean": # define fn where scores are avgd + def decorator( + fn: typing.Callable[[np.ndarray], np.ndarray] + ) -> typing.Callable[[np.ndarray, float], np.ndarray]: + if handle_multioutput is not None: + reduce_fn = multioutput_reduce_fns[handle_multioutput] + + @merge_args(_dummy_fn) @functools.wraps(fn) - def wrapped_fn(probabilities, shuffle_prop=0.1): + def wrapped_fn( + probabilities: np.ndarray, shuffle_prop: float = 0.1 + ): if _is_multioutput(probabilities): scores = np.stack( - tuple(fn(prob) for prob in probabilities), axis=0 - ).mean(axis=0) + tuple(fn(prob) for prob in probabilities), axis=0, + ) + scores = reduce_fn(scores, axis=0) else: scores = fn(probabilities) return _get_indices(scores, shuffle_prop) else: # raise error if list is passed + @merge_args(_dummy_fn) @functools.wraps(fn) - def wrapped_fn(probabilities, shuffle_prop=0.1): + def wrapped_fn( + probabilities: np.ndarray, shuffle_prop: float = 0.1 + ): if _is_multioutput(probabilities): raise ValueError( "The input probabilities is a list of arrays, " From 899c7ab56a03179860bf733cbcd0083fc0f2912a Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:52:58 +0100 Subject: [PATCH 4/7] Move acquis. funs to new file, add BALD --- .../acquisition_functions/__init__.py | 90 ++-------- .../acquisition_functions/functions.py | 155 ++++++++++++++++++ 2 files changed, 165 insertions(+), 80 deletions(-) create mode 100644 src/superintendent/acquisition_functions/functions.py diff --git a/src/superintendent/acquisition_functions/__init__.py b/src/superintendent/acquisition_functions/__init__.py index 7038392..8b5f055 100644 --- a/src/superintendent/acquisition_functions/__init__.py +++ b/src/superintendent/acquisition_functions/__init__.py @@ -1,92 +1,22 @@ """ Functions to prioritise labelling data points (to drive active learning). 
""" -from typing import Dict, Callable -import numpy as np -import scipy.stats - -from .decorators import make_acquisition_function - -__all__ = ["entropy", "margin", "certainty"] - - -@make_acquisition_function(handle_multioutput=None) # noqa: D002 -def entropy(probabilities: np.ndarray) -> np.ndarray: - """ - Sort by the entropy of the probabilities (high to low). - - Parameters - ---------- - probabilities : np.ndarray - An array of probabilities, with the shape n_samples, - n_classes - - Other Parameters - ---------------- - shuffle_prop : float (default=0.1) - The proportion of data points that should be randomly shuffled. This - means the sorting retains some randomness, to avoid biasing your - new labels and catching any minority classes the algorithm currently - classifies as a different label. - - """ - neg_entropy = -scipy.stats.entropy(probabilities.T) - return neg_entropy +from typing import Dict, Callable, Union, List +import numpy as np +from .functions import entropy, margin, certainty, bald, random -@make_acquisition_function(handle_multioutput="mean") # noqa: D002 -def margin(probabilities: np.ndarray) -> np.ndarray: - """ - Sort by the margin between the top two predictions (low to high). - - Parameters - ---------- - probabilities : np.ndarray - An array of probabilities, with the shape n_samples, - n_classes - - Other Parameters - ---------------- - shuffle_prop : float - The proportion of data points that should be randomly shuffled. This - means the sorting retains some randomness, to avoid biasing your - new labels and catching any minority classes the algorithm currently - classifies as a different label. - """ - margin = ( - np.sort(probabilities, axis=1)[:, -1] - - np.sort(probabilities, axis=1)[:, -2] - ) - return margin - - -@make_acquisition_function(handle_multioutput="mean") # noqa: D002 -def certainty(probabilities: np.ndarray): - """ - Sort by the certainty of the maximum prediction. 
- - Parameters - ---------- - probabilities : np.ndarray - An array of probabilities, with the shape n_samples, - n_classes - - Other Parameters - ---------------- - shuffle_prop : float - The proportion of data points that should be randomly shuffled. This - means the sorting retains some randomness, to avoid biasing your - new labels and catching any minority classes the algorithm currently - classifies as a different label. - - """ - certainty = probabilities.max(axis=-1) - return certainty +__all__ = ["entropy", "margin", "certainty", "bald", "functions", "random"] +AcquisitionFunction = Callable[ + [Union[np.ndarray, List[np.ndarray]]], np.ndarray +] -functions: Dict[str, Callable] = { +functions: Dict[str, AcquisitionFunction] = { "entropy": entropy, "margin": margin, "certainty": certainty, + "bald": bald, + "random": random, } """A dictionary of functions to prioritise data.""" diff --git a/src/superintendent/acquisition_functions/functions.py b/src/superintendent/acquisition_functions/functions.py new file mode 100644 index 0000000..e209660 --- /dev/null +++ b/src/superintendent/acquisition_functions/functions.py @@ -0,0 +1,155 @@ +from typing import Dict, Callable +import numpy as np +import scipy.stats + +from .decorators import ( + make_acquisition_function, + require_point_estimate, + require_distribution, +) + +__all__ = ["entropy", "margin", "certainty", "bald"] + + +@make_acquisition_function(handle_multioutput=None) # noqa: D002 +def entropy(probabilities: np.ndarray) -> np.ndarray: + """ + Sort by the entropy of the probabilities (high to low). + + Parameters + ---------- + probabilities : np.ndarray + An array of probabilities, with the shape n_samples, + n_classes + + Other Parameters + ---------------- + shuffle_prop : float (default=0.1) + The proportion of data points that should be randomly shuffled. 
This + means the sorting retains some randomness, to avoid biasing your + new labels and catching any minority classes the algorithm currently + classifies as a different label. + + """ + if probabilities.ndim == 3: + entropy_ = scipy.stats.entropy(probabilities, axis=-2).mean(-1) + else: + entropy_ = scipy.stats.entropy(probabilities, axis=-1) + return -entropy_ + + +@make_acquisition_function(handle_multioutput="mean") # noqa: D002 +@require_point_estimate +def margin(probabilities: np.ndarray) -> np.ndarray: + """ + Sort by the margin between the top two predictions (low to high). + + Parameters + ---------- + probabilities : np.ndarray + An array of probabilities, with the shape n_samples, + n_classes + + Other Parameters + ---------------- + shuffle_prop : float + The proportion of data points that should be randomly shuffled. This + means the sorting retains some randomness, to avoid biasing your + new labels and catching any minority classes the algorithm currently + classifies as a different label. + """ + margin = ( + np.sort(probabilities, axis=1)[:, -1] + - np.sort(probabilities, axis=1)[:, -2] + ) + return margin + + +@make_acquisition_function(handle_multioutput="mean") # noqa: D002 +@require_point_estimate +def certainty(probabilities: np.ndarray): + """ + Sort by the certainty of the maximum prediction. + + Parameters + ---------- + probabilities : np.ndarray + An array of probabilities, with the shape n_samples, + n_classes + + Other Parameters + ---------------- + shuffle_prop : float + The proportion of data points that should be randomly shuffled. This + means the sorting retains some randomness, to avoid biasing your + new labels and catching any minority classes the algorithm currently + classifies as a different label. + + """ + certainty = probabilities.max(axis=-1) + return certainty + + +@make_acquisition_function(handle_multioutput="mean") +@require_distribution +def bald(probabilities): + """ + Sort the data by the BALD criterion [1]. 
+
+    This function only works with probability distribution output, so output
+    should be a 3-dimensional array, of shape (n_samples, n_classes, n_predictions).
+
+    .. [1] Houlsby, Neil, et al. "Bayesian active learning for classification
+       and preference learning." arXiv preprint arXiv:1112.5745 (2011).
+
+    Parameters
+    ----------
+    probabilities : np.ndarray
+        An array of probabilities, with the shape n_samples, n_classes,
+        n_predictions.
+
+    Other Parameters
+    ----------------
+    shuffle_prop : float
+        The proportion of data points that should be randomly shuffled. This
+        means the sorting retains some randomness, to avoid biasing your
+        new labels and catching any minority classes the algorithm currently
+        classifies as a different label.
+
+    """
+    expected_entropy = scipy.stats.entropy(probabilities, axis=1).mean(axis=-1)
+    entropy_expected = scipy.stats.entropy(probabilities.mean(axis=-1), axis=1)
+    return -(entropy_expected - expected_entropy)
+
+
+@make_acquisition_function(handle_multioutput="mean")
+def random(probabilities):
+    """
+    Sort the data randomly.
+
+    This returns a completely random ordering, and is useful as a baseline.
+
+    Parameters
+    ----------
+    probabilities : np.ndarray
+        An array of probabilities.
+
+    Other Parameters
+    ----------------
+    shuffle_prop : float
+        The proportion of data points that should be randomly shuffled. This
+        means the sorting retains some randomness, to avoid biasing your
+        new labels and catching any minority classes the algorithm currently
+        classifies as a different label.
+ """ + return np.random.rand(probabilities.shape[0]) + + +functions: Dict[str, Callable] = { + "entropy": entropy, + "margin": margin, + "certainty": certainty, + "bald": bald, + "random": random, +} +"""A dictionary of functions to prioritise data.""" From a434b88281dba22e623134157bd18aa462750106 Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:53:31 +0100 Subject: [PATCH 5/7] Move around tests for acquisition funs --- ..._prioritisation.py => test_acquisition.py} | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) rename tests/{test_prioritisation.py => test_acquisition.py} (58%) diff --git a/tests/test_prioritisation.py b/tests/test_acquisition.py similarity index 58% rename from tests/test_prioritisation.py rename to tests/test_acquisition.py index 1432002..d844256 100644 --- a/tests/test_prioritisation.py +++ b/tests/test_acquisition.py @@ -37,3 +37,25 @@ def test_certainty(): assert ( certainty(probabilites, shuffle_prop=0) == np.array([0, 2, 1]) ).all() + + +def test_that_multioutput_works_for_certainty(): + probabilities = np.array( + [[0.3, 0.4, 0.3], [0.01, 0.9, 0.09], [0.5, 0.5, 0.0]] + ) + # since aggregation for multioutput is averaging, this should produce same + # output as before: + probabilities = [probabilities] * 3 + assert ( + certainty(probabilities, shuffle_prop=0) == np.array([0, 2, 1]) + ).all() + + # whereas here the output should be different: + probabilities = [ + np.array([[0.3, 0.4, 0.3], [0.01, 0.9, 0.09], [0.5, 0.5, 0.0]]), + np.array([[0.5, 0.5, 0.0], [0.01, 0.9, 0.09], [0.3, 0.4, 0.3]]), + np.array([[0.5, 0.5, 0.0], [0.01, 0.9, 0.09], [0.3, 0.4, 0.3]]), + ] + assert ( + certainty(probabilities, shuffle_prop=0) == np.array([1, 2, 0]) + ).all() From 76fd954b5d63960fd0dbb369945acade6974cbf1 Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:53:41 +0100 Subject: [PATCH 6/7] Typo fix in readme --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md 
b/README.md index 15ebdfb..79e6ccf 100644 --- a/README.md +++ b/README.md @@ -51,8 +51,7 @@ If you want to contribute to `superintendent`, you will need to install the test dependencies as well. You can do so with `pip install superintendent[tests,examples]` - -## Acknowledgements +## Acknowledgements Much of the initial work on `superintendent` was done during my time at [Faculty AI](https://faculty.ai/). From 53f383da91f8c6ee1c4b04f016a23699d808a8fd Mon Sep 17 00:00:00 2001 From: Jan Freyberg Date: Fri, 15 May 2020 15:53:51 +0100 Subject: [PATCH 7/7] Add a skeleton experiment class --- src/superintendent/experiments/__init__.py | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/superintendent/experiments/__init__.py diff --git a/src/superintendent/experiments/__init__.py b/src/superintendent/experiments/__init__.py new file mode 100644 index 0000000..2307463 --- /dev/null +++ b/src/superintendent/experiments/__init__.py @@ -0,0 +1,70 @@ +import numpy as np +from typing import Callable +from math import ceil +from .. 
import acquisition_functions +from ..queueing import SimpleLabellingQueue +from ..base import Labeller +from tqdm.autonotebook import tqdm + + +class ActiveLearningExperiment: + def __init__( + self, + widget: Labeller, + labels, + *, + retrain_interval=None, + # n_initial_labels=None, + shuffle=True, + repeats=1, + ): + + self.widget = widget + if shuffle: + self.widget.queue.shuffle() + + self._labels = labels + + if retrain_interval is None: + self.retrain_interval = len(self.widget.queue) // 20 + else: + self.retrain_interval = retrain_interval + + # a little hack to get the first popped queue item: + # id_ = self.widget.queue._popped[-1] + # self.widget.queue.submit(id_, self._labels[id_]) + self.widget.queue.undo() + # for _ in range((n_initial_labels or self.retrain_interval) - 1): + # id_, feature_ = self.widget.queue.pop() + # self.widget.queue.submit(id_, self._labels[id_]) + + self.repeats = repeats + n_steps = ceil(len(self.widget.queue) / self.retrain_interval) + + self.n_samples = np.empty((n_steps, repeats)) + self.scores = np.empty((n_steps, repeats)) + + def run(self): + + n_steps = ceil(len(self.widget.queue) / self.retrain_interval) + + for repeat in range(self.repeats): + self.widget.queue.shuffle() + while len(self.widget.queue._popped) > 0: + self.widget.queue.undo() + + for step in tqdm(range(n_steps)): + performance = self.widget.retrain() + # print(self.widget.queue.order) + # print(self.widget.queue._popped) + + self.n_samples[step, repeat] = len(self.widget.queue.labels) + # self.n_samples.append(len(self.widget.queue.labels)) + self.scores[step, repeat] = performance + + for _ in range(self.retrain_interval): + try: + id_, feature_ = self.widget.queue.pop() + except IndexError: + break + self.widget.queue.submit(id_, self._labels[id_])