From 9e9d6dc225cc30659d699a72dacfdf5be94679b4 Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 13:33:39 -0800
Subject: [PATCH 1/8] bootstrap ci math helper

---
 src/common/math.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 src/common/math.py

diff --git a/src/common/math.py b/src/common/math.py
new file mode 100644
index 00000000..d0890eae
--- /dev/null
+++ b/src/common/math.py
@@ -0,0 +1,50 @@
+"""
+Helper math functions
+"""
+import os
+import argparse
+import logging
+import numpy as np
+
+def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, alpha=0.05):
+    """
+    Args:
+        data (np.array) : input data
+        iterations (int) : how many bootstrapped samples to generate
+        operators (Dict[str->func]) : map of functions to produce CI for
+    
+    Returns:
+        bootstrap_cis: Dict[str->tuple]
+    """
+    # values will be stored in a dict
+    bootstrap_runs = {}
+    for operator_key in operators.keys():
+        bootstrap_runs[operator_key] = []
+
+    sample_size = len(data)
+    for _ in range(iterations):
+        bootstrap = np.random.choice(data, size=sample_size, replace=True)
+        for operator_key, operator_func in operators.items():
+            bootstrap_runs[operator_key].append(operator_func(bootstrap))
+
+    operators_ci = {}
+    for operator_key in operators.keys():
+        values = np.array(bootstrap_runs[operator_key])
+        ci_left = np.percentile(values, round(alpha/2*100))
+        ci_right = np.percentile(values, round(100-alpha/2*100))
+        ci_mean = np.mean(values) # just for fun
+        operators_ci[operator_key] = (ci_left, ci_mean, ci_right)
+
+    return(operators_ci)
+
+if __name__ == "__main__":
+    sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
+    print(bootstrap_ci(
+        sample_data,
+        iterations=1000,
+        operators={
+            'mean':np.mean,
+            'p90': (lambda x : np.percentile(x, 90)),
+            'p99': (lambda x : np.percentile(x, 99)),
+        }
+    ))
\ No newline at end of file

From 520c905c773be4ad892169b5c82ef0a011effe22 Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 13:39:13 -0800
Subject: [PATCH 2/8] use confidence level instead of alpha

---
 src/common/math.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/common/math.py b/src/common/math.py
index d0890eae..4f63f35b 100644
--- a/src/common/math.py
+++ b/src/common/math.py
@@ -6,15 +6,16 @@
 import logging
 import numpy as np
 
-def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, alpha=0.05):
+def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_level=0.95):
     """
     Args:
         data (np.array) : input data
         iterations (int) : how many bootstrapped samples to generate
         operators (Dict[str->func]) : map of functions to produce CI for
+        confidence_level (float) : confidence_level = 1-alpha
     
     Returns:
-        bootstrap_cis: Dict[str->tuple]
+        operators_ci: Dict[str->tuple]
     """
     # values will be stored in a dict
     bootstrap_runs = {}
@@ -30,8 +31,8 @@ def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, alpha=0.05):
     operators_ci = {}
     for operator_key in operators.keys():
         values = np.array(bootstrap_runs[operator_key])
-        ci_left = np.percentile(values, round(alpha/2*100))
-        ci_right = np.percentile(values, round(100-alpha/2*100))
+        ci_left = np.percentile(values, ((1-confidence_level)/2*100))
+        ci_right = np.percentile(values, (100-(1-confidence_level)/2*100))
         ci_mean = np.mean(values) # just for fun
         operators_ci[operator_key] = (ci_left, ci_mean, ci_right)
 

From b288e428e5c3489e6365874b2baf47726eb53d68 Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 13:53:55 -0800
Subject: [PATCH 3/8] add bootstrap_ci unit test and seed argument, remove __main__ demo

---
 src/common/math.py        | 14 +-----------
 tests/common/test_math.py | 47 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 13 deletions(-)
 create mode 100644 tests/common/test_math.py

diff --git a/src/common/math.py b/src/common/math.py
index 4f63f35b..8defb88f 100644
--- a/src/common/math.py
+++ b/src/common/math.py
@@ -6,7 +6,7 @@
 import logging
 import numpy as np
 
-def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_level=0.95):
+def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_level=0.95, seed=None):
     """
     Args:
         data (np.array) : input data
@@ -37,15 +37,3 @@ def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_l
         operators_ci[operator_key] = (ci_left, ci_mean, ci_right)
 
     return(operators_ci)
-
-if __name__ == "__main__":
-    sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
-    print(bootstrap_ci(
-        sample_data,
-        iterations=1000,
-        operators={
-            'mean':np.mean,
-            'p90': (lambda x : np.percentile(x, 90)),
-            'p99': (lambda x : np.percentile(x, 99)),
-        }
-    ))
\ No newline at end of file
diff --git a/tests/common/test_math.py b/tests/common/test_math.py
new file mode 100644
index 00000000..84dbed9b
--- /dev/null
+++ b/tests/common/test_math.py
@@ -0,0 +1,47 @@
+"""Tests src/common/math.py"""
+import os
+import pytest
+import numpy as np
+
+from common.math import bootstrap_ci
+
+def test_bootstrap_ci():
+    """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
+    sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
+    operators={
+        'mean':np.mean,
+        'p90': (lambda x : np.percentile(x, 90)),
+        'p99': (lambda x : np.percentile(x, 99)),
+    }
+
+    np.random.seed(404) # fixed const
+
+    # because we're fixing the seed, we can actually go deeper
+    expected_values = {
+        'mean': (0.30000000000000004, 0.99395, 2.1624999999999996),
+        'p90': (0.5, 2.36413, 5.0),
+        'p99': (0.593, 3.469213, 5.0),
+    }
+
+    returned_values = bootstrap_ci(
+        sample_data,
+        iterations=1000,
+        operators=operators
+    )
+
+    for key in operators:
+        # check type
+        assert key in returned_values
+        assert isinstance(returned_values[key], tuple)
+        assert len(returned_values[key]) == 3
+
+        # basic interval over
+        ci_left, ci_mean, ci_right = returned_values[key]
+        assert ci_left <= ci_mean
+        assert ci_mean <= ci_right
+
+        # because it's a bootstrap, these are supposed to be true
+        assert min(sample_data) <= ci_left
+        assert ci_right <= max(sample_data)
+
+    assert returned_values == expected_values
\ No newline at end of file

From e1fede7aa6c2337f0e2ed58f02e8b63edfba386f Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 13:56:21 -0800
Subject: [PATCH 4/8] split bootstrap_ci test into fixed-seed and no-seed tests

---
 tests/common/test_math.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tests/common/test_math.py b/tests/common/test_math.py
index 84dbed9b..3956be3a 100644
--- a/tests/common/test_math.py
+++ b/tests/common/test_math.py
@@ -5,7 +5,7 @@
 
 from common.math import bootstrap_ci
 
-def test_bootstrap_ci():
+def test_bootstrap_ci_fixed_seed():
     """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
     sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
     operators={
@@ -29,6 +29,26 @@ def test_bootstrap_ci():
         operators=operators
     )
 
+    assert returned_values == expected_values
+
+
+def test_bootstrap_ci_no_seed():
+    """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
+    sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
+    operators={
+        'mean':np.mean,
+        'p90': (lambda x : np.percentile(x, 90)),
+        'p99': (lambda x : np.percentile(x, 99)),
+    }
+
+    np.random.seed(None) # not const
+
+    returned_values = bootstrap_ci(
+        sample_data,
+        iterations=1000,
+        operators=operators
+    )
+
     for key in operators:
         # check type
         assert key in returned_values
@@ -43,5 +63,3 @@ def test_bootstrap_ci():
         # because it's a bootstrap, these are supposed to be true
         assert min(sample_data) <= ci_left
         assert ci_right <= max(sample_data)
-
-    assert returned_values == expected_values
\ No newline at end of file

From 9e956b810b25689e96e925afa25e4a25ffd390db Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 13:57:41 -0800
Subject: [PATCH 5/8] draw random sample data in no-seed test

---
 tests/common/test_math.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/common/test_math.py b/tests/common/test_math.py
index 3956be3a..eb7cec2c 100644
--- a/tests/common/test_math.py
+++ b/tests/common/test_math.py
@@ -34,14 +34,15 @@ def test_bootstrap_ci_fixed_seed():
 
 def test_bootstrap_ci_no_seed():
     """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
-    sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
+    np.random.seed(None) # not const
+
+    sample_data = np.random.rand(100)
     operators={
         'mean':np.mean,
         'p90': (lambda x : np.percentile(x, 90)),
         'p99': (lambda x : np.percentile(x, 99)),
     }
 
-    np.random.seed(None) # not const
 
     returned_values = bootstrap_ci(
         sample_data,

From 86a012188f43cbc38c0dfa9e5f7c6986b41124f4 Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 14:00:02 -0800
Subject: [PATCH 6/8] add p90 vs p99 ordering assertions to no-seed test

---
 tests/common/test_math.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/common/test_math.py b/tests/common/test_math.py
index eb7cec2c..6de360c6 100644
--- a/tests/common/test_math.py
+++ b/tests/common/test_math.py
@@ -64,3 +64,8 @@ def test_bootstrap_ci_no_seed():
         # because it's a bootstrap, these are supposed to be true
         assert min(sample_data) <= ci_left
         assert ci_right <= max(sample_data)
+
+    # tests that are specific to the operators
+    assert returned_values['p90'][0] <= returned_values['p99'][0] # p90 < p99 so left CI also
+    assert returned_values['p90'][1] <= returned_values['p99'][1] # p90 < p99 so mean also
+    assert returned_values['p90'][2] <= returned_values['p99'][2] # p90 < p99 so right CI also

From 34ca1341083c70c44f92a27c19bf2f68dc455984 Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 14:00:44 -0800
Subject: [PATCH 7/8] pass confidence_level explicitly in bootstrap_ci tests

---
 tests/common/test_math.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/common/test_math.py b/tests/common/test_math.py
index 6de360c6..53609c4e 100644
--- a/tests/common/test_math.py
+++ b/tests/common/test_math.py
@@ -26,7 +26,8 @@ def test_bootstrap_ci_fixed_seed():
     returned_values = bootstrap_ci(
         sample_data,
         iterations=1000,
-        operators=operators
+        operators=operators,
+        confidence_level=0.95
     )
 
     assert returned_values == expected_values
@@ -47,7 +48,8 @@ def test_bootstrap_ci_no_seed():
     returned_values = bootstrap_ci(
         sample_data,
         iterations=1000,
-        operators=operators
+        operators=operators,
+        confidence_level=0.95
     )
 
     for key in operators:

From e01e03ababa4c949ec96f68b35d24bfc0296959d Mon Sep 17 00:00:00 2001
From: Jeff Omhover <jf.omhover@gmail.com>
Date: Wed, 24 Nov 2021 14:06:30 -0800
Subject: [PATCH 8/8] remove stray blank line and fix comment wording in no-seed test

---
 tests/common/test_math.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/common/test_math.py b/tests/common/test_math.py
index 53609c4e..75aaab59 100644
--- a/tests/common/test_math.py
+++ b/tests/common/test_math.py
@@ -44,7 +44,6 @@ def test_bootstrap_ci_no_seed():
         'p99': (lambda x : np.percentile(x, 99)),
     }
 
-
     returned_values = bootstrap_ci(
         sample_data,
         iterations=1000,
@@ -58,7 +57,7 @@ def test_bootstrap_ci_no_seed():
         assert isinstance(returned_values[key], tuple)
         assert len(returned_values[key]) == 3
 
-        # basic interval over
+        # basic interval ordering
         ci_left, ci_mean, ci_right = returned_values[key]
         assert ci_left <= ci_mean
         assert ci_mean <= ci_right