Skip to content
This repository was archived by the owner on Apr 8, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/common/math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
Helper math functions
"""
import os
import argparse
import logging
import numpy as np

def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_level=0.95, seed=None):
"""
Args:
data (np.array) : input data
iterations (int) : how many bootstrapped samples to generate
operators (Dict[str->func]) : map of functions to produce CI for
confidence_level (float) : confidence_level = 1-alpha

Returns:
operators_ci: Dict[str->tuple]
"""
# values will be stored in a dict
bootstrap_runs = {}
for operator_key in operators.keys():
bootstrap_runs[operator_key] = []

sample_size = len(data)
for _ in range(iterations):
bootstrap = np.random.choice(data, size=sample_size, replace=True)
for operator_key, operator_func in operators.items():
bootstrap_runs[operator_key].append(operator_func(bootstrap))

operators_ci = {}
for operator_key in operators.keys():
values = np.array(bootstrap_runs[operator_key])
ci_left = np.percentile(values, ((1-confidence_level)/2*100))
ci_right = np.percentile(values, (100-(1-confidence_level)/2*100))
ci_mean = np.mean(values) # just for fun
operators_ci[operator_key] = (ci_left, ci_mean, ci_right)

return(operators_ci)
72 changes: 72 additions & 0 deletions tests/common/test_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Tests src/common/math.py"""
import os
import pytest
import numpy as np

from common.math import bootstrap_ci

def test_bootstrap_ci_fixed_seed():
"""Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
operators={
'mean':np.mean,
'p90': (lambda x : np.percentile(x, 90)),
'p99': (lambda x : np.percentile(x, 99)),
}

np.random.seed(404) # fixed const

# because we're fixing the seed, we can actually go deeper
expected_values = {
'mean': (0.30000000000000004, 0.99395, 2.1624999999999996),
'p90': (0.5, 2.36413, 5.0),
'p99': (0.593, 3.469213, 5.0),
}

returned_values = bootstrap_ci(
sample_data,
iterations=1000,
operators=operators,
confidence_level=0.95
)

assert returned_values == expected_values


def test_bootstrap_ci_no_seed():
"""Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """
np.random.seed(None) # not const

sample_data = np.random.rand(100)
operators={
'mean':np.mean,
'p90': (lambda x : np.percentile(x, 90)),
'p99': (lambda x : np.percentile(x, 99)),
}

returned_values = bootstrap_ci(
sample_data,
iterations=1000,
operators=operators,
confidence_level=0.95
)

for key in operators:
# check type
assert key in returned_values
assert isinstance(returned_values[key], tuple)
assert len(returned_values[key]) == 3

# basic interval ordering
ci_left, ci_mean, ci_right = returned_values[key]
assert ci_left <= ci_mean
assert ci_mean <= ci_right

# because it's a bootstrap, these are supposed to be true
assert min(sample_data) <= ci_left
assert ci_right <= max(sample_data)

# tests that are specific to the operators
assert returned_values['p90'][0] <= returned_values['p99'][0] # p90 < p99 so left CI also
assert returned_values['p90'][1] <= returned_values['p99'][1] # p90 < p99 so mean also
assert returned_values['p90'][2] <= returned_values['p99'][2] # p90 < p99 so right CI also