From 518527c37c0b14c18ade9742c6a121ee40140316 Mon Sep 17 00:00:00 2001 From: Chris Morrow Date: Thu, 19 Jul 2018 23:02:17 -0500 Subject: [PATCH 1/7] Removing unused static doc files --- .gitignore | 2 + build/lib/sci_analysis/old/__init__.py | 0 .../lib/sci_analysis/preferences/__init__.py | 0 .../sci_analysis/preferences/preferences.py | 76 -- build/lib/sci_analysis/test/test_analyze.py | 459 ----------- build/lib/sci_analysis/test/test_anova.py | 127 --- .../lib/sci_analysis/test/test_correlation.py | 211 ----- .../sci_analysis/test/test_data_operations.py | 674 ---------------- .../sci_analysis/test/test_equalvariance.py | 251 ------ .../sci_analysis/test/test_graph_boxplots.py | 735 ------------------ .../lib/sci_analysis/test/test_graph_histo.py | 381 --------- .../sci_analysis/test/test_graph_scatter.py | 319 -------- .../sci_analysis/test/test_groupstatistics.py | 419 ---------- build/lib/sci_analysis/test/test_kruskal.py | 141 ---- build/lib/sci_analysis/test/test_kstest.py | 149 ---- .../lib/sci_analysis/test/test_linregress.py | 188 ----- .../lib/sci_analysis/test/test_mannwhitney.py | 167 ---- build/lib/sci_analysis/test/test_normtest.py | 135 ---- .../lib/sci_analysis/test/test_statistics.py | 207 ----- build/lib/sci_analysis/test/test_ttest.py | 248 ------ .../lib/sci_analysis/test/test_twosampleks.py | 145 ---- build/lib/sci_analysis/test/test_vector.py | 341 -------- sci_analysis.egg-info/PKG-INFO | 2 +- 23 files changed, 3 insertions(+), 5374 deletions(-) delete mode 100644 build/lib/sci_analysis/old/__init__.py delete mode 100644 build/lib/sci_analysis/preferences/__init__.py delete mode 100644 build/lib/sci_analysis/preferences/preferences.py delete mode 100644 build/lib/sci_analysis/test/test_analyze.py delete mode 100644 build/lib/sci_analysis/test/test_anova.py delete mode 100644 build/lib/sci_analysis/test/test_correlation.py delete mode 100644 build/lib/sci_analysis/test/test_data_operations.py delete mode 100644 build/lib/sci_analysis/test/test_equalvariance.py delete mode 100644 build/lib/sci_analysis/test/test_graph_boxplots.py delete mode 100644 build/lib/sci_analysis/test/test_graph_histo.py delete mode 100644 build/lib/sci_analysis/test/test_graph_scatter.py delete mode 100644 build/lib/sci_analysis/test/test_groupstatistics.py delete mode 100644 build/lib/sci_analysis/test/test_kruskal.py delete mode 100644 build/lib/sci_analysis/test/test_kstest.py delete mode 100644 build/lib/sci_analysis/test/test_linregress.py delete mode 100644 build/lib/sci_analysis/test/test_mannwhitney.py delete mode 100644 build/lib/sci_analysis/test/test_normtest.py delete mode 100644 build/lib/sci_analysis/test/test_statistics.py delete mode 100644 build/lib/sci_analysis/test/test_ttest.py delete mode 100644 build/lib/sci_analysis/test/test_twosampleks.py delete mode 100644 build/lib/sci_analysis/test/test_vector.py diff --git a/.gitignore b/.gitignore index cbf81e7..5129187 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,5 @@ build_docs.sh docs/index.rst.bak docs/_build/* docs/_build/html/_static/classic.css +.cache/v/cache/lastfailed +.cache/v/cache/lastfailed diff --git a/build/lib/sci_analysis/old/__init__.py b/build/lib/sci_analysis/old/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/build/lib/sci_analysis/preferences/__init__.py b/build/lib/sci_analysis/preferences/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/build/lib/sci_analysis/preferences/preferences.py b/build/lib/sci_analysis/preferences/preferences.py deleted file mode 100644 index 1d190a8..0000000 --- a/build/lib/sci_analysis/preferences/preferences.py +++ /dev/null @@ -1,76 +0,0 @@ - - -class DefaultPreferences(type): - """The type for Default Preferences that cannot be modified""" - - def __setattr__(cls, key, value): - if key == "defaults": - raise AttributeError("Cannot override defaults") - else: - return type.__setattr__(cls, key, value) - - def __delattr__(cls, item): - if item == "defaults": - raise AttributeError("Cannot delete defaults") - else: - return type.__delattr__(cls, item) - - -class Preferences(object): - """The base Preferences class""" - - __metaclass__ = DefaultPreferences - - def list(self): - print(self.__dict__) - return self.__dict__ - - def defaults(self): - return tuple(self.__dict__.values()) - - -class GraphPreferences(object): - """Handles graphing preferences.""" - - class Plot(object): - boxplot = True - histogram = True - cdf = False - oneway = True - probplot = True - scatter = True - tukey = False - histogram_borders = False - boxplot_borders = False - defaults = (boxplot, histogram, cdf, oneway, probplot, scatter, tukey, histogram_borders, boxplot_borders) - - distribution = {'counts': False, - 'violin': False, - 'boxplot': True, - 'fit': False, - 'fit_style': 'r--', - 'fit_width': '2', - 'cdf_style': 'k-', - 'distribution': 'norm', - 'bins': 20, - 'color': 'green' - } - - bivariate = {'points': True, - 'point_style': 'k.', - 'contours': False, - 'contour_width': 1.25, - 'fit': True, - 'fit_style': 'r-', - 'fit_width': 1, - 'boxplot': True, - 'violin': True, - 'bins': 20, - 'color': 'green' - } - - oneway = {'boxplot': True, - 'violin': False, - 'point_style': '^', - 'line_style': '-' - } diff --git a/build/lib/sci_analysis/test/test_analyze.py b/build/lib/sci_analysis/test/test_analyze.py deleted file mode 100644 index 0b95b90..0000000 --- a/build/lib/sci_analysis/test/test_analyze.py +++ /dev/null @@ -1,459 +0,0 @@ -import unittest -import numpy as np -import pandas as pd -import scipy.stats as st -from os import path, getcwd - -from ..analysis.exc import NoDataError -from ..analysis import analyze, analyse - - -class MyTestCase(unittest.TestCase): - - _seed = 987654321 - - @property - def save_path(self): - if getcwd().split('/')[-1] == 'test': - return './images/' - elif getcwd().split('/')[-1] == 'sci_analysis': - if path.exists('./setup.py'): - return './sci_analysis/test/images/' - else: - return './test/images/' - # elif getcwd().split('/')[-1] == 'sci-analysis': - # if path.exists('./setup.py'): - # return './sci_analysis/test/images/' - # else: - # return './test/images/' - else: - './' - - def test_100_catch_no_data_1_array(self): - """Catch the case where no data is passed""" - self.assertRaises(NoDataError, lambda: analyze([])) - - def test_101_catch_no_data_None(self): - """Catch the case where None is passed""" - self.assertRaises(ValueError, lambda: analyze(None)) - - def test_102_catch_xdata_no_iterable(self): - """Catch the case where xdata is not iterable""" - self.assertRaises(TypeError, lambda: analyze(1)) - - # def test_103_catch_more_than_2_data_args(self): - # """Catch the case where more than 2 data arguments are given""" - # self.assertRaises(ValueError, lambda: analyze(st.norm.rvs(size=10), st.norm.rvs(size=10), st.norm.rvs(size=10))) - - def test_104_ttest_large_default(self): - """Perform an analysis on a large sample using the ttest""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], debug=True, - save_to='{}test_analyze_104'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_105_ttest_small_default(self): - """Perform an analysis on a small sample using the ttest""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=10) - input_2_array = st.norm.rvs(size=10) - self.assertEqual(analyze([input_1_array, input_2_array], debug=True, - save_to='{}test_analyze_105'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_106_ttest_large_group(self): - """Perform an analysis on a large sample using the ttest with set group names""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - groups=['Test 1', 'Test 2'], - debug=True, - save_to='{}test_analyze_106'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_107_ttest_large_dict(self): - """Perform an analysis on a large sample using the ttest with set dict""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze({'dTest 1': input_1_array, 'dTest 2': input_2_array}, - debug=True, - save_to='{}test_analyze_107'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_108_ttest_xlabel_ylabel(self): - """Perform an analysis on a large sample using the ttest with labels set""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - title='Labels test', - xname='X Test', - yname='Y Test', - debug=True, - save_to='{}test_analyze_108'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_109_mannwhitney_default(self): - """Perform an analysis on a non-normal data set using the Mann Whitney test""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.weibull_min.rvs(1.2, size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - title='MannWhitney Default', - debug=True, - save_to='{}test_analyze_109'.format(self.save_path)), - ['Oneway', 'MannWhitney']) - - def test_110_mannwhitney_groups(self): - """Perform an analysis on a non-normal data set using the Mann Whitney test""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.weibull_min.rvs(1.2, size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - groups=['Test 1', 'Test 2'], - title='MannWhitney Groups', - debug=True, - save_to='{}test_analyze_110'.format(self.save_path)), - ['Oneway', 'MannWhitney']) - - def test_111_mannwhitney_groups(self): - """Perform an analysis on a non-normal data set using the Mann Whitney test""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.weibull_min.rvs(1.2, size=100) - self.assertEqual(analyze({'dTest 1': input_1_array, 'dTest 2': input_2_array}, - title='MannWhitney Dict', - debug=True, - save_to='{}test_analyze_111'.format(self.save_path)), - ['Oneway', 'MannWhitney']) - - def test_112_twosampleks_default(self): - """Perform an analysis on a small bi-modal data set using the twosample ks test""" - np.random.seed(self._seed) - input_1_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - input_2_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - self.assertEqual(analyze([input_1_array, input_2_array], - title='TwoSampleKSTest Default', - debug=True, - save_to='{}test_analyze_112'.format(self.save_path)), - ['Oneway', 'TwoSampleKSTest']) - - def test_113_twosampleks_groups(self): - """Perform an analysis on a small bi-modal data set using the twosample ks test""" - np.random.seed(self._seed) - input_1_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - input_2_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - self.assertEqual(analyze([input_1_array, input_2_array], - groups=['Group 1', 'Group 2'], - title='TwoSampleKSTest Groups', - debug=True, - save_to='{}test_analyze_113'.format(self.save_path)), - ['Oneway', 'TwoSampleKSTest']) - - def test_114_twosampleks_dict(self): - """Perform an analysis on a small bi-modal data set using the twosample ks test""" - np.random.seed(self._seed) - input_1_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - input_2_array = np.append(st.norm.rvs(0, 1, size=10), st.norm.rvs(10, 1, size=10)) - self.assertEqual(analyze({'dGroup 1': input_1_array, 'dGroup 2': input_2_array}, - title='TwoSampleKSTest Dict', - debug=True, - save_to='{}test_analyze_114'.format(self.save_path)), - ['Oneway', 'TwoSampleKSTest']) - - def test_115_ttest_name_categories_default(self): - """Perform an analysis on a large sample using the ttest with labels set""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - title='Labels test 2', - categories='X Test', - name='Y Test', - debug=True, - save_to='{}test_analyze_115'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_116_ttest_name_categories_groups(self): - """Perform an analysis on a large sample using the ttest with labels set""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - groups=['Group 1', 'Group 2'], - title='Labels test 2 Groups', - categories='X Test', - name='Y Test', - debug=True, - save_to='{}test_analyze_116'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_117_ttest_name_categories_dict(self): - """Perform an analysis on a large sample using the ttest with labels set""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze({'dGroup 1': input_1_array, 'dGroup 2': input_2_array}, - title='Labels test Dict', - categories='X Test', - name='Y Test', - debug=True, - save_to='{}test_analyze_117'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_118_ttest_alpha(self): - """Perform an analysis on a large sample using the ttest with alpha 0.02""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - title='Alpha 0.02', - alpha=0.02, - debug=True, - save_to='{}test_analyze_118'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_119_ttest_no_nqp(self): - """Perform an analysis on a large sample using the ttest without a nqp""" - np.random.seed(self._seed) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - self.assertEqual(analyze([input_1_array, input_2_array], - title='No NQP', - nqp=False, - debug=True, - save_to='{}test_analyze_119'.format(self.save_path)), - ['Oneway', 'TTest']) - - def test_120_bivariate_default(self): - """Perform a correlation on two data sets with default settings""" - np.random.seed(self._seed) - input_x_array = st.weibull_min.rvs(2, size=200) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertEqual(analyze(input_x_array, input_y_array, - debug=True, - save_to='{}test_analyze_120'.format(self.save_path)), - ['Bivariate']) - - def test_121_bivariate_xname_yname(self): - """Perform a correlation on two data sets with labels set""" - np.random.seed(self._seed) - input_x_array = st.weibull_min.rvs(2, size=200) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertEqual(analyze(input_x_array, input_y_array, - xname='X Test', - yname='Y Test', - title='Labels Test', - debug=True, - save_to='{}test_analyze_121'.format(self.save_path)), - ['Bivariate']) - - def test_122_bivariate_alpha(self): - """Perform a correlation on two data sets with alpha set to 0.02""" - np.random.seed(self._seed) - input_x_array = st.weibull_min.rvs(2, size=200) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertEqual(analyze(input_x_array, input_y_array, - alpha=0.02, - title='Alpha Test', - debug=True, - save_to='{}test_analyze_122'.format(self.save_path)), - ['Bivariate']) - - def test_123_distribution_default(self): - """Perform a distribution analysis with default settings""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - debug=True, - save_to='{}test_analyze_123'.format(self.save_path)), - ['Distribution', 'NormTest']) - - def test_124_distribution_label(self): - """Perform a distribution analysis with label set""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - name='Test', - title='Label Test', - debug=True, - save_to='{}test_analyze_124'.format(self.save_path)), - ['Distribution', 'NormTest']) - - def test_125_distribution_population(self): - """Perform a distribution analysis with population set""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - sample=False, - title='Population Stats', - debug=True, - save_to='{}test_analyze_125'.format(self.save_path)), - ['Distribution', 'NormTest']) - - def test_126_distribution_cdf(self): - """Perform a distribution analysis with cdf""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - cdf=True, - title='CDF Test', - debug=True, - save_to='{}test_analyze_126'.format(self.save_path)), - ['Distribution', 'NormTest']) - - def test_127_distribution_fit_norm_default(self): - """Perform a distribution analysis with normal dist KSTest""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - distribution='norm', - fit=True, - title='Norm Fit', - debug=True, - save_to='{}test_analyze_127'.format(self.save_path)), - ['Distribution', 'KSTest']) - - def test_128_distribution_fit_norm_alpha(self): - """Perform a distribution analysis with normal dist KSTest and alpha 0.02""" - np.random.seed(self._seed) - input_array = st.norm.rvs(size=200) - self.assertEqual(analyze(input_array, - distribution='norm', - fit=True, - alpha=0.02, - title='Alpha 0.02', - debug=True, - save_to='{}test_analyze_128'.format(self.save_path)), - ['Distribution', 'KSTest']) - - def test_129_distribution_categorical_default(self): - """Perform a distribution analysis with categorical data and default settings.""" - np.random.seed(self._seed) - input_array = ['abcdefghijklmnopqrstuvwxyz'[:np.random.randint(1,26)] for _ in range(30)] - self.assertListEqual(analyze(input_array, - debug=True, - save_to='{}test_analyze_129'.format(self.save_path)), - ['Frequencies']) - - def test_130_distribution_categorical_percent(self): - """Perform a distribution analysis with categorical data and percent y-axis.""" - np.random.seed(self._seed) - input_array = ['abcdefghijklmnopqrstuvwxyz'[:np.random.randint(1,26)] for _ in range(30)] - self.assertListEqual(analyze(input_array, - debug=True, - percent=True, - save_to='{}test_analyze_130'.format(self.save_path)), - ['Frequencies']) - - def test_131_distribution_categorical_percent_alias(self): - """Perform a distribution analysis with categorical data and percent y-axis using the analyse alias.""" - np.random.seed(self._seed) - input_array = ['abcdefghijklmnopqrstuvwxyz'[:np.random.randint(1,26)] for _ in range(30)] - self.assertListEqual(analyse(input_array, - debug=True, - percent=True, - save_to='{}test_analyze_131'.format(self.save_path)), - ['Frequencies']) - - def test_132_stacked_ttest_default(self): - np.random.seed(self._seed) - input_1_array = pd.DataFrame({'input': st.norm.rvs(size=2000), 'group': ['Group 1'] * 2000}) - input_2_array = pd.DataFrame({'input': st.norm.rvs(1, size=2000), 'group': ['Group 2'] * 2000}) - df = pd.concat([input_1_array, input_2_array]) - self.assertEqual(analyze(df['input'], groups=df['group'], - debug=True, - save_to='{}test_analyze_132'.format(self.save_path)), - ['Stacked Oneway', 'TTest']) - - def test_133_two_group_bivariate(self): - """Perform a correlation with two groups.""" - np.random.seed(self._seed) - input_1_x = st.norm.rvs(size=100) - input_1_y = [x + st.norm.rvs(0, 0.5, size=1)[0] for x in input_1_x] - input_2_x = st.norm.rvs(size=100) - input_2_y = [(x / 2) + st.norm.rvs(0, 0.2, size=1)[0] for x in input_2_x] - grp = [1] * 100 + [2] * 100 - cs_x = np.concatenate((input_1_x, input_2_x)) - cs_y = np.concatenate((input_1_y, input_2_y)) - input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) - self.assertEqual(analyze(input_array['a'], input_array['b'], groups=input_array['c'], - debug=True, - save_to='{}test_analyze_133'.format(self.save_path)), - ['Group Bivariate']) - - def test_134_three_group_bivariate(self): - """Perform a correlation with three groups.""" - np.random.seed(self._seed) - size = 100 - input_1_x = st.norm.rvs(size=size) - input_1_y = [x + st.norm.rvs(0, 0.5, size=1)[0] for x in input_1_x] - input_2_x = st.norm.rvs(size=size) - input_2_y = [(x / 2) + st.norm.rvs(0, 0.2, size=1)[0] for x in input_2_x] - input_3_x = st.norm.rvs(size=size) - input_3_y = np.array([(x * 1.5) + st.norm.rvs(size=1)[0] for x in input_3_x]) - 0.5 - grp = [1] * size + [2] * size + [3] * size - cs_x = np.concatenate((input_1_x, input_2_x, input_3_x)) - cs_y = np.concatenate((input_1_y, input_2_y, input_3_y)) - input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) - self.assertEqual(analyze(input_array['a'], input_array['b'], groups=input_array['c'], - debug=True, - save_to='{}test_analyze_134'.format(self.save_path)), - ['Group Bivariate']) - - def test_135_stacked_manwhitney_default(self): - np.random.seed(self._seed) - input_1_array = pd.DataFrame({'input': st.norm.rvs(size=2000), 'group': ['Group 1'] * 2000}) - input_2_array = pd.DataFrame({'input': st.weibull_min.rvs(1.2, size=2000), 'group': ['Group 2'] * 2000}) - df = pd.concat([input_1_array, input_2_array]) - self.assertEqual(analyze(df['input'], groups=df['group'], - debug=True, - save_to='{}test_analyze_135'.format(self.save_path)), - ['Stacked Oneway', 'MannWhitney']) - - def test_136_stacked_twosampleks_default(self): - np.random.seed(self._seed) - size = 10 - input_1_array = pd.DataFrame({'input': np.append(st.norm.rvs(0, 1, size=size), st.norm.rvs(10, 1, size=size)), - 'group': ['Group 1'] * size * 2}) - input_2_array = pd.DataFrame({'input': np.append(st.norm.rvs(0, 1, size=size), st.norm.rvs(10, 1, size=size)), - 'group': ['Group 2'] * size * 2}) - df = pd.concat([input_1_array, input_2_array]) - self.assertListEqual(analyze(df['input'], groups=df['group'], - debug=True, - save_to='{}test_analyze_136'.format(self.save_path)), - ['Stacked Oneway', 'TwoSampleKSTest']) - - def test_137_stacked_anova_default(self): - np.random.seed(self._seed) - size = 100 - input_1_array = pd.DataFrame({'input': st.norm.rvs(size=size), 'group': ['Group 1'] * size}) - input_2_array = pd.DataFrame({'input': st.norm.rvs(size=size), 'group': ['Group 2'] * size}) - input_3_array = pd.DataFrame({'input': st.norm.rvs(0.5, size=size), 'group': ['Group 3'] * size}) - input_4_array = pd.DataFrame({'input': st.norm.rvs(size=size), 'group': ['Group 4'] * size}) - df = pd.concat([input_1_array, input_2_array, input_3_array, input_4_array]) - self.assertEqual(analyze(df['input'], groups=df['group'], - debug=True, - save_to='{}test_analyze_137'.format(self.save_path)), - ['Stacked Oneway', 'Anova']) - - def test_138_stacked_kw_default(self): - np.random.seed(self._seed) - size = 100 - input_1_array = pd.DataFrame({'input': st.norm.rvs(0, 0.75, size=size), 'group': ['Group 1'] * size}) - input_2_array = pd.DataFrame({'input': st.norm.rvs(size=size), 'group': ['Group 2'] * size}) - input_3_array = pd.DataFrame({'input': st.norm.rvs(0.5, size=size), 'group': ['Group 3'] * size}) - input_4_array = pd.DataFrame({'input': st.norm.rvs(size=size), 'group': ['Group 4'] * size}) - df = pd.concat([input_1_array, input_2_array, input_3_array, input_4_array]) - self.assertEqual(analyze(df['input'], groups=df['group'], - debug=True, - save_to='{}test_analyze_138'.format(self.save_path)), - ['Stacked Oneway', 'Kruskal']) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_anova.py b/build/lib/sci_analysis/test/test_anova.py deleted file mode 100644 index 98311f5..0000000 --- a/build/lib/sci_analysis/test/test_anova.py +++ /dev/null @@ -1,127 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis.hypo_tests import Anova -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - def test_550_ANOVA_matched(self): - """Test the ANOVA class on matched data""" - np.random.seed(987654321) - x_parms = [4, 1.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*x_parms, size=100) - z_input_array = st.norm.rvs(*x_parms, size=100) - alpha = 0.05 - exp = Anova(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Oneway ANOVA ------------- - -alpha = 0.0500 -f value = 0.1076 -p value = 0.8980 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: ANOVA Type I error") - self.assertAlmostEqual(exp.statistic, 0.1076, delta=0.0001) - self.assertAlmostEqual(exp.f_value, 0.1076, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.898, delta=0.001) - self.assertEqual(str(exp), output) - - def test_553_ANOVA_unmatched(self): - """Test the ANOVA class on unmatched data""" - np.random.seed(987654321) - x_parms = [4, 1.75] - y_parms = [6, 1.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.norm.rvs(*x_parms, size=100) - alpha = 0.05 - self.assertLess(Anova(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False).p_value, alpha, - "FAIL: ANOVA Type II error") - - def test_554_ANOVA_matched_just_above_min_size(self): - """Test the ANOVA class on matched data just above min size""" - np.random.seed(987654321) - x_parms = [4, 1.75] - x_input_array = st.norm.rvs(*x_parms, size=3) - y_input_array = st.norm.rvs(*x_parms, size=3) - z_input_array = st.norm.rvs(*x_parms, size=3) - alpha = 0.05 - exp = Anova(x_input_array, y_input_array, z_input_array, alpha=alpha, display=True) - output = """ - -Oneway ANOVA ------------- - -alpha = 0.0500 -f value = 0.0285 -p value = 0.9720 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha) - self.assertEqual(str(exp), output) - - def test_555_ANOVA_matched_just_at_size(self): - """Test the ANOVA class on matched data at min size""" - np.random.seed(987654321) - x_parms = [4, 1.75] - x_input_array = st.norm.rvs(*x_parms, size=2) - y_input_array = st.norm.rvs(*x_parms, size=2) - z_input_array = st.norm.rvs(*x_parms, size=2) - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: Anova(x_input_array, y_input_array, z_input_array, - alpha=alpha, - display=False).p_value) - - def test_556_ANOVA_matched_single_empty_vector(self): - """Test the ANOVA class on matched data with a single empty vector""" - np.random.seed(987654321) - x_parms = [4, 1.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = ["one", "two", "three", "four", "five"] - z_input_array = st.norm.rvs(*x_parms, size=100) - alpha = 0.05 - exp = Anova(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Oneway ANOVA ------------- - -alpha = 0.0500 -f value = 0.0672 -p value = 0.7957 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha) - self.assertEqual(str(exp), output) - - def test_557_ANOVA_matched_all_empty_vectors(self): - """Test the ANOVA class on matched data with all vectors empty""" - np.random.seed(987654321) - x_input_array = [float("nan"), float("nan"), float("nan"), "four", float("nan")] - y_input_array = ["one", "two", "three", "four", "five"] - alpha = 0.05 - self.assertRaises(NoDataError, lambda: Anova(x_input_array, - y_input_array, - alpha=alpha, - display=False).p_value) - - def test_558_ANOVA_matched_single_argument(self): - """Test the ANOVA class on matched data""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - a = 0.05 - self.assertRaises(NoDataError, lambda: Anova(x_input_array, alpha=a, display=False).p_value) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_correlation.py b/build/lib/sci_analysis/test/test_correlation.py deleted file mode 100644 index 3671c02..0000000 --- a/build/lib/sci_analysis/test/test_correlation.py +++ /dev/null @@ -1,211 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import Correlation -from ..analysis.exc import MinimumSizeError, NoDataError -from ..data import UnequalVectorLengthError, Vector - - -class MyTestCase(unittest.TestCase): - def test_Correlation_corr_pearson(self): - """Test the Correlation class for correlated normally distributed data""" - np.random.seed(987654321) - x_input_array = list(st.norm.rvs(size=100)) - y_input_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in x_input_array]) - alpha = 0.05 - output = """ - -Pearson Correlation Coefficient -------------------------------- - -alpha = 0.0500 -r value = 0.8904 -p value = 0.0000 - -HA: There is a significant relationship between predictor and response -""" - exp = Correlation(x_input_array, y_input_array, alpha=alpha, display=False) - self.assertLess(exp.p_value, alpha, "FAIL: Correlation pearson Type II error") - self.assertEqual(exp.test_type, 'pearson') - self.assertAlmostEqual(exp.r_value, 0.8904, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertAlmostEqual(exp.statistic, 0.8904, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_Correlation_no_corr_pearson(self): - """Test the Correlation class for uncorrelated normally distributed data""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(size=100) - y_input_array = st.norm.rvs(size=100) - alpha = 0.05 - output = """ - -Pearson Correlation Coefficient -------------------------------- - -alpha = 0.0500 -r value = -0.0055 -p value = 0.9567 - -H0: There is no significant relationship between predictor and response -""" - exp = Correlation(x_input_array, y_input_array, alpha=alpha, display=False) - self.assertGreater(exp.p_value, alpha, "FAIL: Correlation pearson Type I error") - self.assertEqual(exp.test_type, 'pearson') - self.assertAlmostEqual(exp.r_value, -0.0055, delta=0.0001) - self.assertAlmostEqual(exp.statistic, -0.0055, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.9567, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_Correlation_corr_spearman(self): - """Test the Correlation class for correlated randomly distributed data""" - np.random.seed(987654321) - x_input_array = list(st.weibull_min.rvs(1.7, size=100)) - y_input_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in x_input_array]) - alpha = 0.05 - output = """ - -Spearman Correlation Coefficient --------------------------------- - -alpha = 0.0500 -r value = 0.7271 -p value = 0.0000 - -HA: There is a significant relationship between predictor and response -""" - exp = Correlation(x_input_array, y_input_array, alpha=alpha, display=False) - self.assertLess(exp.p_value, alpha, "FAIL: Correlation spearman Type II error") - self.assertEqual(exp.test_type, 'spearman') - self.assertAlmostEqual(exp.r_value, 0.7271, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertAlmostEqual(exp.statistic, 0.7271, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_Correlation_no_corr_spearman(self): - """Test the Correlation class for uncorrelated randomly distributed data""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(size=100) - y_input_array = st.weibull_min.rvs(1.7, size=100) - alpha = 0.05 - output = """ - -Spearman Correlation Coefficient --------------------------------- - -alpha = 0.0500 -r value = -0.0528 -p value = 0.6021 - -H0: There is no significant relationship between predictor and response -""" - exp = Correlation(x_input_array, y_input_array, alpha=alpha, display=False) - self.assertGreater(exp.p_value, alpha, "FAIL: Correlation spearman Type I error") - self.assertEqual(exp.test_type, 'spearman') - self.assertAlmostEqual(exp.r_value, -0.0528, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.6021, delta=0.0001) - self.assertAlmostEqual(exp.statistic, -0.0528, delta=0.0001) - self.assertTrue(np.array_equal(x_input_array, exp.xdata)) - self.assertTrue(np.array_equal(x_input_array, exp.predictor)) - self.assertTrue(np.array_equal(y_input_array, exp.ydata)) - self.assertTrue(np.array_equal(y_input_array, exp.response)) - self.assertEqual(str(exp), output) - - def test_Correlation_no_corr_pearson_just_above_min_size(self): - """Test the Correlation class for uncorrelated normally distributed data just above the minimum size""" - np.random.seed(987654321) - alpha = 0.05 - self.assertTrue(Correlation(st.norm.rvs(size=4), - st.norm.rvs(size=4), - alpha=alpha, - display=False).p_value, - "FAIL: Correlation pearson just above minimum size") - - def test_Correlation_no_corr_pearson_at_min_size(self): - """Test the Correlation class for uncorrelated normally distributed data at the minimum size""" - np.random.seed(987654321) - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: Correlation(st.norm.rvs(size=3), - st.norm.rvs(size=3), - alpha=alpha, - display=False).p_value) - - def test_Correlation_no_corr_pearson_unequal_vectors(self): - """Test the Correlation class for uncorrelated normally distributed data with unequal vectors""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=87) - y_input_array = st.norm.rvs(size=100) - self.assertRaises(UnequalVectorLengthError, lambda: Correlation(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - def test_Correlation_no_corr_pearson_empty_vector(self): - """Test the Correlation class for uncorrelated normally distributed data with an empty vector""" - np.random.seed(987654321) - alpha = 0.05 - self.assertRaises(NoDataError, lambda: Correlation(["one", "two", "three", "four", "five"], - st.norm.rvs(size=5), - alpha=alpha, - display=False).p_value) - - def test_Correlation_vector(self): - """Test the Correlation class with an input Vector""" - np.random.seed(987654321) - x_input_array = list(st.norm.rvs(size=100)) - y_input_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in x_input_array]) - alpha = 0.05 - output = """ - -Pearson Correlation Coefficient -------------------------------- - -alpha = 0.0500 -r value = 0.8904 -p value = 0.0000 - -HA: There is a significant relationship between predictor and response -""" - exp = Correlation(Vector(x_input_array, other=y_input_array), alpha=alpha, display=False) - self.assertLess(exp.p_value, alpha, "FAIL: Correlation pearson Type II error") - self.assertEqual(exp.test_type, 'pearson') - self.assertAlmostEqual(exp.r_value, 0.8904, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertAlmostEqual(exp.statistic, 0.8904, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_Correlation_vector_alpha(self): - """Test the Correlation class with an input Vector and different alpha""" - np.random.seed(987654321) - x_input_array = list(st.norm.rvs(size=100)) - y_input_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in x_input_array]) - alpha = 0.01 - output = """ - -Pearson Correlation Coefficient -------------------------------- - -alpha = 0.0100 -r value = 0.8904 -p value = 0.0000 - -HA: There is a significant relationship between predictor and response -""" - exp = Correlation(Vector(x_input_array, other=y_input_array), alpha=alpha, display=False) - self.assertLess(exp.p_value, alpha, "FAIL: Correlation pearson Type II error") - self.assertEqual(exp.test_type, 'pearson') - self.assertAlmostEqual(exp.r_value, 0.8904, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertAlmostEqual(exp.statistic, 0.8904, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_Correlation_missing_ydata(self): - """Test the case where no ydata is given.""" - np.random.seed(987654321) - x_input_array = range(1, 101) - self.assertRaises(AttributeError, lambda: Correlation(x_input_array)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_data_operations.py b/build/lib/sci_analysis/test/test_data_operations.py deleted file mode 100644 index 0064b71..0000000 --- a/build/lib/sci_analysis/test/test_data_operations.py +++ /dev/null @@ -1,674 +0,0 @@ -import unittest - -import numpy as np -import numpy.ma as ma -import pandas as pd - -from ..data import (is_array, is_dict, is_dict_group, is_group, is_iterable, is_tuple, to_float, flatten, is_series, - Vector, is_data, is_vector, is_numeric, is_number) - - -class MyTestCase(unittest.TestCase): - inputs = { - 'num': 3, - 'float': 1.34, - 'string': "hello", - 'num_string': '1.34', - 'char': "h", - 'none': None, - 'list': [1, 2, 3, 4, 5], - 'num_list': ["1", "2", "3", "4", "5"], - 'mixed_list': [1, 2.00, "3", "four", '5'], - 'zero_len_list': [], - 'multiple_dim_list': [[1, 2, 3], [4, 5, 6]], - 'tuple': (1, 2, 3, 4, 5), - 'num_tuple': ("1", "2", "3", "4", "5"), - 'mixed_tuple': (1, 2, "3", "four", '5'), - 'dict': {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}, - 'array': np.array([1, 2, 3, 4, 5]), - 'float_array': np.array([1.0, 2.0, 3.0, 4.0, 5.0]), - 'nan_array': np.array([1, float("nan"), 3, float("nan"), 5], dtype='float'), - 'negative_array': np.array([-1, 2.0, -3.00, 0, -5]), - 'masked_array': ma.masked_array([1, 2, 3, 4, 5], mask=[0, 1, 1, 0, 0]), - 'multi_dim_array': np.array([[1, 2, 3], [4, 5, 6]]), - 'scalar_array': np.array(3), - 'zero_len_array': np.array([]), - 'empty_array': np.empty(5), - 'vector': Vector([1, 2, 3, 4, 5]), - 'series': pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), - 'dict_series': pd.Series({1: 1.0, 2: 2.0, 3: 3.0, 4: 4.0, 5: 5.0}), - 'large_array': np.random.rand(500), - 'large_list': range(500), - 'group': [np.random.rand(50), np.random.rand(50) * 2, np.random.rand(50) * 3], - 'group_of_lists': [range(5), range(6, 10), range(11, 15)], - 'dict_of_lists': {'a': range(1, 5), 'b': range(6, 10), 'c': range(11, 15)} - } - - ans_array = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 1, - 'float_array': 1, - 'nan_array': 1, - 'negative_array': 1, - 'masked_array': 1, - 'multi_dim_array': 1, - 'scalar_array': 1, - 'zero_len_array': 1, - 'empty_array': 1, - 'vector': 0, - 'series': 1, - 'dict_series': 1, - 'large_array': 1, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_dict = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 1, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 1 - } - - ans_iterable = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 1, - 'num_list': 1, - 'mixed_list': 1, - 'zero_len_list': 1, - 'multiple_dim_list': 1, - 'tuple': 1, - 'num_tuple': 1, - 'mixed_tuple': 1, - 'dict': 1, - 'array': 1, - 'float_array': 1, - 'nan_array': 1, - 'negative_array': 1, - 'masked_array': 1, - 'multi_dim_array': 1, - 'scalar_array': 0, - 'zero_len_array': 1, - 'empty_array': 1, - 'vector': 1, - 'series': 1, - 'dict_series': 1, - 'large_array': 1, - 'large_list': 1, - 'group': 1, - 'group_of_lists': 1, - 'dict_of_lists': 1 - } - - ans_tuple = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 1, - 'num_tuple': 1, - 'mixed_tuple': 1, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_data = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 1, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_vector = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 1, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_group = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 1, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 1, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 1, - 'group_of_lists': 1, - 'dict_of_lists': 0 - } - - ans_dict_group = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 1 - } - - ans_series = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 1, - 'dict_series': 1, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_numeric = { - 'num': 0, - 'float': 0, - 'num_string': 0, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 0, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 1, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - ans_number = { - 'num': 1, - 'float': 1, - 'num_string': 1, - 'string': 0, - 'char': 0, - 'none': 0, - 'list': 0, - 'num_list': 0, - 'mixed_list': 0, - 'zero_len_list': 0, - 'multiple_dim_list': 0, - 'tuple': 0, - 'num_tuple': 0, - 'mixed_tuple': 0, - 'dict': 0, - 'array': 0, - 'float_array': 0, - 'nan_array': 0, - 'negative_array': 0, - 'masked_array': 0, - 'multi_dim_array': 0, - 'scalar_array': 1, - 'zero_len_array': 0, - 'empty_array': 0, - 'vector': 0, - 'series': 0, - 'dict_series': 0, - 'large_array': 0, - 'large_list': 0, - 'group': 0, - 'group_of_lists': 0, - 'dict_of_lists': 0 - } - - # Test logic tests - - def test_001_is_array(self): - """Tests the is_array method""" - eval_array = {} - print("") - print("is_array test") - print("-" * 80) - for name, test in self.inputs.items(): - try: - assert is_array(test) - print("PASS: " + name) - eval_array[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_array[name] = 0 - # self.assertTrue(eval_array == self.ans_array, "FAIL: is_array test") - self.assertDictEqual(eval_array, self.ans_array, "FAIL: is_array test") - - def test_002_is_dict(self): - """Tests the is_dict method""" - eval_dict = {} - print("") - print("is_dict test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_dict(test) - print("PASS: " + name) - eval_dict[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_dict[name] = 0 - self.assertTrue(eval_dict == self.ans_dict, "FAIL: is_dict test") - - def test_003_is_iterable(self): - """Tests the is_iterable method""" - eval_iterable = {} - print("") - print("is_iterable test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_iterable(test) - print("PASS: " + name) - eval_iterable[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_iterable[name] = 0 - self.assertTrue(eval_iterable == self.ans_iterable, "FAIL: is_iterable test") - - def test_004_is_tuple(self): - """Tests the is_tuple method""" - eval_tuple = {} - print("") - print("is_tuple test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_tuple(test) - print("PASS: " + name) - eval_tuple[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_tuple[name] = 0 - self.assertTrue(eval_tuple == self.ans_tuple, "FAIL: is_tuple test") - - def test_005_is_data(self): - """Tests the is_data method""" - eval_data = {} - print("") - print("is_data test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_data(test) - print("PASS: " + name) - eval_data[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_data[name] = 0 - self.assertTrue(eval_data == self.ans_data, "FAIL: is_data test") - - def test_006_is_vector(self): - """Tests the is_vector method""" - eval_vector = {} - print("") - print("is_vector test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_vector(test) - print("PASS: " + name) - eval_vector[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_vector[name] = 0 - self.assertTrue(eval_vector == self.ans_vector, "FAIL: is_vector test") - - def test_007_is_group(self): - """Tests the is_group method""" - eval_group = {} - print("") - print("is_group test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_group(test) - print("PASS: " + name) - eval_group[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_group[name] = 0 - self.assertTrue(eval_group == self.ans_group, "FAIL: is_group test") - - def test_008_is_dict_group(self): - """Test the is_dict_group method""" - eval_dict_group = {} - print("") - print("is_dict_group test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_dict_group(test) - print("PASS: " + name) - eval_dict_group[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_dict_group[name] = 0 - self.assertTrue(eval_dict_group == self.ans_dict_group, "FAIL: is_dict_group test") - - def test_009_is_series(self): - """Test the is_series method""" - eval_series = {} - print("") - print("is_series test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_series(test) - print("PASS: " + name) - eval_series[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_series[name] = 0 - # self.assertTrue(eval_dict_group == self.ans_series, "FAIL: is_dict_group test") - self.assertDictEqual(eval_series, self.ans_series, "FAIL: is_series test") - - def test_010_is_numeric(self): - """Tests the is_numeric method""" - eval_numeric = {} - print("") - print("is_numeric test") - print("-" * 70) - for name, test in self.inputs.items(): - try: - assert is_numeric(test) - print("PASS: " + name) - eval_numeric[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_numeric[name] = 0 - self.assertTrue(eval_numeric == self.ans_numeric, "FAIL: is_numeric test") - - def test_011_is_number(self): - """Test the is_number function""" - eval_numeric = {} - for name, test in self.inputs.items(): - try: - assert is_number(test) - print("PASS: " + name) - eval_numeric[name] = 1 - except AssertionError: - print("FAIL: " + name) - eval_numeric[name] = 0 - self.assertTrue(eval_numeric == self.ans_number, "FAIL: is_number test") - - def test_050_to_float_list(self): - """Test the to_float int list conversion""" - input_float = range(5) - out_float = [0.0, 1.0, 2.0, 3.0, 4.0] - self.assertEqual(to_float(input_float), out_float, "FAIL: Error to_float int list") - - def test_051_to_float_quoted_list(self): - """Test the to_float string quoted num list conversion""" - input_float = ["1", "2", "3.0", "4.5", "5.65"] - out_float = [1.0, 2.0, 3.0, 4.5, 5.65] - self.assertEqual(to_float(input_float), out_float, "FAIL: Error to_float quoted string num list") - - def test_052_to_float_str_list(self): - """Test the to_float string list conversion""" - input_float = ["one", "two", "three", "four", "five"] - out_float = [float("nan")] * 5 - self.assertTrue(np.array_equal(np.isnan(to_float(input_float)), np.isnan(out_float)), - "FAIL: Error to_float string list") - - def test_053_to_float_mixed_list(self): - """Test the to_float mixed list conversion""" - input_float = [1, "two", "3.0", 4.1, "5.65"] - out_float = [1.0, float("nan"), 3.0, 4.1, 5.65] - self.assertEqual([y for y in to_float(input_float) if not np.isnan(y)], - [x for x in out_float if not np.isnan(x)], - "FAIL: Error to_float mixed list") - - def test_054_to_float_missing_val_list(self): - """Test the to_float missing val list conversion""" - input_float = ["1.4", "", 3.0, 4, ""] - out_float = [1.4, float("nan"), 3.0, 4, float("nan")] - self.assertEqual([y for y in to_float(input_float) if not np.isnan(y)], - [x for x in out_float if not np.isnan(x)], - "FAIL: Error to_float missing val list") - - def test_055_to_float_empty_list(self): - """Test the to_float empty list conversion""" - input_float = [] - out_float = [] - self.assertEqual(to_float(input_float), out_float, "FAIL: Error to_float empty list") - - # Test flatten function - - def test_060_flatten_2_dim(self): - """Test the flatten method on a 2 dim array""" - input_flatten = [[1, 2, 3], [4, 5, 6]] - out_flatten = [1, 2, 3, 4, 5, 6] - self.assertTrue(np.array_equal(flatten(input_flatten), out_flatten), "FAIL: Error in flatten 2dim") - - def test_061_flatten_3_dim(self): - """Test the flatten method on a 3 dim array""" - input_flatten = [[[1, 2, 3], [4, 5, 6]], [[11, 12, 13], [14, 15, 16]]] - out_flatten = [1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16] - self.assertTrue(np.array_equal(flatten(input_flatten), out_flatten), "FAIL: Error in flatten 3dim") - - def test_062_flatten_4_dim(self): - """Test the flatten method on a 4 dim array""" - input_flatten = [[[[1, 2, 3], [4, 5, 6]], [[11, 12, 13], [14, 15, 16]]], - [[[111, 112, 113], [114, 115, 116]], [[1111, 1112, 1113], [1114, 1115, 1116]]]] - out_flatten = [1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16, - 111, 112, 113, 114, 115, 116, 1111, 1112, 1113, 1114, 1115, 1116] - self.assertTrue(np.array_equal(flatten(input_flatten), out_flatten), "FAIL: Error in flatten 4dim") - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_equalvariance.py b/build/lib/sci_analysis/test/test_equalvariance.py deleted file mode 100644 index 5f52a74..0000000 --- a/build/lib/sci_analysis/test/test_equalvariance.py +++ /dev/null @@ -1,251 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import EqualVariance -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - def test_450_EqualVariance_Bartlett_matched(self): - """Test the EqualVariance class for normally distributed matched variances""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.norm.rvs(*z_parms, size=100) - a = 0.05 - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=False) - output = """ - -Bartlett Test -------------- - -alpha = 0.0500 -T value = 0.2264 -p value = 0.8930 - -H0: Variances are equal -""" - self.assertGreater(exp.p_value, a, "FAIL: Equal variance Bartlett Type I error") - self.assertEqual(exp.test_type, 'Bartlett') - self.assertAlmostEqual(exp.statistic, 0.2264, delta=0.0001) - self.assertAlmostEqual(exp.t_value, 0.2264, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.893, delta=0.001) - self.assertEqual(str(exp), output) - - def test_452_EqualVariance_Bartlett_unmatched(self): - """Test the EqualVariance class for normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [4, 1.35] - y_parms = [4, 1.35] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.norm.rvs(*z_parms, size=100) - a = 0.05 - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=True) - output = """ - -Bartlett Test -------------- - -alpha = 0.0500 -T value = 43.0402 -p value = 0.0000 - -HA: Variances are not equal -""" - self.assertLess(exp.p_value, a, "FAIL: Equal variance bartlett Type II error") - self.assertEqual(exp.test_type, 'Bartlett') - self.assertAlmostEqual(exp.statistic, 43.0402, delta=0.0001) - self.assertAlmostEqual(exp.t_value, 43.0402, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_456_EqualVariance_Bartlett_unmatched_w_value(self): - """Test the EqualVariance class for normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [4, 1.35] - y_parms = [4, 1.35] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.norm.rvs(*z_parms, size=100) - a = 0.05 - self.assertRaises(KeyError, lambda: EqualVariance(x_input_array, y_input_array, z_input_array, - alpha=a, - display=False).w_value) - - def test_457_EqualVariance_Bartlett_single_argument(self): - """Test the EqualVariance class for normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [4, 1.35] - x_input_array = st.norm.rvs(*x_parms, size=100) - a = 0.05 - self.assertRaises(NoDataError, lambda: EqualVariance(x_input_array, alpha=a, display=False).p_value) - - def test_458_EqualVariance_Levene_matched(self): - """Test the EqualVariance class for non-normally distributed matched variances""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - z_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - y_input_array = st.weibull_min.rvs(*y_parms, size=100) - z_input_array = st.weibull_min.rvs(*z_parms, size=100) - a = 0.05 - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=False) - output = """ - -Levene Test ------------ - -alpha = 0.0500 -W value = 1.7545 -p value = 0.1748 - -H0: Variances are equal -""" - self.assertGreater(exp.p_value, a, "FAIL: Unequal variance levene Type I error") - self.assertEqual(exp.test_type, 'Levene') - self.assertAlmostEqual(exp.statistic, 1.7545, delta=0.0001) - self.assertAlmostEqual(exp.w_value, 1.7545, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.1748, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_460_EqualVariance_Levene_unmatched(self): - """Test the EqualVariance class for non-normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [4, 0.75] - z_parms = [1.7] - a = 0.05 - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.weibull_min.rvs(*z_parms, size=100) - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=True) - output = """ - -Levene Test ------------ - -alpha = 0.0500 -W value = 11.2166 -p value = 0.0000 - -HA: Variances are not equal -""" - self.assertLess(exp.p_value, a, "FAIL: Unequal variance levene Type II error") - self.assertEqual(exp.test_type, 'Levene') - self.assertAlmostEqual(exp.statistic, 11.2166, delta=0.0001) - self.assertAlmostEqual(exp.w_value, 11.2166, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_463_EqualVariance_Levene_single_argument(self): - """Test the EqualVariance class for normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - a = 0.05 - self.assertRaises(NoDataError, lambda: EqualVariance(x_input_array, alpha=a, display=False).p_value) - - def test_464_EqualVariance_Levene_unmatched_t_value(self): - """Test the EqualVariance class for non-normally distributed unmatched variances""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [4, 0.75] - z_parms = [1.7] - a = 0.05 - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*y_parms, size=100) - z_input_array = st.weibull_min.rvs(*z_parms, size=100) - self.assertRaises(KeyError, lambda: EqualVariance(x_input_array, - y_input_array, - z_input_array, - alpha=a, - display=False).t_value) - - def test_465_EqualVariance_Bartlett_matched_just_above_min_size(self): - """Test the EqualVariance class for normally distributed matched variances just above min size""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=3) - y_input_array = st.norm.rvs(*y_parms, size=3) - z_input_array = st.norm.rvs(*z_parms, size=3) - a = 0.05 - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=False) - output = """ - -Bartlett Test -------------- - -alpha = 0.0500 -T value = 0.0785 -p value = 0.9615 - -H0: Variances are equal -""" - self.assertGreater(exp.p_value, a, "FAIL: Equal variance Bartlett just above min size") - self.assertEqual(exp.test_type, 'Bartlett') - self.assertAlmostEqual(exp.statistic, 0.0785, delta=0.0001) - self.assertAlmostEqual(exp.t_value, 0.0785, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.9615, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_466_EqualVariance_Bartlett_matched_at_min_size(self): - """Test the EqualVariance class for normally distributed matched variances at min size""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=2) - y_input_array = st.norm.rvs(*y_parms, size=9) - z_input_array = st.norm.rvs(*z_parms, size=47) - a = 0.05 - self.assertTrue(MinimumSizeError, lambda: EqualVariance(x_input_array, y_input_array, z_input_array, - alpha=a, - display=False).p_value) - - def test_467_EqualVariance_Bartlett_matched_single_empty_vector(self): - """Test the EqualVariance class for normally distributed matched variances single empty vector""" - np.random.seed(987654321) - x_parms = [4, 0.75] - z_parms = [4, 0.75] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = ["one", "two", "three", "four", "five"] - z_input_array = st.norm.rvs(*z_parms, size=100) - a = 0.05 - exp = EqualVariance(x_input_array, y_input_array, z_input_array, alpha=a, display=False) - output = """ - -Bartlett Test -------------- - -alpha = 0.0500 -T value = 0.0374 -p value = 0.8466 - -H0: Variances are equal -""" - self.assertGreater(exp.p_value, a) - self.assertEqual(str(exp), output) - - def test_466_EqualVariance_Bartlett_all_empty_vectors(self): - """Test the EqualVariance class for normally distributed matched variances with all empty vectors""" - np.random.seed(987654321) - x_input_array = [np.nan, np.nan, np.nan, "four", np.nan] - y_input_array = ["one", "two", "three", "four", "five"] - a = 0.05 - self.assertTrue(NoDataError, lambda: EqualVariance(x_input_array, y_input_array, - alpha=a, - display=False).p_value) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_graph_boxplots.py b/build/lib/sci_analysis/test/test_graph_boxplots.py deleted file mode 100644 index 7df27be..0000000 --- a/build/lib/sci_analysis/test/test_graph_boxplots.py +++ /dev/null @@ -1,735 +0,0 @@ -import unittest -import numpy as np -import pandas as pd -import scipy.stats as st -from os import path, getcwd -from warnings import catch_warnings, simplefilter - -from ..graphs import GraphBoxplot -from .. data import Vector -from ..analysis.exc import NoDataError - - -class TestWarnings(unittest.TestCase): - """A TestCase subclass with assertWarns substitute to cover python 2.7 which doesn't have an assertWarns method.""" - - def assertWarnsCrossCompatible(self, expected_warning, *args, **kwargs): - with catch_warnings(record=True) as warning_list: - simplefilter('always') - callable_obj = args[0] - args = args[1:] - callable_obj(*args, **kwargs) - self.assertTrue(any(item.category == expected_warning for item in warning_list)) - - -class MyTestCase(TestWarnings): - - @property - def save_path(self): - if getcwd().split('/')[-1] == 'test': - return './images/' - elif getcwd().split('/')[-1] == 'sci_analysis': - if path.exists('./setup.py'): - return './sci_analysis/test/images/' - else: - return './test/images/' - else: - './' - - def test_100_boxplot_2_default(self): - """Generate a boxplot graph with default settings""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - save_to='{}test_box_100'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_101_boxplot_2_no_nqp(self): - """Generate a boxplot graph with no nqp""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - nqp=False, - save_to='{}test_box_101'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_102_boxplot_2_weird_variance(self): - """Generate a boxplot graph with small and large variance""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(0, 0.1, size=2000) - input_2_array = st.norm.rvs(1, 8, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - save_to='{}test_box_102'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_103_boxplot_2_groups(self): - """Generate a boxplot graph with set group names""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - groups=('Group 1', 'Group 2'), - save_to='{}test_box_103'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_104_boxplot_2_names_title(self): - """Generate a boxplot graph with set xname, yname and title""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - xname='Test Groups', - yname='Test Data', - title='Title Test', - save_to='{}test_box_104'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_105_boxplot_2_diff_size(self): - """Generate a boxplot graph with different sizes""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(0, 5, size=1234) - input_2_array = st.norm.rvs(0, 5, size=56) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - title='Diff Size', - save_to='{}test_box_105'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_106_boxplot_2_diff_size_diff_disto(self): - """Generate a boxplot graph with different sizes and different distributions""" - np.random.seed(987654321) - input_1_array = st.weibull_min.rvs(2, size=1234) - input_2_array = st.norm.rvs(0, size=56) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - title='Diff Size, Diff Distribution', - save_to='{}test_box_106'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_107_boxplot_2_diff_size_diff_disto_dict(self): - """Generate a boxplot graph with different sizes and different distributions as a dict""" - np.random.seed(987654321) - input_1_array = st.weibull_min.rvs(2, size=1234) - input_2_array = st.norm.rvs(0, size=56) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot({'Group 1': input_1_array, 'Group 2': input_2_array}, - title='Diff Size, Diff Distribution Dict', - save_to='{}test_box_107'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_108_boxplot_2_size_4(self): - """Generate a boxplot graph with size 4""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(1, size=4) - input_2_array = st.norm.rvs(size=4) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot({'Group 1': input_1_array, 'Group 2': input_2_array}, - title='Size 4', - save_to='{}test_box_108'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_109_boxplot_2_at_min_size(self): - """Generate a boxplot graph with size 2""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2) - input_2_array = st.norm.rvs(size=3) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot({'Group 1': input_1_array, 'Group 2': input_2_array}, - title='At Min Size', - save_to='{}test_box_109'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_110_boxplot_2_min_size(self): - """Catch the min size case""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=1) - input_2_array = st.norm.rvs(size=2) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array)]) - res = GraphBoxplot({'Group 1': input_1_array, 'Group 2': input_2_array}, - title='Single point', - save_to='{}test_box_110'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median([np.median(input_1_array), np.median(input_2_array)])) - - def test_111_boxplot_2_missing_data(self): - """Generate a boxplot with missing data""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - indicies_x = list(np.random.randint(0, 1999, 200)) - indicies_y = list(np.random.randint(0, 1999, 200)) - for i in indicies_x: - input_1_array = np.insert(input_1_array, i, np.nan, axis=0) - for i in indicies_y: - input_2_array = np.insert(input_2_array, i, np.nan, axis=0) - gmean = np.nanmean((np.nanmean(input_1_array), np.nanmean(input_2_array))) - gmedian = np.nanmedian([np.nanmedian(input_1_array), np.nanmedian(input_2_array)]) - res = GraphBoxplot(input_1_array, input_2_array, - title='Random Missing Data', - save_to='{}test_box_111'.format(self.save_path)) - self.assertTrue(res) - self.assertAlmostEqual(gmean, res.grand_mean((np.nanmean(input_1_array), np.nanmean(input_2_array))), 4) - self.assertAlmostEqual(gmedian, res.grand_median([np.nanmedian(input_1_array), np.nanmedian(input_2_array)]), 4) - - def test_112_boxplot_2_empty_arrays(self): - """Catch the case where both arrays are empty""" - np.random.seed(987654321) - input_1_array = np.array([]) - input_2_array = np.array([]) - self.assertRaises(NoDataError, lambda: GraphBoxplot(input_1_array, input_2_array)) - - def test_113_boxplot_2_empty_lists(self): - """Catch the case where both lists are empty""" - np.random.seed(987654321) - input_1_array = [] - input_2_array = [] - self.assertRaises(NoDataError, lambda: GraphBoxplot(input_1_array, input_2_array)) - - def test_114_boxplot_2_strings(self): - """Generate a boxplot graph with 2 string lists""" - np.random.seed(987654321) - input_1_array = ["this", '2', 'is', '4.0', 'a', '6', 'string'] - input_2_array = ['3.0', "here's", '6', 'a', '9.0', 'string'] - gmean = np.mean((np.mean([2, 4, 6]), np.mean([3, 6, 9]))) - gmedian = np.median((np.median([2, 4, 6]), np.median([3, 6, 9]))) - res = GraphBoxplot(input_1_array, input_2_array, - title='String test', - save_to='{}test_box_114'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean([2, 4, 6]), np.mean([3, 6, 9])))) - self.assertEqual(gmedian, res.grand_median((np.median([2, 4, 6]), np.median([3, 6, 9])))) - - def test_115_boxplot_2_2dim_array(self): - """Generate a boxplot graph with 2 2dim arrays""" - np.random.seed(987654321) - input_1_array = np.array([[1, 2, 3], [4, 5, 6]]) - input_2_array = np.array([[3, 4, 5], [6, 7, 8]]) - gmean = np.nanmean((np.nanmean(input_1_array, axis=None), np.nanmean(input_2_array, axis=None))) - gmedian = np.nanmedian([np.nanmedian(input_1_array, axis=None), np.nanmedian(input_2_array, axis=None)]) - res = GraphBoxplot(input_1_array, input_2_array, - title='2dim Array', - save_to='{}test_box_115'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.nanmean(input_1_array, axis=None), - np.nanmean(input_2_array, axis=None)))) - self.assertEqual(gmedian, res.grand_median([np.nanmedian(input_1_array, axis=None), - np.nanmedian(input_2_array, axis=None)])) - - def test_116_boxplot_2_3dim_array(self): - """Generate a boxplot graph with 2 3dim arrays""" - np.random.seed(987654321) - input_1_array = np.array([[[1, 2, 3], [3, 4, 5]], [[6, 7, 8], [8, 9, 10]]]) - input_2_array = np.array([[[2, 3, 4], [5, 6, 7]], [[7, 8, 9], [10, 11, 12]]]) - gmean = np.nanmean((np.nanmean(input_1_array, axis=None), np.nanmean(input_2_array, axis=None))) - gmedian = np.nanmedian([np.nanmedian(input_1_array, axis=None), np.nanmedian(input_2_array, axis=None)]) - res = GraphBoxplot(input_1_array, input_2_array, - title='3dim Array', - save_to='{}test_box_116'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.nanmean(input_1_array, axis=None), - np.nanmean(input_2_array, axis=None)))) - self.assertEqual(gmedian, res.grand_median([np.nanmedian(input_1_array, axis=None), - np.nanmedian(input_2_array, axis=None)])) - - def test_117_boxplot_2_3dim_list(self): - """Generate a boxplot graph with 2 3dim lists""" - np.random.seed(987654321) - input_1_array = [[['1', 'two', '3'], ['4', '5', 'six']], [['7', '8', '9'], ['ten', '11', '12']]] - input_2_array = [[['one', '2', '3'], ['four', '5', '6']], [['7', '8', '9'], ['ten', '11', '12']]] - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, - title='String Array Test', - save_to='{}test_box_117'.format(self.save_path))) - - def test_118_boxplot_4_default(self): - """Generate a boxplot graph with 4 arrays and default settings""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array), np.mean(input_3_array), - np.mean(input_4_array))) - gmedian = np.median([np.median(input_1_array), np.median(input_2_array), np.median(input_3_array), - np.median(input_4_array)]) - res = GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - save_to='{}test_box_118'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, np.mean((np.mean(input_1_array), np.mean(input_2_array), - np.mean(input_3_array), np.mean(input_4_array)))) - self.assertEqual(gmedian, np.median((np.median(input_1_array), np.median(input_2_array), - np.median(input_3_array), np.median(input_4_array)))) - - def test_119_boxplot_4_no_nqp(self): - """Generate a boxplot graph with 4 arrays and no nqp""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array), - np.mean(input_3_array), np.mean(input_4_array))) - gmedian = np.median((np.median(input_1_array), np.median(input_2_array), - np.median(input_3_array), np.median(input_4_array))) - res = GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - nqp=False, - save_to='{}test_box_119'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, np.mean((np.mean(input_1_array), np.mean(input_2_array), - np.mean(input_3_array), np.mean(input_4_array)))) - self.assertEqual(gmedian, np.median((np.median(input_1_array), np.median(input_2_array), - np.median(input_3_array), np.median(input_4_array)))) - - def test_120_boxplot_4_no_nqp_groups(self): - """Generate a boxplot graph with 4 arrays, no nqp and set groups""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - nqp=False, - groups=('Group 1', 'Group 2', 'Group 3', 'Group 4'), - save_to='{}test_box_120'.format(self.save_path))) - - def test_121_boxplot_4_no_nqp_dict(self): - """Generate a boxplot graph with 4 arrays from a dict and no nqp""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - self.assertTrue(GraphBoxplot({'Group 1': input_1_array, 'Group 2': input_2_array, 'Group 3': input_3_array, - 'Group 4': input_4_array}, - nqp=True, - save_to='{}test_box_121'.format(self.save_path))) - - def test_122_boxplot_4_empty_array(self): - """Generate a boxplot graph with 1 empty array""" - np.random.seed(987654321) - # TODO: Note in the documentation that if an array is ignored this way, the auto-number isn't skipped now. - input_1_array = st.norm.rvs(size=2000) - input_2_array = np.array([]) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - title='1 Missing Array', - save_to='{}test_box_122'.format(self.save_path))) - - def test_123_boxplot_4_2_empty_arrays(self): - """Generate a boxplot graph with 2 empty arrays""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = np.array([]) - input_3_array = [] - input_4_array = st.weibull_min.rvs(1.4, size=2000) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - title='2 Missing Arrays', - save_to='{}test_box_123'.format(self.save_path))) - - def test_124_boxplot_4_all_empty(self): - """Catch the case where all arrays are empty""" - np.random.seed(987654321) - input_1_array = ['this', 'is', 'an', 'array'] - input_2_array = ['this', 'is', 'another', 'array'] - input_3_array = ['this', 'is', 'not', 'the', 'array', "you're", 'looking', 'for'] - input_4_array = ['and', 'nope'] - self.assertTrue(NoDataError, lambda: GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array)) - - def test_125_boxplot_4_strings(self): - """Generate a boxplot graph from strings""" - np.random.seed(987654321) - input_1_array = ["this", '2', 'is', '4.0', 'a', '6', 'string'] - input_2_array = ['3.0', "here's", '6', 'a', '9.0', 'string'] - input_3_array = ['1', '2', '2', 'two', '3', '3', '3', '4'] - input_4_array = ['4', '4', 'four', '4', 'five', '1'] - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - title='4 Arrays Strings', - save_to='{}test_box_125'.format(self.save_path))) - - def test_126_boxplot_14_default(self): - """Generate a boxplot graph with 14 arrays""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=1847) - input_3_array = st.norm.rvs(0.5, 0.5, size=1134) - input_4_array = st.norm.rvs(0, 0.1, size=962) - input_5_array = st.weibull_min.rvs(1.2, size=2000) - input_6_array = st.norm.rvs(size=82) - input_7_array = st.norm.rvs(0, 2, size=823) - input_8_array = st.norm.rvs(2, size=2000) - input_9_array = st.weibull_min.rvs(2, size=1200) - input_10_array = st.norm.rvs(0.5, 1.5, size=200) - input_11_array = st.norm.rvs(-1, size=1732) - input_12_array = st.norm.rvs(-0.5, 2, size=1386) - input_13_array = st.norm.rvs(0, 0.5, size=548) - input_14_array = st.weibull_min.rvs(1.7, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array), np.mean(input_3_array), np.mean(input_4_array), - np.mean(input_5_array), np.mean(input_6_array), np.mean(input_7_array), np.mean(input_8_array), - np.mean(input_9_array), np.mean(input_10_array), np.mean(input_11_array), - np.mean(input_12_array), np.mean(input_13_array), np.mean(input_14_array))) - gmedian = np.median((np.median(input_1_array), np.median(input_2_array), np.median(input_3_array), - np.median(input_4_array), np.median(input_5_array), np.median(input_6_array), - np.median(input_7_array), np.median(input_8_array), np.median(input_9_array), - np.median(input_10_array), np.median(input_11_array), np.median(input_12_array), - np.median(input_13_array), np.median(input_14_array))) - res = GraphBoxplot({'Group 1': input_1_array, - 'Group 2': input_2_array, - 'Group 3': input_3_array, - 'Group 4': input_4_array, - 'Group 5': input_5_array, - 'Group 6': input_6_array, - 'Group 7': input_7_array, - 'Group 8': input_8_array, - 'Group 9': input_9_array, - 'Group 10': input_10_array, - 'Group 11': input_11_array, - 'Group 12': input_12_array, - 'Group 13': input_13_array, - 'Group 14': input_14_array}, - title='14 Arrays', - save_to='{}test_box_126'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array), np.mean(input_3_array), - np.mean(input_4_array), np.mean(input_5_array), np.mean(input_6_array), - np.mean(input_7_array), np.mean(input_8_array), np.mean(input_9_array), - np.mean(input_10_array), np.mean(input_11_array), - np.mean(input_12_array), np.mean(input_13_array), - np.mean(input_14_array)))) - self.assertEqual(gmedian, res.grand_median((np.median(input_1_array), np.median(input_2_array), - np.median(input_3_array), np.median(input_4_array), - np.median(input_5_array), np.median(input_6_array), - np.median(input_7_array), np.median(input_8_array), - np.median(input_9_array), np.median(input_10_array), - np.median(input_11_array), np.median(input_12_array), - np.median(input_13_array), np.median(input_14_array)))) - - def test_127_boxplot_1_default(self): - """Generate a boxplot graph with 1 array""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(1, size=2000) - self.assertTrue(GraphBoxplot(input_1_array, - title='1 Array', - save_to='{}test_box_127'.format(self.save_path))) - - def test_128_boxplot_1_no_nqp(self): - """Generate a boxplot graph with 1 array and no nqp""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - self.assertTrue(GraphBoxplot(input_1_array, - nqp=False, - title='1 Array no NQP', - save_to='{}test_box_128'.format(self.save_path))) - - def test_129_boxplot_1_groups(self): - """Generate a boxplot graph with 1 array and set groups""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - self.assertTrue(GraphBoxplot(input_1_array, - groups=['Group 1'] * 2000, - title='1 Array Groups Set', - save_to='{}test_box_129'.format(self.save_path))) - - def test_130_boxplot_1_dict(self): - """Generate a boxplot graph with 1 array from a dict""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - self.assertTrue(GraphBoxplot({'Group 1': input_1_array}, - title='1 Array Dict', - save_to='{}test_box_130'.format(self.save_path))) - - def test_131_boxplot_1_no_data(self): - """Catch the case where the 1 and only array is empty""" - np.random.seed(987654321) - input_1_array = np.array([]) - self.assertRaises(NoDataError, lambda: GraphBoxplot(input_1_array)) - - def test_132_boxplot_4_missing_3(self): - """Generate a boxplot graph with 4 arrays where 3 are missing""" - np.random.seed(987654321) - input_1_array = np.array([]) - input_2_array = ['One', 'two', 'three', 'four'] - input_3_array = st.norm.rvs(size=5) - input_4_array = [] - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - title='4 Array 3 Missing', - save_to='{}test_box_132'.format(self.save_path))) - - def test_133_boxplot_horizontal_labels_length_size(self): - """Generate a boxplot graph at the max horizontal labels""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - input_3_array = st.norm.rvs(size=100) - input_4_array = st.norm.rvs(size=100) - input_5_array = st.norm.rvs(size=100) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, input_5_array, - title='Horizontal labels test', - groups=['1111111111', '2222222222', '3333333333', '4444444444', '5555555555'], - save_to='{}test_box_133'.format(self.save_path))) - - def test_134_boxplot_vertical_labels_length(self): - """Generate a boxplot graph with vertical labels""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - input_3_array = st.norm.rvs(size=100) - input_4_array = st.norm.rvs(size=100) - input_5_array = st.norm.rvs(size=100) - input_6_array = st.norm.rvs(size=100) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, input_5_array, - input_6_array, - title='Horizontal labels test', - groups=['1111111111', '2222222222', '3333333333', '4444444444', '5555555555', - '6666666666'], - save_to='{}test_box_134'.format(self.save_path))) - - def test_135_boxplot_vertical_labels_size(self): - """Generate a boxplot graph with vertical labels""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - input_3_array = st.norm.rvs(size=100) - input_4_array = st.norm.rvs(size=100) - self.assertTrue(GraphBoxplot(input_1_array, input_2_array, input_3_array, input_4_array, - title='Horizontal labels test', - groups=['1234567890a', '1234567890b', '1234567890c', '1234567890d'], - save_to='{}test_box_135'.format(self.save_path))) - - def test_136_boxplot_4_groups_5(self): - """Generate a boxplot graph with 4 arrays and 5 groups""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - input_3_array = st.norm.rvs(size=100) - input_4_array = st.norm.rvs(size=100) - self.assertRaises(AttributeError, lambda: GraphBoxplot( - input_1_array, - input_2_array, - input_3_array, - input_4_array, - groups=['Group 1', 'Group 2', 'Group 3', 'Group 4', 'Group 5'], - title='4 Arrays 5 Groups', - save_to='{}test_box_136'.format(self.save_path))) - - def test_137_boxplot_4_groups_3(self): - """Generate a boxplot graph with 4 arrays and 3 groups""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=100) - input_2_array = st.norm.rvs(size=100) - input_3_array = st.norm.rvs(size=100) - input_4_array = st.norm.rvs(size=100) - self.assertRaises(AttributeError, lambda: GraphBoxplot(input_1_array, - input_2_array, - input_3_array, - input_4_array, - groups=['Group 1', 'Group 2', 'Group 3'])) - - def test_138_boxplot_vector(self): - """Generate a boxplot graph from a Vector object.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - gmean = np.mean((np.mean(input_1_array), np.mean(input_2_array))) - gmedian = np.median((np.median(input_1_array), np.median(input_2_array))) - vector = Vector(input_1_array).append(Vector(input_2_array)) - res = GraphBoxplot(vector, title='Vector Simple Test', save_to='{}test_box_138'.format(self.save_path)) - self.assertTrue(res) - self.assertEqual(gmean, res.grand_mean((np.mean(input_1_array), np.mean(input_2_array)))) - self.assertEqual(gmedian, res.grand_median((np.median(input_1_array), np.median(input_2_array)))) - - def test_139_boxplot_vector_ignore_groups(self): - """Generate a boxplot graph from a Vector object which should ignore the groups kwargs.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - vector = Vector(input_1_array).append(Vector(input_2_array)) - self.assertTrue(GraphBoxplot(vector, - title='Vector Simple Test', - groups=('Group 1', 'Group 2'), - save_to='{}test_box_139'.format(self.save_path))) - - def test_140_boxplot_vector_with_group_names(self): - """Generate a boxplot graph from a Vector object with specified group names.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - vector = (Vector(input_1_array, groups=['Group 1'] * 2000) - .append(Vector(input_2_array, groups=['Group 2'] * 2000))) - self.assertTrue(GraphBoxplot(vector, - title='Vector Simple Test', - save_to='{}test_box_140'.format(self.save_path))) - - def test_141_boxplot_vector_4_default(self): - """Generate a boxplot graph from a vector object with four groups.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array) - .append(Vector(input_2_array)) - .append(Vector(input_3_array)) - .append(Vector(input_4_array))) - self.assertTrue(GraphBoxplot(vector, save_to='{}test_box_141'.format(self.save_path))) - - def test_142_boxplot_vector_with_groups_4_default(self): - """Generate a boxplot graph from a vector object with four groups.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array, groups=['Group 1'] * 2000) - .append(Vector(input_2_array, groups=['Group 2'] * 2000)) - .append(Vector(input_3_array, groups=['Group 3'] * 2000)) - .append(Vector(input_4_array, groups=['Group 4'] * 2000))) - self.assertTrue(GraphBoxplot(vector, save_to='{}test_box_142'.format(self.save_path))) - - def test_143_boxplot_from_columns_default(self): - """Generate a boxplot graph from a single column with group column.""" - np.random.seed(987654321) - input_1_array = pd.DataFrame({'input': st.norm.rvs(size=2000), 'group': ['Group 1'] * 2000}) - input_2_array = pd.DataFrame({'input': st.norm.rvs(1, size=2000), 'group': ['Group 2'] * 2000}) - df = pd.concat([input_1_array, input_2_array]) - self.assertTrue(GraphBoxplot(df['input'], groups=df['group'], - title='DataFrame Simple Test', - save_to='{}test_box_143'.format(self.save_path))) - - def test_144_boxplot_from_columns_with_groups_4_default(self): - """Generate a boxplot graph from a single column with group column.""" - np.random.seed(987654321) - input_1_array = pd.DataFrame({'input': st.norm.rvs(size=2000), 'group': ['Group 1'] * 2000}) - input_2_array = pd.DataFrame({'input': st.norm.rvs(1, size=2000), 'group': ['Group 2'] * 2000}) - input_3_array = pd.DataFrame({'input': st.norm.rvs(2, 0.5, size=2000), 'group': ['Group 3'] * 2000}) - input_4_array = pd.DataFrame({'input': st.weibull_min.rvs(1.4, size=2000), 'group': ['Group 4'] * 2000}) - df = pd.concat([input_1_array, input_2_array, input_3_array, input_4_array]) - self.assertTrue(GraphBoxplot(df['input'], groups=df['group'], - title='DataFrame 4 Groups', - save_to='{}test_box_144'.format(self.save_path))) - - def test_145_boxplot_data_column_length_unequal_to_group_column_length(self): - """Check the case where the length of the data array doesn't match the length of the group labels array.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - self.assertRaises(AttributeError, lambda: GraphBoxplot(input_1_array, groups=['Group 1'])) - - # def test_146_boxplot_issues_depricated_warning(self): - # """Check to make sure a Deprication warnings is raised if passing in multiple arguments.""" - # np.random.seed(987654321) - # input_1_array = st.norm.rvs(size=2000) - # input_2_array = st.norm.rvs(1, size=2000) - # self.assertWarnsCrossCompatible(FutureWarning, - # lambda: GraphBoxplot(input_1_array, input_2_array, - # title='Raise Warning', - # save_to='{}test_box_146'.format(self.save_path))) - - def test_147_boxplot_scalar(self): - """Generate a boxplot from a scalar value.""" - input_1_array = 3 - self.assertTrue(GraphBoxplot(input_1_array, title='Scalar Boxplot', - save_to='{}test_box_147'.format(self.save_path))) - - def test_148_boxplot_vector_no_circles(self): - """Generate a boxplot graph from a vector object with four groups and no circles.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array) - .append(Vector(input_2_array)) - .append(Vector(input_3_array)) - .append(Vector(input_4_array))) - self.assertTrue(GraphBoxplot(vector, save_to='{}test_box_148'.format(self.save_path), circles=False)) - - def test_149_no_gmean(self): - """Generate a boxplot graph from a vector object with four groups and no grand mean line.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array) - .append(Vector(input_2_array)) - .append(Vector(input_3_array)) - .append(Vector(input_4_array))) - res = GraphBoxplot(vector, - gmean=False, - save_to='{}test_box_149'.format(self.save_path)) - self.assertTrue(res) - - def test_150_no_gmedian(self): - """Generate a boxplot graph from a vector object with four groups and no grand median line.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array) - .append(Vector(input_2_array)) - .append(Vector(input_3_array)) - .append(Vector(input_4_array))) - res = GraphBoxplot(vector, - gmedian=False, - save_to='{}test_box_150'.format(self.save_path)) - self.assertTrue(res) - - def test_151_no_gmedian_or_gmean(self): - """Generate a boxplot graph from a vector object with four groups and no grand mean or median line.""" - np.random.seed(987654321) - input_1_array = st.norm.rvs(size=2000) - input_2_array = st.norm.rvs(1, size=2000) - input_3_array = st.norm.rvs(2, 0.5, size=2000) - input_4_array = st.weibull_min.rvs(1.4, size=2000) - vector = (Vector(input_1_array) - .append(Vector(input_2_array)) - .append(Vector(input_3_array)) - .append(Vector(input_4_array))) - res = GraphBoxplot(vector, - gmean=False, - gmedian=False, - save_to='{}test_box_151'.format(self.save_path)) - self.assertTrue(res) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_graph_histo.py b/build/lib/sci_analysis/test/test_graph_histo.py deleted file mode 100644 index 9a4d8dc..0000000 --- a/build/lib/sci_analysis/test/test_graph_histo.py +++ /dev/null @@ -1,381 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st -from os import path, getcwd - -from ..data import Vector -from ..graphs import GraphHisto -from ..analysis.exc import NoDataError - - -class MyTestCase(unittest.TestCase): - - @property - def save_path(self): - if getcwd().split('/')[-1] == 'test': - return './images/' - elif getcwd().split('/')[-1] == 'sci_analysis': - if path.exists('./setup.py'): - return './sci_analysis/test/images/' - else: - return './test/images/' - else: - './' - - def test_100_default_graph(self): - """Generate a histogram graph with default arguments""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - save_to='{}test_histo_100'.format(self.save_path))) - - def test_101_bins(self): - """Generate a histogram graph with 100 bins""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - bins=100, - save_to='{}test_histo_101'.format(self.save_path))) - - def test_102_bins_no_box_plot(self): - """Generate a histogram graph without the accompanying boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - bins=100, - boxplot=False, - save_to='{}test_histo_102'.format(self.save_path))) - - def test_103_bins_no_box_plot_cdf(self): - """Generate a histogram graph with cdf and no boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - bins=100, - boxplot=False, - cdf=True, - save_to='{}test_histo_103'.format(self.save_path))) - - def test_104_bins_no_box_plot_cdf_fit(self): - """Generate a histogram graph with fit, cdf and no boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - bins=100, - boxplot=False, - cdf=True, - fit=True, - save_to='{}test_histo_104'.format(self.save_path))) - - def test_105_no_box_plot(self): - """Generate a histogram graph without the accompanying boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - boxplot=False, - save_to='{}test_histo_105'.format(self.save_path))) - - def test_106_no_box_plot_cdf(self): - """Generate a histogram graph with cdf and no boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - boxplot=False, - cdf=True, - save_to='{}test_histo_106'.format(self.save_path))) - - def test_107_no_box_plot_cdf_fit(self): - """Generate a histogram graph with fit, cdf and no boxplot""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - boxplot=False, - cdf=True, - fit=True, - save_to='{}test_histo_107'.format(self.save_path))) - - def test_108_cdf(self): - """Generate a histogram graph with cdf""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - cdf=True, - save_to='{}test_histo_108'.format(self.save_path))) - - def test_109_cdf_fit(self): - """Generate a histogram graph with fit and cdf""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - cdf=True, - fit=True, - save_to='{}test_histo_109'.format(self.save_path))) - - def test_110_fit(self): - """Generate a histogram graph with fit""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - self.assertTrue(GraphHisto(input_array, - fit=True, - save_to='{}test_histo_110'.format(self.save_path))) - - def test_111_only_mean(self): - """Generate a histogram graph with only the mean set""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - mean = np.mean(input_array) - self.assertTrue(GraphHisto(input_array, - mean=mean, - save_to='{}test_histo_111'.format(self.save_path))) - - def test_112_only_std(self): - """Generate a histogram graph with only the std dev set""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - std = np.std(input_array) - self.assertTrue(GraphHisto(input_array, - std_dev=std, - save_to='{}test_histo_112'.format(self.save_path))) - - def test_113_mean_and_std(self): - """Generate a histogram graph with the mean and std dev set""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - mean = np.mean(input_array) - std = np.std(input_array) - self.assertTrue(GraphHisto(input_array, - mean=mean, - std_dev=std, - save_to='{}test_histo_113'.format(self.save_path))) - - def test_114_mean_std_and_sample(self): - """Generate a histogram graph with the mean and std dev set""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - mean = np.mean(input_array) - std = np.std(input_array) - self.assertTrue(GraphHisto(input_array, - mean=mean, - std_dev=std, - sample=False, - save_to='{}test_histo_114'.format(self.save_path))) - - def test_115_distribution(self): - """Generate a histogram graph with distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - save_to='{}test_histo_115'.format(self.save_path))) - - def test_116_distribution_bins(self): - """Generate a histogram graph with distribution and bins set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - bins=100, - save_to='{}test_histo_116'.format(self.save_path))) - - def test_117_distribution_bins_no_boxplot(self): - """Generate a histogram graph with no boxplot, cdf, distribution and bins set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - bins=100, - boxplot=False, - save_to='{}test_histo_117'.format(self.save_path))) - - def test_118_distribution_bins_boxplot_cdf(self): - """Generate a histogram graph with no boxplot, distribution and bins set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - bins=100, - boxplot=False, - cdf=True, - save_to='{}test_histo_118'.format(self.save_path))) - - def test_119_distribution_bins_boxplot_cdf_fit(self): - """Generate a histogram graph with no boxplot, fit, cdf, distribution and bins set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - bins=100, - boxplot=False, - cdf=True, - fit=True, - save_to='{}test_histo_119'.format(self.save_path))) - - def test_120_distribution_boxplot(self): - """Generate a histogram graph with no boxplot and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - boxplot=False, - save_to='{}test_histo_120'.format(self.save_path))) - - def test_121_distribution_boxplot_cdf(self): - """Generate a histogram graph with no boxplot, cdf and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - boxplot=False, - cdf=True, - save_to='{}test_histo_121'.format(self.save_path))) - - def test_122_distribution_boxplot_cdf_fit(self): - """Generate a histogram graph with no boxplot, fit, cdf and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - boxplot=False, - cdf=True, - fit=True, - save_to='{}test_histo_122'.format(self.save_path))) - - def test_123_distribution_cdf(self): - """Generate a histogram graph with cdf and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - cdf=True, - save_to='{}test_histo_123'.format(self.save_path))) - - def test_124_distribution_cdf_fit(self): - """Generate a histogram graph with fit, cdf and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - cdf=True, - fit=True, - save_to='{}test_histo_124'.format(self.save_path))) - - def test_125_distribution_fit(self): - """Generate a histogram graph with fit and distribution set""" - np.random.seed(987654321) - input_array = st.weibull_min.rvs(1.7, size=5000) - self.assertTrue(GraphHisto(input_array, - distribution='weibull_min', - fit=True, - save_to='{}test_histo_125'.format(self.save_path))) - - def test_126_empty_list(self): - """Catch the graphing case where the input is an empty list""" - np.random.seed(987654321) - input_array = [] - self.assertRaises(NoDataError, lambda: GraphHisto(input_array)) - - def test_127_empty_array(self): - """Catch the graphing case where the input is an empty array""" - np.random.seed(987654321) - input_array = np.array([]) - self.assertRaises(NoDataError, lambda: GraphHisto(input_array)) - - def test_128_xname(self): - """Set the xname of a histogram graph""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=5000)) - self.assertTrue(GraphHisto(input_array, - xname='Test', - save_to='{}test_histo_128'.format(self.save_path))) - - def test_129_name(self): - """Set the name of a histogram graph""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=5000)) - self.assertTrue(GraphHisto(input_array, - name='Test', - save_to='{}test_histo_129'.format(self.save_path))) - - def test_130_yname(self): - """Set the yname of a histogram graph""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=5000)) - self.assertTrue(GraphHisto(input_array, - yname='Test', - save_to='{}test_histo_130'.format(self.save_path))) - - def test_131_missing_data(self): - """Generate a histogram graph with 500 random missing values""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=5000) - indicies = list(np.random.randint(0, 4999, 500)) - for x in indicies: - input_array = np.insert(input_array, x, np.nan, axis=0) - self.assertTrue(GraphHisto(Vector(input_array), name='Missing Test', - save_to='{}test_histo_131'.format(self.save_path))) - - # def test_132_at_min_size(self): - # """Generate a histogram graph at the minimum size""" - # np.random.seed(987654321) - # input_array = Vector(st.norm.rvs(size=2)) - # self.assertTrue(GraphHisto(input_array, name='At Min Size', save_to='{}test_histo_132'.format(self.save_path))) - # - # def test_133_min_size(self): - # """Generate a histogram graph below the minimum size""" - # np.random.seed(987654321) - # input_array = Vector(st.norm.rvs(size=1)) - # self.assertRaises(MinimumSizeError, lambda: GraphHisto(input_array)) - - def test_134_graph_string(self): - """Generate a histogram graph with string data""" - np.random.seed(987654321) - input_array = ["1", "2", "this", "is", "a", '3', "string", "4", "5"] - self.assertTrue(GraphHisto(input_array, name='String Array', save_to='{}test_histo_134'.format(self.save_path))) - - def test_135_graph_2dim_array(self): - """Generate a histogram graph with a 2dim array""" - np.random.seed(987654321) - input_array = np.array([[1, 2, 3], [4, 5, 6]]) - self.assertTrue(GraphHisto(input_array, name='2dim Array', save_to='{}test_histo_135'.format(self.save_path))) - - def test_136_graph_3dim_array(self): - """Generate a histogram graph with a 3dim array""" - np.random.seed(987654321) - input_array = np.array([[[1, 2, 3], [4, 5, 6]], [[10, 11, 12], [13, 14, 15]]]) - self.assertTrue(GraphHisto(input_array, name='3dim Array', save_to='{}test_histo_136'.format(self.save_path))) - - def test_137_graph_3dim_missing_data(self): - """Generate a histogram graph from a 3dim list with missing data""" - np.random.seed(987654321) - input_array = [[['1', '2', 'three'], ['4.0', 'five', '6']], [['10', '11', '12.00'], ['t', 'h', '15']]] - self.assertTrue(GraphHisto(input_array, name='3dim Missing', save_to='{}test_histo_137'.format(self.save_path))) - - def test_138_graph_title(self): - """Generate a histogram graph with a specified title""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=2000) - self.assertTrue(GraphHisto(input_array, title='Title Test', save_to='{}test_histo_138'.format(self.save_path))) - - def test_139_graph_no_data(self): - """Catch the case where no data is passed to GraphHisto""" - input_array = Vector() - self.assertRaises(NoDataError, lambda: GraphHisto(input_array)) - - def test_140_graph_vector(self): - """Generate a histogram from a Vector object""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=5000)) - self.assertTrue(GraphHisto(input_array, save_to='{}test_histo_140'.format(self.save_path))) - - def test_141_graph_groups(self): - """Generate a histogram from a Vector with groups""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=2500) - grp1 = ['one'] * 2500 - grp2 = ['two'] * 2500 - exp = Vector(input_array, groups=grp1).append(Vector(input_array, groups=grp2)) - self.assertTrue(GraphHisto(exp, save_to='{}test_histo_141'.format(self.save_path))) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_graph_scatter.py b/build/lib/sci_analysis/test/test_graph_scatter.py deleted file mode 100644 index e623069..0000000 --- a/build/lib/sci_analysis/test/test_graph_scatter.py +++ /dev/null @@ -1,319 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st -from os import path, getcwd - -from ..graphs import GraphScatter -from ..data import Vector -from ..analysis.exc import NoDataError -from ..data import UnequalVectorLengthError - - -class MyTestCase(unittest.TestCase): - - @property - def save_path(self): - if getcwd().split('/')[-1] == 'test': - return './images/' - elif getcwd().split('/')[-1] == 'sci_analysis': - if path.exists('./setup.py'): - return './sci_analysis/test/images/' - else: - return './test/images/' - else: - './' - - def test_100_default(self): - """Generate a scatter plot with default settings""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - save_to='{}test_scatter_100'.format(self.save_path))) - - def test_101_no_points(self): - """Generate a scatter plot with no points""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - points=False, - save_to='{}test_scatter_101'.format(self.save_path))) - - def test_102_no_points_contours(self): - """Generate a scatter plot with no points and contours""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - points=False, - contours=True, - save_to='{}test_scatter_102'.format(self.save_path))) - - def test_103_no_points_contours_boxplots(self): - """Generate a scatter plot with no points, contours and boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - points=False, - contours=True, - boxplot_borders=True, - save_to='{}test_scatter_103'.format(self.save_path))) - - def test_104_no_fit(self): - """Generate a scatter plot with no fit""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - save_to='{}test_scatter_104'.format(self.save_path))) - - def test_105_no_fit_no_points(self): - """Generate a scatter plot with no fit or points""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - points=False, - save_to='{}test_scatter_105'.format(self.save_path))) - - def test_106_no_fit_no_points_contours(self): - """Generate a scatter plot with no fit or points and contours""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - points=False, - contours=True, - save_to='{}test_scatter_106'.format(self.save_path))) - - def test_107_no_fit_no_points_contours_boxplots(self): - """Generate a scatter plot with no fit or points, contours and boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - points=False, - contours=True, - boxplot_borders=True, - save_to='{}test_scatter_107'.format(self.save_path))) - - def test_108_contours(self): - """Generate a scatter plot with contours""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - contours=True, - save_to='{}test_scatter_108'.format(self.save_path))) - - def test_109_contours_boxplots(self): - """Generate a scatter plot with contours and boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - contours=True, - boxplot_borders=True, - save_to='{}test_scatter_109'.format(self.save_path))) - - def test_110_boxplots(self): - """Generate a scatter plot with boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - boxplot_borders=True, - save_to='{}test_scatter_110'.format(self.save_path))) - - def test_111_no_points_boxplots(self): - """Generate a scatter plot with no points and boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - points=False, - boxplot_borders=True, - save_to='{}test_scatter_111'.format(self.save_path))) - - def test_112_no_points_no_fit_boxplots(self): - """Generate a scatter plot with no points or no fit and boxplots""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - points=False, - boxplot_borders=True, - save_to='{}test_scatter_112'.format(self.save_path))) - - def test_113_one_empty_list(self): - """Catch the case where the input is an empty list""" - np.random.seed(987654321) - input_x_array = [] - input_y_array = st.norm.rvs(size=2000) - self.assertRaises(NoDataError, lambda: GraphScatter(input_x_array, input_y_array)) - - def test_114_other_empty_list(self): - """Catch the case where the input is an empty list""" - np.random.seed(987654321) - input_y_array = [] - input_x_array = st.norm.rvs(size=2000) - self.assertRaises(UnequalVectorLengthError, lambda: GraphScatter(input_x_array, input_y_array)) - - def test_115_two_empty_lists(self): - """Catch the case where both inputs are empty lists""" - np.random.seed(987654321) - input_x_array = [] - input_y_array = [] - self.assertRaises(NoDataError, lambda: GraphScatter(input_x_array, input_y_array)) - - def test_116_missing_data(self): - """Catch the case where there is missing data in both arrays""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - indicies_x = list(np.random.randint(0, 1999, 200)) - indicies_y = list(np.random.randint(0, 1999, 200)) - for i in indicies_x: - input_x_array = np.insert(input_x_array, i, np.nan, axis=0) - for i in indicies_y: - input_y_array = np.insert(input_y_array, i, np.nan, axis=0) - self.assertTrue(GraphScatter(input_x_array, input_y_array, save_to='{}test_scatter_116'.format(self.save_path))) - - def test_117_at_min_size(self): - """Generate a scatter plot at the min size""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2) - input_y_array = st.norm.rvs(size=2) - self.assertTrue(GraphScatter(input_x_array, input_y_array, save_to='{}test_scatter_117'.format(self.save_path))) - - def test_118_single_point(self): - """Generate a scatter plot with a single point""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=1) - input_y_array = st.norm.rvs(size=1) - self.assertTrue(GraphScatter(input_x_array, input_y_array, save_to='{}test_scatter_118'.format(self.save_path))) - - def test_119_default_corr(self): - """Generate a scatter plot with correlating data""" - np.random.seed(987654321) - input_x_array = st.weibull_min.rvs(2, size=2000) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, save_to='{}test_scatter_119'.format(self.save_path))) - - def test_120_contours_no_fit_corr(self): - """Generate a scatter plot with contours, no fit and correlating data""" - np.random.seed(987654321) - input_x_array = st.weibull_min.rvs(2, size=2000) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - fit=False, - contours=True, - save_to='{}test_scatter_120'.format(self.save_path))) - - def test_121_boxplots_fit_corr(self): - """Generate a scatter plot with boxplots, fit and correlating data""" - np.random.seed(987654321) - input_x_array = st.weibull_min.rvs(2, size=2000) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - boxplot_borders=True, - save_to='{}test_scatter_121'.format(self.save_path))) - - def test_122_set_x_and_y_name(self): - """Generate a scatter plot with set x and y names""" - np.random.seed(987654321) - input_x_array = st.weibull_min.rvs(2, size=2000) - input_y_array = np.array([x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - xname='Test X', - yname='Test Y', - save_to='{}test_scatter_122'.format(self.save_path))) - - def test_123_scatter_string(self): - """Generate a scatter plot from lists of string values""" - np.random.seed(987654321) - input_x_array = ["1.0", "2.4", "three", "4", "5.1", "six", "7.3"] - input_y_array = ["1.2", "2", "3.0", "4.3", "five", "six", "7.8"] - self.assertTrue(GraphScatter(input_x_array, input_y_array, - # fit=False, - # contours=True, - boxplot_borders=True, - save_to='{}test_scatter_123'.format(self.save_path))) - - def test_124_scatter_length_4_bug(self): - """Generate a scatter plot with 4 points to check for the case where the scatter method thinks the color - tuple is a cmap instead of an RGBA tuple""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=4) - input_y_array = st.norm.rvs(size=4) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - save_to='{}test_scatter_124'.format(self.save_path))) - - def test_125_scatter_title(self): - """Generate a scatter plot with a specified title""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - title='Test Title', - save_to='{}test_scatter_125'.format(self.save_path))) - - def test_126_scatter_2dim_arrays(self): - """Generate a scatter plot a 2dim arrays""" - np.random.seed(987654321) - input_x_array = np.array([[1, 2, 3], [4, 5, 6]]) - input_y_array = np.array([[3, 6, 9], [12, 15, 18]]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - title='2dim Arrays', - save_to='{}test_scatter_126'.format(self.save_path))) - - def test_127_scatter_2dim_lists_with_missing(self): - """Generate a scatter plot with 2dim arrays with missing data""" - np.random.seed(987654321) - input_x_array = [['1', '2', 'three'], ['4.0', 'five', '6']] - input_y_array = [['3', '6', '9'], ['four', 'five', '18.0']] - self.assertTrue(GraphScatter(input_x_array, input_y_array, - title='2dim Arrays With Missing', - save_to='{}test_scatter_127'.format(self.save_path))) - - def test_128_scatter_3dim_arrays(self): - """Generate a scatter plot a 3dim arrays""" - np.random.seed(987654321) - input_x_array = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]) - input_y_array = np.array([[[3, 6, 9], [12, 15, 18]], [[21, 24, 27], [30, 33, 36]]]) - self.assertTrue(GraphScatter(input_x_array, input_y_array, - title='3dim Arrays', - save_to='{}test_scatter_128'.format(self.save_path))) - - def test_129_scatter_missing_ydata(self): - """Check to make sure an AttributeError is raised is ydata is None.""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = None - self.assertRaises(AttributeError, lambda: GraphScatter(input_x_array, input_y_array)) - - def test_130_scatter_vector(self): - """Generate a scatter plot with a Vector object.""" - np.random.seed(987654321) - input_x_array = st.norm.rvs(size=2000) - input_y_array = st.norm.rvs(size=2000) - vector = Vector(input_x_array, other=input_y_array) - self.assertTrue(GraphScatter(vector, - title='Vector Scatter', - save_to='{}test_scatter_130'.format(self.save_path))) - - def test_131_scatter_empty_vector(self): - """Check to make sure a NoDataError exception is raised if an empty Vector is passed.""" - vector = Vector() - self.assertRaises(NoDataError, lambda: GraphScatter(vector)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_groupstatistics.py b/build/lib/sci_analysis/test/test_groupstatistics.py deleted file mode 100644 index 7b4d0b6..0000000 --- a/build/lib/sci_analysis/test/test_groupstatistics.py +++ /dev/null @@ -1,419 +0,0 @@ -import unittest -import numpy as np -import pandas as pd -import scipy.stats as st - -from ..analysis import GroupStatistics, GroupStatisticsStacked -from ..analysis.exc import MinimumSizeError, NoDataError -from ..data import Vector - - -class TestGroupStatistics(unittest.TestCase): - def test_0001_group_statistics_no_name(self): - """Test the Group Statistic class with generated group names""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 163 -Grand Mean = 4.1568 -Pooled Std Dev = 2.0798 -Grand Median = 2.3293 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 1 -45 2.3678 3.5551 -4.8034 2.2217 11.4199 2 -18 8.0944 1.1855 6.0553 7.9712 10.5272 3 """ - res = GroupStatistics(x_input_array, y_input_array, z_input_array, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 163) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 2.0798, 4) - self.assertAlmostEqual(res.pooled_std, 2.0798, 4) - self.assertAlmostEqual(res.gmean, 4.1568, 4) - self.assertAlmostEqual(res.grand_mean, 4.1568, 4) - self.assertAlmostEqual(res.gmedian, 2.3293, 4) - self.assertAlmostEqual(res.grand_median, 2.3293, 4) - - def test_0002_group_statistics_group_names(self): - """Test the Group Statistic class with group names specified in a list""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - names = ("one", "two", "three") - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 163 -Grand Mean = 4.1568 -Pooled Std Dev = 2.0798 -Grand Median = 2.3293 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 one -18 8.0944 1.1855 6.0553 7.9712 10.5272 three -45 2.3678 3.5551 -4.8034 2.2217 11.4199 two """ - res = GroupStatistics(x_input_array, y_input_array, z_input_array, groups=names, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - - def test_0003_group_statistics_dict(self): - """Test the Group Statistic class with data passed as a dict""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - data = {"one": x_input_array, "two": y_input_array, "three": z_input_array} - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 163 -Grand Mean = 4.1568 -Pooled Std Dev = 2.0798 -Grand Median = 2.3293 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 one -18 8.0944 1.1855 6.0553 7.9712 10.5272 three -45 2.3678 3.5551 -4.8034 2.2217 11.4199 two """ - res = GroupStatistics(data, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 163) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 2.0798, 4) - self.assertAlmostEqual(res.pooled_std, 2.0798, 4) - self.assertAlmostEqual(res.gmean, 4.1568, 4) - self.assertAlmostEqual(res.grand_mean, 4.1568, 4) - self.assertAlmostEqual(res.gmedian, 2.3293, 4) - self.assertAlmostEqual(res.grand_median, 2.3293, 4) - - def test_0004_group_statistics_dict_just_above_min_size(self): - """Test the Group Statistic class with data passed as a dict just above min size""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=2) - y_input_array = st.norm.rvs(2, 3, size=2) - z_input_array = st.norm.rvs(8, 1, size=2) - data = {"one": x_input_array, "two": y_input_array, "three": z_input_array} - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 6 -Grand Mean = 4.4847 -Pooled Std Dev = 4.0150 -Grand Median = 5.6247 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -2 2.8003 2.0453 1.3541 2.8003 4.2466 one -2 7.5349 0.7523 7.0029 7.5349 8.0668 three -2 3.1189 6.6038 -1.5507 3.1189 7.7885 two """ - res = GroupStatistics(data, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 6) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 4.0150, 4) - self.assertAlmostEqual(res.pooled_std, 4.0150, 4) - self.assertAlmostEqual(res.gmean, 4.4847, 4) - self.assertAlmostEqual(res.grand_mean, 4.4847, 4) - self.assertAlmostEqual(res.gmedian, 5.6247, 4) - self.assertAlmostEqual(res.grand_median, 5.6247, 4) - - def test_0005_group_statistics_dict_at_min_size(self): - """Test the Group Statistic class with data passed as a dict at min size""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=1) - y_input_array = st.norm.rvs(2, 3, size=1) - z_input_array = st.norm.rvs(8, 1, size=1) - data = {"one": x_input_array, "two": y_input_array, "three": z_input_array} - self.assertRaises(MinimumSizeError, lambda: GroupStatistics(data, display=False)) - - def test_0006_group_statistics_dict_single_empty_vector(self): - """Test the Group Statistic class with data passed as a dict and a single missing vector""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=10) - y_input_array = ["this", "is", "a", "string"] - z_input_array = st.norm.rvs(8, 1, size=10) - data = {"one": x_input_array, "two": y_input_array, "three": z_input_array} - output = """ - -Overall Statistics ------------------- - -Number of Groups = 2 -Total = 20 -Grand Mean = 5.1489 -Pooled Std Dev = 1.2409 -Grand Median = 5.3048 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -10 2.3511 1.3732 0.6591 2.3882 4.2466 one -10 7.9466 1.0927 6.3630 7.9607 9.7260 three """ - res = GroupStatistics(data, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 20) - self.assertEqual(res.k, 2) - self.assertAlmostEqual(res.pooled, 1.2409, 4) - self.assertAlmostEqual(res.pooled_std, 1.2409, 4) - self.assertAlmostEqual(res.gmean, 5.1489, 4) - self.assertAlmostEqual(res.grand_mean, 5.1489, 4) - self.assertAlmostEqual(res.gmedian, 5.3048, 4) - self.assertAlmostEqual(res.grand_median, 5.3048, 4) - - def test_0007_group_statistics_single_group(self): - """Test the Group Statistic class with a single group""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=10) - output = """ - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -10 2.3511 1.3732 0.6591 2.3882 4.2466 1 """ - res = GroupStatistics(x_input_array, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 10) - self.assertEqual(res.k, 1) - self.assertIsNone(res.pooled) - self.assertIsNone(res.pooled_std) - self.assertIsNone(res.gmean) - self.assertIsNone(res.grand_mean) - self.assertIsNone(res.gmedian) - self.assertIsNone(res.grand_median) - - def test_0008_group_statistics_dict_empty(self): - """Test the Group Statistic class with data passed as empty""" - np.random.seed(987654321) - x_input_array = ["this", "is", "a", "string"] - y_input_array = [float("nan"), float("nan"), "three", float("nan")] - data = {"one": x_input_array, "two": y_input_array} - self.assertRaises(NoDataError, lambda: GroupStatistics(data, display=False)) - - def test_0009_group_statistics_dict_empty_zero_length(self): - """Test the Group Statistic class with data passed as zero length vectors""" - np.random.seed(987654321) - x_input_array = np.array([]) - y_input_array = [] - data = {"one": x_input_array, "two": y_input_array} - self.assertRaises(NoDataError, lambda: GroupStatistics(data, display=False)) - - def test_0010_group_statistics_stacked(self): - """Test the Stacked Group Statistic class""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - vals = np.append(x_input_array, np.append(y_input_array, z_input_array)).tolist() - grps = ['x'] * 100 + ['y'] * 45 + ['z'] * 18 - ref = pd.DataFrame({'values': vals, 'groups': grps}) - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 163 -Grand Mean = 4.1568 -Pooled Std Dev = 2.0798 -Grand Median = 2.3293 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 x -45 2.3678 3.5551 -4.8034 2.2217 11.4199 y -18 8.0944 1.1855 6.0553 7.9712 10.5272 z """ - res = GroupStatisticsStacked(ref['values'], ref['groups'], display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 163) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 2.0798, 4) - self.assertAlmostEqual(res.pooled_std, 2.0798, 4) - self.assertAlmostEqual(res.gmean, 4.1568, 4) - self.assertAlmostEqual(res.grand_mean, 4.1568, 4) - self.assertAlmostEqual(res.gmedian, 2.3293, 4) - self.assertAlmostEqual(res.grand_median, 2.3293, 4) - - def test_0011_group_statistics_stacked_no_data(self): - """Test the event when all passed data is NA""" - input_array = [np.nan, np.nan, np.nan, np.nan, np.nan] - grps = ['a', 'b', 'a', 'b', 'a'] - ref = pd.DataFrame({'values': input_array, 'groups': grps}) - self.assertRaises(NoDataError, lambda: GroupStatisticsStacked(ref['values'], ref['groups'], display=False)) - - def test_0012_group_statistics_stacked_scalar(self): - """Test the event a scalar is passed and a minimum size error is raised""" - input_array = 1 - grps = 'a' - self.assertRaises(MinimumSizeError, lambda: GroupStatisticsStacked(input_array, grps, display=False)) - - def test_0013_group_statistics_stacked_missing_group(self): - """Test the event when a group is all NA""" - input_array = [1.0, np.nan, 0.95, np.nan, 1.05] - grps = ['a', 'b', 'a', 'b', 'a'] - ref = pd.DataFrame({'values': input_array, 'groups': grps}) - output = """ - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -3 1.0000 0.0500 0.9500 1.0000 1.0500 a """ - res = GroupStatisticsStacked(ref['values'], ref['groups'], display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 3) - self.assertEqual(res.k, 1) - self.assertIsNone(res.pooled) - self.assertIsNone(res.pooled_std) - self.assertIsNone(res.gmean) - self.assertIsNone(res.grand_mean) - self.assertIsNone(res.gmedian) - self.assertIsNone(res.grand_median) - - def test_0014_group_statistics_stacked_vector(self): - """Test the Stacked Group Statistic class with a Vector input object.""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - vals = np.append(x_input_array, np.append(y_input_array, z_input_array)).tolist() - grps = ['x'] * 100 + ['y'] * 45 + ['z'] * 18 - ref = pd.DataFrame({'values': vals, 'groups': grps}) - exp = Vector(ref['values'], groups=ref['groups']) - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 163 -Grand Mean = 4.1568 -Pooled Std Dev = 2.0798 -Grand Median = 2.3293 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 x -45 2.3678 3.5551 -4.8034 2.2217 11.4199 y -18 8.0944 1.1855 6.0553 7.9712 10.5272 z """ - res = GroupStatisticsStacked(exp, display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 163) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 2.0798, 4) - self.assertAlmostEqual(res.pooled_std, 2.0798, 4) - self.assertAlmostEqual(res.gmean, 4.1568, 4) - self.assertAlmostEqual(res.grand_mean, 4.1568, 4) - self.assertAlmostEqual(res.gmedian, 2.3293, 4) - self.assertAlmostEqual(res.grand_median, 2.3293, 4) - - def test_0015_group_statistics_stacked_missing_groups(self): - """Test the case where the groups argument is not provided.""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=18) - vals = np.append(x_input_array, np.append(y_input_array, z_input_array)).tolist() - self.assertRaises(AttributeError, lambda: GroupStatisticsStacked(vals)) - - def test_0016_group_statistics_above_min_size(self): - """Test the Stacked Group Statistic class""" - np.random.seed(987654321) - x_input_array = st.norm.rvs(2, 1, size=100) - y_input_array = st.norm.rvs(2, 3, size=45) - z_input_array = st.norm.rvs(8, 1, size=2) - vals = np.append(x_input_array, np.append(y_input_array, z_input_array)).tolist() - grps = ['x'] * 100 + ['y'] * 45 + ['z'] * 2 - ref = pd.DataFrame({'values': vals, 'groups': grps}) - output = """ - -Overall Statistics ------------------- - -Number of Groups = 3 -Total = 147 -Grand Mean = 4.8060 -Pooled Std Dev = 2.1549 -Grand Median = 2.0912 - - -Group Statistics ----------------- - -n Mean Std Dev Min Median Max Group --------------------------------------------------------------------------------------------------- -100 2.0083 1.0641 -0.4718 2.0761 4.2466 x -45 2.3678 3.5551 -4.8034 2.2217 11.4199 y -2 10.0420 0.6862 9.5568 10.0420 10.5272 z """ - res = GroupStatisticsStacked(ref['values'], ref['groups'], display=False) - self.assertTrue(res) - self.assertEqual(str(res), output) - self.assertEqual(res.total, 147) - self.assertEqual(res.k, 3) - self.assertAlmostEqual(res.pooled, 2.1549, 4) - self.assertAlmostEqual(res.pooled_std, 2.1549, 4) - self.assertAlmostEqual(res.gmean, 4.8060, 4) - self.assertAlmostEqual(res.grand_mean, 4.8060, 4) - self.assertAlmostEqual(res.gmedian, 2.0912, 4) - self.assertAlmostEqual(res.grand_median, 2.0912, 4) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_kruskal.py b/build/lib/sci_analysis/test/test_kruskal.py deleted file mode 100644 index ef82d9a..0000000 --- a/build/lib/sci_analysis/test/test_kruskal.py +++ /dev/null @@ -1,141 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import Kruskal -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - def test_500_Kruskal_matched(self): - """Test the Kruskal Wallis class on matched data""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - y_input_array = st.weibull_min.rvs(*x_parms, size=100) - z_input_array = st.weibull_min.rvs(*x_parms, size=100) - alpha = 0.05 - exp = Kruskal(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Kruskal-Wallis --------------- - -alpha = 0.0500 -h value = 0.4042 -p value = 0.8170 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: Kruskal Type I error") - self.assertAlmostEqual(exp.statistic, 0.4042, delta=0.0001) - self.assertAlmostEqual(exp.h_value, 0.4042, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.817, delta=0.001) - self.assertEqual(str(exp), output) - - def test_503_Kruskal_matched_single_argument(self): - """Test the Kruskal Wallis class on matched data""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - a = 0.05 - self.assertRaises(NoDataError, lambda: Kruskal(x_input_array, alpha=a, display=False).p_value) - - def test_504_Kruskal_unmatched(self): - """Test the Kruskal Wallis class on unmatched data""" - np.random.seed(987654321) - x_parms = [1.7, 1] - z_parms = [0.8, 1] - x_input_array = st.norm.rvs(*x_parms, size=100) - y_input_array = st.norm.rvs(*x_parms, size=100) - z_input_array = st.norm.rvs(*z_parms, size=100) - alpha = 0.05 - exp = Kruskal(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Kruskal-Wallis --------------- - -alpha = 0.0500 -h value = 37.4069 -p value = 0.0000 - -HA: Group means are not matched -""" - self.assertLess(exp.p_value, alpha, "FAIL: Kruskal Type II error") - self.assertAlmostEqual(exp.statistic, 37.4069, delta=0.0001) - self.assertAlmostEqual(exp.h_value, 37.4069, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_505_Kruskal_matched_just_above_min_size(self): - """Test the Kruskal Wallis class on matched data just above min size""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=3) - y_input_array = st.weibull_min.rvs(*x_parms, size=3) - z_input_array = st.weibull_min.rvs(*x_parms, size=3) - alpha = 0.05 - exp = Kruskal(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Kruskal-Wallis --------------- - -alpha = 0.0500 -h value = 3.4667 -p value = 0.1767 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha) - self.assertEqual(str(exp), output) - - def test_506_Kruskal_matched_at_min_size(self): - """Test the Kruskal Wallis class on matched data at min size""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=2) - y_input_array = st.weibull_min.rvs(*x_parms, size=2) - z_input_array = st.weibull_min.rvs(*x_parms, size=2) - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: Kruskal(x_input_array, y_input_array, z_input_array, - alpha=alpha, - display=False).p_value) - - def test_507_Kruskal_matched_single_empty_vector(self): - """Test the Kruskal Wallis class on matched data with single missing vector""" - np.random.seed(987654321) - x_parms = [1.7] - x_input_array = st.weibull_min.rvs(*x_parms, size=100) - y_input_array = ["one", "two", "three", "four", "five"] - z_input_array = st.weibull_min.rvs(*x_parms, size=100) - alpha = 0.05 - exp = Kruskal(x_input_array, y_input_array, z_input_array, alpha=alpha, display=False) - output = """ - -Kruskal-Wallis --------------- - -alpha = 0.0500 -h value = 0.0034 -p value = 0.9532 - -H0: Group means are matched -""" - self.assertGreater(exp.p_value, alpha) - self.assertEqual(str(exp), output) - - def test_508_Kruskal_matched_all_empty(self): - """Test the Kruskal Wallis class on matched data all empty""" - np.random.seed(987654321) - x_input_array = [np.nan, np.nan, np.nan, "four", np.nan] - y_input_array = ["one", "two", "three", "four", "five"] - alpha = 0.05 - self.assertRaises(NoDataError, lambda: Kruskal(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_kstest.py b/build/lib/sci_analysis/test/test_kstest.py deleted file mode 100644 index 57e6aa3..0000000 --- a/build/lib/sci_analysis/test/test_kstest.py +++ /dev/null @@ -1,149 +0,0 @@ -import unittest -import scipy.stats as st -import numpy as np - -from ..analysis import KSTest -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - def test_250_Kolmogorov_Smirnov_normal_test(self): - """Test the normal distribution detection""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - test = KSTest(st.norm.rvs(size=100), distro, alpha=alpha, display=False) - output = """ - -Kolmogorov-Smirnov Test ------------------------ - -alpha = 0.0500 -D value = 0.0584 -p value = 0.8853 - -H0: Data is matched to the norm distribution -""" - self.assertGreater(test.p_value, alpha) - self.assertEqual(str(test), output) - - def test_251_Kolmogorov_Smirnov_normal_test_distribution_type(self): - """Test the normal distribution detection""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertEqual(KSTest(st.norm.rvs(size=100), distro, alpha=alpha, display=False).distribution, distro) - - def test_252_Kolmogorov_Smirnov_normal_test_statistic(self): - """Test the normal distribution detection""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertTrue(KSTest(st.norm.rvs(size=100), distro, alpha=alpha, display=False).statistic) - - def test_253_Kolmogorov_Smirnov_normal_test_D_value(self): - """Test the normal distribution detection""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertTrue(KSTest(st.norm.rvs(size=100), distro, alpha=alpha, display=False).d_value) - - def test_254_Kolmogorov_Smirnov_alpha_test_parms_missing(self): - """Test the KSTest to make sure an exception is raised if parms are missing""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'alpha' # not to be confused with the sensitivity alpha - self.assertRaises(TypeError, lambda: KSTest(st.alpha.rvs(size=100), distro, alpha=alpha, display=False)) - - def test_255_Kolmogorov_Smirnov_alpha_test(self): - """Test the alpha distribution detection""" - np.random.seed(987654321) - parms = [3.5] - alpha = 0.05 - distro = 'alpha' - self.assertGreater(KSTest(st.alpha.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_256_Kolmogorov_Smirnov_beta_test(self): - """Test the beta distribution detection""" - np.random.seed(987654321) - parms = [2.3, 0.6] - alpha = 0.05 - distro = 'beta' - self.assertGreater(KSTest(st.beta.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_257_Kolmogorov_Smirnov_cauchy_test(self): - """Test the cauchy distribution detection""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'cauchy' - self.assertGreater(KSTest(st.cauchy.rvs(size=100), distro, - alpha=alpha, display=False).p_value, alpha) - - def test_258_Kolmogorov_Smirnov_chi2_large_test(self): - """Test the chi squared distribution detection with sufficiently large dof""" - np.random.seed(987654321) - parms = [50] - alpha = 0.05 - distro = 'chi2' - self.assertGreater(KSTest(st.chi2.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_259_Kolmogorov_Smirnov_chi2_small_test(self): - """Test the chi squared distribution detection with small dof""" - np.random.seed(987654321) - parms = [5] - alpha = 0.05 - distro = 'chi2' - self.assertGreater(KSTest(st.chi2.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_260_Kolmogorov_Smirnov_weibull_min_test(self): - """Test the weibull min distribution detection""" - np.random.seed(987654321) - parms = [1.7] - alpha = 0.05 - distro = 'weibull_min' - self.assertGreater(KSTest(st.weibull_min.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_261_Kolmogorov_Smirnov_weibull_max_test(self): - """Test the weibull min distribution detection""" - np.random.seed(987654321) - parms = [2.8] - alpha = 0.05 - distro = 'weibull_max' - self.assertGreater(KSTest(st.weibull_max.rvs(*parms, size=100), distro, - parms=parms, alpha=alpha, display=False).p_value, alpha) - - def test_262_Kolmogorov_Smirnov_normal_test_at_min_size(self): - """Test the normal distribution detection at the minimum size""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertRaises(MinimumSizeError, lambda: KSTest(st.norm.rvs(size=2), - distro, - alpha=alpha, - display=False).p_value) - - def test_263_Kolmogorov_Smirnov_normal_test_just_above_min_size(self): - """Test the normal distribution detection just above the minimum size""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertTrue(KSTest(st.norm.rvs(size=3), distro, alpha=alpha, display=False).p_value) - - def test_264_Kolmogorov_Smirnov_normal_test_empty_vector(self): - """Test the normal distribution detection with an empty vector""" - np.random.seed(987654321) - alpha = 0.05 - distro = 'norm' - self.assertRaises(NoDataError, lambda: KSTest(["one", "two", "three", "four"], - distro, - alpha=alpha, - display=False).p_value) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_linregress.py b/build/lib/sci_analysis/test/test_linregress.py deleted file mode 100644 index d6c33b2..0000000 --- a/build/lib/sci_analysis/test/test_linregress.py +++ /dev/null @@ -1,188 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import LinearRegression -from ..analysis.exc import MinimumSizeError, NoDataError -from ..data import UnequalVectorLengthError, Vector - - -class MyTestCase(unittest.TestCase): - def test_350_LinRegress_corr(self): - """Test the Linear Regression class for correlation""" - np.random.seed(987654321) - x_input_array = range(1, 101) - y_input_array = [x * 3 for x in x_input_array] - alpha = 0.05 - output = """ - -Linear Regression ------------------ - -n = 100 -Slope = 3.0000 -Intercept = 0.0000 -r = 1.0000 -r^2 = 1.0000 -Std Err = 0.0000 -p value = 0.0000 -""" - self.assertLess(LinearRegression(x_input_array, y_input_array, alpha=alpha, display=False).p_value, alpha, - "FAIL: Linear Regression Type II error") - self.assertEqual(str(LinearRegression(x_input_array, y_input_array, alpha=alpha, display=False)), output) - - def test_351_LinRegress_no_corr(self): - """Test the Linear Regression class for uncorrelated data""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertGreater(LinearRegression(x_input_array, y_input_array, alpha=alpha, display=False).p_value, alpha, - "FAIL: Linear Regression Type I error") - - def test_352_LinRegress_no_corr_slope(self): - """Test the Linear Regression slope""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).slope, -0.0969, delta=0.0001, - msg="FAIL: Linear Regression slope") - - def test_353_LinRegress_no_corr_intercept(self): - """Test the Linear Regression intercept""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).intercept, -0.0397, delta=0.0001, - msg="FAIL: Linear Regression intercept") - - def test_354_LinRegress_no_corr_r(self): - """Test the Linear Regression r""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).r_value, -0.1029, delta=0.0001, - msg="FAIL: Linear Regression r") - - def test_355_LinRegress_no_corr_r2(self): - """Test the Linear Regression r^2""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).r_squared, 0.0105, delta=0.0001, - msg="FAIL: Linear Regression r^2") - - def test_356_LinRegress_no_corr_std_err(self): - """Test the Linear Regression std err""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).std_err, 0.0666, delta=0.0001, - msg="FAIL: Linear Regression std err") - - def test_357_LinRegress_no_corr_just_above_min_size(self): - """Test the Linear Regression class for uncorrelated data just above minimum size""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=4) - y_input_array = st.norm.rvs(size=4) - self.assertTrue(LinearRegression(x_input_array, y_input_array, alpha=alpha, display=False).p_value, - "FAIL: Linear Regression just above minimum size") - - def test_358_LinRegress_no_corr_at_min_size(self): - """Test the Linear Regression class for uncorrelated data at minimum size""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=3) - y_input_array = st.norm.rvs(size=3) - self.assertRaises(MinimumSizeError, lambda: LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - def test_359_LinRegress_no_corr_unequal_vectors(self): - """Test the Linear Regression class for uncorrelated data with unequal vectors""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=184) - y_input_array = st.norm.rvs(size=200) - self.assertRaises(UnequalVectorLengthError, lambda: LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - def test_360_LinRegress_no_corr_empty_vector(self): - """Test the Linear Regression class for uncorrelated data with an empty vector""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = [float("nan"), "two", "three", "four", float("nan")] - y_input_array = st.norm.rvs(size=5) - self.assertRaises(NoDataError, lambda: LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - def test_361_LinRegress_no_corr_two_empty_vectors(self): - """Test the Linear Regression class for uncorrelated data with two empty vectors""" - alpha = 0.05 - x_input_array = [float("nan"), "two", "three", "four", float("nan")] - y_input_array = ["one", "two", float("nan"), "four", float("nan")] - self.assertRaises(NoDataError, lambda: LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).p_value) - - def test_362_LinRegress_no_corr_statistic(self): - """Test the Linear Regression R^2""" - np.random.seed(987654321) - alpha = 0.05 - x_input_array = st.norm.rvs(size=200) - y_input_array = st.norm.rvs(size=200) - self.assertAlmostEqual(LinearRegression(x_input_array, y_input_array, - alpha=alpha, - display=False).statistic, 0.0105, delta=0.0001, - msg="FAIL: Linear Regression statistic") - - def test_363_LinRegress_vector(self): - """Test the Linear Regression class with an input Vector.""" - np.random.seed(987654321) - x_input_array = range(1, 101) - y_input_array = [x * 3 for x in x_input_array] - alpha = 0.05 - output = """ - -Linear Regression ------------------ - -n = 100 -Slope = 3.0000 -Intercept = 0.0000 -r = 1.0000 -r^2 = 1.0000 -Std Err = 0.0000 -p value = 0.0000 -""" - exp = LinearRegression(Vector(x_input_array, other=y_input_array), alpha=alpha, display=False) - self.assertLess(exp.p_value, alpha, "FAIL: Linear Regression Type II error") - self.assertEqual(str(exp), output) - - def test_364_LinRegress_missing_ydata(self): - """Test the case where no ydata is given.""" - np.random.seed(987654321) - x_input_array = range(1, 101) - self.assertRaises(AttributeError, lambda: LinearRegression(x_input_array)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_mannwhitney.py b/build/lib/sci_analysis/test/test_mannwhitney.py deleted file mode 100644 index bfac7d9..0000000 --- a/build/lib/sci_analysis/test/test_mannwhitney.py +++ /dev/null @@ -1,167 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..data import Vector -from ..analysis import MannWhitney -from ..analysis.exc import MinimumSizeError, NoDataError - - -class TestMannWhitney(unittest.TestCase): - def test_MannWhitney_matched(self): - """Test the MannWhitney U test with two matched samples""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=100) - y_input = st.weibull_min.rvs(*y_parms, size=100) - alpha = 0.05 - exp = MannWhitney(x_input, y_input, alpha=alpha, display=True) - output = """ - -Mann Whitney U Test -------------------- - -alpha = 0.0500 -u value = 4976.0000 -p value = 0.9542 - -H0: Locations are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: MannWhitney Type I error") - self.assertAlmostEqual(exp.statistic, 4976.0, delta=0.0001, msg="FAIL: MannWhitney statistic incorrect") - self.assertAlmostEqual(exp.u_value, 4976.0, delta=0.0001, msg="FAIL: MannWhitney u_value incorrect") - self.assertAlmostEqual(exp.p_value, 0.9542, delta=0.0001, msg="FAIL: MannWhitney p_value incorrect") - self.assertEqual(str(exp), output) - - def test_MannWhitney_unmatched(self): - """Test the MannWhitney U test with two unmatched samples""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [8.2] - x_input = st.weibull_min.rvs(*x_parms, size=100) - y_input = st.weibull_min.rvs(*y_parms, size=100) - alpha = 0.05 - exp = MannWhitney(x_input, y_input, alpha=alpha, display=False) - output = """ - -Mann Whitney U Test -------------------- - -alpha = 0.0500 -u value = 4068.0000 -p value = 0.0228 - -HA: Locations are not matched -""" - self.assertLess(exp.p_value, alpha, msg="FAIL: ManWhitney Type II error") - self.assertAlmostEqual(exp.statistic, 4068.0, delta=0.0001) - self.assertAlmostEqual(exp.u_value, 4068.0, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0228, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_MannWhitney_matched_just_above_min_size(self): - """Test the MannWhitney U test with matched samples just above minimum size""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=31) - y_input = st.weibull_min.rvs(*y_parms, size=31) - alpha = 0.05 - exp = MannWhitney(x_input, y_input, alpha=alpha, display=False) - output = """ - -Mann Whitney U Test -------------------- - -alpha = 0.0500 -u value = 469.0000 -p value = 0.8769 - -H0: Locations are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: MannWhitney matched just above min size") - self.assertEqual(str(exp), output) - - def test_MannWhitney_unmatched_just_above_min_size(self): - """Test the MannWhitney U test with two unmatched samples just above minimum size""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [8.2] - x_input = st.weibull_min.rvs(*x_parms, size=50) - y_input = st.weibull_min.rvs(*y_parms, size=31) - alpha = 0.1 - exp = MannWhitney(x_input, y_input, alpha=alpha, display=False) - output = """ - -Mann Whitney U Test -------------------- - -alpha = 0.1000 -u value = 597.0000 -p value = 0.0846 - -HA: Locations are not matched -""" - self.assertLess(exp.p_value, alpha, "FAIL: ManWhitney unmatched just above min size") - self.assertEqual(str(exp), output) - - def test_MannWhitney_matched_at_min_size(self): - """Test the MannWhitney U test with matched samples at minimum size""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=45) - y_input = st.weibull_min.rvs(*y_parms, size=30) - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: MannWhitney(x_input, y_input, alpha=alpha, display=False)) - - def test_MannWhitney_one_missing_array(self): - """Test the MannWhitney U test with one missing array""" - x_input = [np.random.randint(1, 50) for _ in range(50)] - y_input = ['abcdefghijklmnop'[:np.random.randint(1, 17)] for _ in range(50)] - self.assertRaises(NoDataError, lambda: MannWhitney(x_input, y_input, display=False)) - - def test_MannWhitney_two_missing_arrays(self): - """Test the MannWhitney U test with two missing arrays""" - x_input = ['abcdefghijklmnop'[:np.random.randint(1, 17)] for _ in range(50)] - y_input = ['abcdefghijklmnop'[:np.random.randint(1, 17)] for _ in range(50)] - self.assertRaises(NoDataError, lambda: MannWhitney(x_input, y_input, display=False)) - - def test_MannWhitney_vector_input(self): - """Test the case where the input argument is a Vector object.""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=100) - y_input = st.weibull_min.rvs(*y_parms, size=100) - vector = Vector(x_input).append(Vector(y_input)) - alpha = 0.05 - exp = MannWhitney(vector, alpha=alpha, display=True) - output = """ - -Mann Whitney U Test -------------------- - -alpha = 0.0500 -u value = 4976.0000 -p value = 0.9542 - -H0: Locations are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: MannWhitney Type I error") - self.assertAlmostEqual(exp.statistic, 4976.0, delta=0.0001, msg="FAIL: MannWhitney statistic incorrect") - self.assertAlmostEqual(exp.u_value, 4976.0, delta=0.0001, msg="FAIL: MannWhitney u_value incorrect") - self.assertAlmostEqual(exp.p_value, 0.9542, delta=0.0001, msg="FAIL: MannWhitney p_value incorrect") - self.assertEqual(str(exp), output) - - def test_MannWhitney_missing_second_arg(self): - """Test the case where the second argument is missing.""" - np.random.seed(987654321) - x_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=100) - self.assertRaises(AttributeError, lambda: MannWhitney(x_input)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_normtest.py b/build/lib/sci_analysis/test/test_normtest.py deleted file mode 100644 index 7282aa6..0000000 --- a/build/lib/sci_analysis/test/test_normtest.py +++ /dev/null @@ -1,135 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import NormTest -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - def test_300_Norm_test_single(self): - """Test the normal distribution check""" - np.random.seed(987654321) - parms = [5, 0.1] - alpha = 0.05 - x_input = st.norm.rvs(*parms, size=100) - other = """ - -Shapiro-Wilk test for normality -------------------------------- - -alpha = 0.0500 -W value = 0.9880 -p value = 0.5050 - -H0: Data is normally distributed -""" - self.assertGreater(NormTest(x_input, display=False, alpha=alpha).p_value, alpha, - "FAIL: Normal test Type I error") - self.assertEqual(str(NormTest(x_input, display=False, alpha=alpha)), other) - - def test_301_Norm_test_single_fail(self): - """Test the normal distribution check fails for a different distribution""" - np.random.seed(987654321) - parms = [1.7] - alpha = 0.05 - self.assertLess(NormTest(st.weibull_min.rvs(*parms, size=100), alpha=alpha, display=False).p_value, alpha, - "FAIL: Normal test Type II error") - - def test_302_Norm_test_statistic(self): - """Test the normal distribution statistic value is set""" - np.random.seed(987654321) - parms = [5, 0.1] - alpha = 0.05 - self.assertTrue(NormTest(st.norm.rvs(*parms, size=100), alpha=alpha, display=False).statistic, - "FAIL: Normal test statistic not set") - - def test_303_Norm_test_W_value(self): - """Test the normal distribution W value is set""" - np.random.seed(987654321) - parms = [5, 0.1] - alpha = 0.05 - self.assertTrue(NormTest(st.norm.rvs(*parms, size=100), alpha=alpha, display=False).w_value, - "FAIL: Normal test W value not set") - - def test_304_Norm_test_multi_pass(self): - """Test if multiple vectors are from the normal distribution""" - np.random.seed(987654321) - alpha = 0.05 - groups = [st.norm.rvs(5, 0.1, size=100), st.norm.rvs(4, 0.75, size=75), st.norm.rvs(1, 1, size=50)] - self.assertGreater(NormTest(*groups, alpha=alpha, display=True).p_value, alpha, - "FAIL: Normal test Type I error") - - def test_305_Norm_test_multi_fail(self): - """Test if multiple vectors are from the normal distribution, with one failing""" - np.random.seed(987654321) - alpha = 0.05 - groups = [st.norm.rvs(5, 0.1, size=100), st.weibull_min.rvs(1.7, size=75), st.norm.rvs(1, 1, size=50)] - self.assertLess(NormTest(*groups, alpha=alpha, display=False).p_value, alpha, - "FAIL: Normal test Type II error") - - def test_306_Norm_test_single_just_above_min_size(self): - """Test the normal distribution at just above the minimum size""" - np.random.seed(987654321) - parms = [5, 0.1] - alpha = 0.05 - self.assertGreater(NormTest(st.norm.rvs(*parms, size=3), display=False, alpha=alpha).p_value, alpha, - "FAIL: Normal test just above the minimum size") - - def test_307_Norm_test_single_at_min_size(self): - """Test the normal distribution at the minimum size""" - np.random.seed(987654321) - parms = [5, 0.1] - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: NormTest(st.norm.rvs(*parms, size=2), - display=False, - alpha=alpha).p_value) - - def test_308_Norm_test_multi_at_min_size(self): - """Test if multiple vectors are from the normal distribution at the minimum size""" - np.random.seed(987654321) - alpha = 0.05 - groups = [st.norm.rvs(5, 0.1, size=2), st.norm.rvs(4, 0.75, size=10), st.norm.rvs(1, 1, size=50)] - self.assertRaises(MinimumSizeError, lambda: NormTest(*groups, alpha=alpha, display=False).p_value) - - def test_309_Norm_test_multi_with_single_missing_vector(self): - """Test if multiple vectors are from the normal distribution with single vector missing""" - np.random.seed(987654321) - alpha = 0.05 - groups = [st.norm.rvs(5, 0.1, size=100), ["one", "two", "three", "four"], st.norm.rvs(1, 1, size=50)] - self.assertTrue(NormTest(*groups, alpha=alpha, display=False).p_value, - "FAIL: Normal test with single missing vector") - - def test_310_Norm_test_single_empty(self): - """Test with empty vector""" - np.random.seed(987654321) - alpha = 0.05 - self.assertRaises(NoDataError, lambda: NormTest(["one", "two", "three", "four"], - alpha=alpha, - display=False).p_value) - - def test_311_Norm_test_multi_all_empty_vectors(self): - """Test if multiple vectors are from the normal distribution with all missing vectors""" - np.random.seed(987654321) - alpha = 0.05 - groups = [[float("nan"), float("nan"), "three", float("nan")], ["one", "two", "three", "four"]] - self.assertRaises(NoDataError, lambda: NormTest(*groups, alpha=alpha, display=False).p_value) - - def test_312_Norm_test_multi_with_single_scalar(self): - """Test if multiple vectors are from the normal distribution with single scalar""" - np.random.seed(987654321) - alpha = 0.05 - groups = [st.norm.rvs(5, 0.1, size=100), "string", st.norm.rvs(1, 1, size=50)] - self.assertTrue(NormTest(*groups, alpha=alpha, display=False).p_value, - "FAIL: Normal test with single scalar should pass") - - def test_313_Norm_test_multi_with_all_scalar(self): - """Test if multiple vectors are from the normal distribution with all scalar""" - np.random.seed(987654321) - alpha = 0.05 - groups = ["this", "is", "a", "string"] - self.assertRaises(NoDataError, lambda: NormTest(*groups, alpha=alpha, display=False)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_statistics.py b/build/lib/sci_analysis/test/test_statistics.py deleted file mode 100644 index cc8f66d..0000000 --- a/build/lib/sci_analysis/test/test_statistics.py +++ /dev/null @@ -1,207 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..analysis import VectorStatistics -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - - display = True - - def test_1000_Vector_stats_count(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - ref = VectorStatistics(input_array, sample=True, display=self.display) - output = """ - -Statistics ----------- - -n = 100 -Mean = 4.0145 -Std Dev = 1.8622 -Std Error = 0.1862 -Skewness = -0.0256 -Kurtosis = -0.4830 -Maximum = 7.9315 -75% = 5.0664 -50% = 4.1331 -25% = 2.6576 -Minimum = -0.3256 -IQR = 2.4088 -Range = 8.2571""" - - self.assertEqual(ref.count, 100, "FAIL: Stat count") - self.assertEqual(str(ref), output) - - def test_1001_Vector_stats_mean(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).mean, - 4.0145, - delta=0.0001, - msg="FAIL: Stat mean") - - def test_1002_Vector_stats_std_dev_sample(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).std_dev, - 1.8622, - delta=0.0001, - msg="FAIL: Stat std dev") - - def test_1003_Vector_stats_std_dev_population(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=False, display=False).std_dev, - 1.8529, - delta=0.0001, - msg="FAIL: Stat std dev") - - def test_1004_Vector_stats_std_error_sample(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).std_err, - 0.1862, - delta=0.0001, - msg="FAIL: Stat std error") - - def test_1004_Vector_stats_std_error_population(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=False, display=False).std_err, - 0.1853, - delta=0.0001, - msg="FAIL: Stat std error") - - def test_1005_Vector_stats_skewness(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).skewness, - -0.0256, - delta=0.0001, - msg="FAIL: Stat skewness") - - def test_1006_Vector_stats_kurtosis(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).kurtosis, - -0.4830, - delta=0.0001, - msg="FAIL: Stat kurtosis") - - def test_1007_Vector_stats_maximum(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).maximum, - 7.9315, - delta=0.0001, - msg="FAIL: Stat maximum") - - def test_1008_Vector_stats_q3(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).q3, - 5.0664, - delta=0.0001, - msg="FAIL: Stat q3") - - def test_1009_Vector_stats_median(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).median, - 4.1331, - delta=0.0001, - msg="FAIL: Stat median") - - def test_1010_Vector_stats_q1(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).q1, - 2.6576, - delta=0.0001, - msg="FAIL: Stat q1") - - def test_1011_Vector_stats_minimum(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).minimum, - -0.3256, - delta=0.0001, - msg="FAIL: Stat minimum") - - def test_1012_Vector_stats_range(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).range, - 8.2571, - delta=0.0001, - msg="FAIL: Stat range") - - def test_1013_Vector_stats_iqr(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertAlmostEqual(VectorStatistics(input_array, sample=True, display=False).iqr, - 2.4088, - delta=0.0001, - msg="FAIL: Stat iqr") - - def test_1014_Vector_stats_name(self): - """Test the vector statistics class""" - np.random.seed(987654321) - parms = [4, 1.75] - input_array = st.norm.rvs(*parms, size=100) - self.assertEqual(VectorStatistics(input_array, sample=True, display=False).name, - "Statistics", - "FAIL: Stat name") - - def test_1015_Vector_stats_min_size(self): - """Test the vector statistics at min size""" - input_array = np.array([14]) - self.assertRaises(MinimumSizeError, lambda: VectorStatistics(input_array, sample=True, display=False).data) - - def test_1016_Vector_stats_just_above_min_size(self): - """Test the vector statistics just above min size""" - input_array = np.array([14, 21]) - self.assertTrue(VectorStatistics(input_array, sample=True, display=False), - "FAIL: Vector statistics at just above min size") - - def test_1017_Vector_stats_empty_array(self): - """Test the vector statistics with empty array""" - self.assertRaises(NoDataError, lambda: VectorStatistics(np.array([]), sample=True, display=False).data) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_ttest.py b/build/lib/sci_analysis/test/test_ttest.py deleted file mode 100644 index e0309be..0000000 --- a/build/lib/sci_analysis/test/test_ttest.py +++ /dev/null @@ -1,248 +0,0 @@ -import unittest -import scipy.stats as st -import numpy as np - -from ..data import Vector -from ..analysis import TTest -from ..analysis.exc import MinimumSizeError, NoDataError - - -class MyTestCase(unittest.TestCase): - # Test TTest - - def test_200_TTest_single_matched(self): - """Test the TTest against a given matched value""" - np.random.seed(987654321) - x_parms = [4, 0.75] - x_input = st.norm.rvs(*x_parms, size=100) - y_val = 4.0 - alpha = 0.05 - exp = TTest(x_input, y_val, display=False) - output = """ - -1 Sample T Test ---------------- - -alpha = 0.0500 -t value = 0.0781 -p value = 0.9379 - -H0: Means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: TTest single type I error") - self.assertEqual(exp.test_type, '1_sample') - self.assertEqual(exp.mu, 4.0) - self.assertAlmostEqual(exp.statistic, 0.0781, delta=0.0001) - self.assertAlmostEqual(exp.t_value, 0.0781, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.9379, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_205_TTest_single_unmatched(self): - """Test the TTest against a given unmatched value""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_val = 5.0 - alpha = 0.05 - x_input = st.norm.rvs(*x_parms, size=100) - exp = TTest(x_input, y_val, display=False) - output = """ - -1 Sample T Test ---------------- - -alpha = 0.0500 -t value = -12.4518 -p value = 0.0000 - -HA: Means are significantly different -""" - self.assertFalse(exp.p_value > alpha, "FAIL: TTest single type II error") - self.assertEqual(exp.mu, 5.0) - self.assertEqual(exp.test_type, '1_sample') - self.assertAlmostEqual(exp.statistic, -12.4518, delta=0.0001) - self.assertAlmostEqual(exp.statistic, -12.4518, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.0, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_206_TTest_equal_variance_matched(self): - """Test the TTest with two samples with equal variance and matched means""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - x_input = st.norm.rvs(*x_parms, size=100) - y_input = st.norm.rvs(*y_parms, size=100) - alpha = 0.05 - exp = TTest(x_input, y_input, display=False) - output = """ - -T Test ------- - -alpha = 0.0500 -t value = -0.2592 -p value = 0.7957 - -H0: Means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: TTest equal variance matched Type I error") - self.assertIsNone(exp.mu) - self.assertEqual(exp.test_type, 't_test') - self.assertAlmostEqual(exp.statistic, -0.2592, delta=0.0001) - self.assertAlmostEqual(exp.t_value, -0.2592, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.7957, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_209_TTest_equal_variance_unmatched(self): - """Test the TTest with two samples with equal variance and different means""" - np.random.seed(987654321) - x_parms = [4.0, 0.75] - y_parms = [4.5, 0.75] - x_input = st.norm.rvs(*x_parms, size=100) - y_input = st.norm.rvs(*y_parms, size=100) - alpha = 0.05 - exp = TTest(x_input, y_input, display=False) - output = """ - -T Test ------- - -alpha = 0.0500 -t value = -4.6458 -p value = 0.0000 - -HA: Means are significantly different -""" - self.assertLess(exp.p_value, alpha, "FAIL: TTest equal variance unmatched Type II error") - self.assertEqual(exp.test_type, 't_test') - self.assertEqual(str(exp), output) - - def test_210_TTest_unequal_variance_matched(self): - """Test the TTest with two samples with different variances and matched means""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 1.35] - x_input = st.norm.rvs(*x_parms, size=100) - y_input = st.norm.rvs(*y_parms, size=100) - alpha = 0.05 - exp = TTest(x_input, y_input, display=False) - output = """ - -Welch's T Test --------------- - -alpha = 0.0500 -t value = -0.3487 -p value = 0.7278 - -H0: Means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: TTest different variance matched Type I error") - self.assertEqual(exp.test_type, 'welch_t') - self.assertEqual(str(exp), output) - - def test_211_TTest_unequal_variance_unmatched(self): - """Test the TTest with two samples with different variances and different means""" - np.random.seed(987654321) - x_parms = [4.0, 0.75] - y_parms = [4.5, 1.12] - x_input = st.norm.rvs(*x_parms, size=100) - y_input = st.norm.rvs(*y_parms, size=100) - alpha = 0.05 - exp = TTest(x_input, y_input, display=True) - output = """ - -Welch's T Test --------------- - -alpha = 0.0500 -t value = -3.7636 -p value = 0.0002 - -HA: Means are significantly different -""" - self.assertLess(exp.p_value, alpha, "FAIL: TTest different variance unmatched Type II error") - self.assertEqual(exp.test_type, 'welch_t') - self.assertEqual(str(exp), output) - - def test_214_TTest_equal_variance_matched_min_size_above(self): - """Test the TTest at the minimum size threshold""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - x_input = st.norm.rvs(*x_parms, size=4) - y_input = st.norm.rvs(*y_parms, size=4) - alpha = 0.05 - exp = TTest(x_input, y_input, display=False) - output = """ - -T Test ------- - -alpha = 0.0500 -t value = 0.9450 -p value = 0.3811 - -H0: Means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: TTest minimum size fail") - self.assertEqual(str(exp), output) - - def test_215_TTest_equal_variance_matched_min_size_below(self): - """Test the TTest just above the minimum size threshold""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: TTest(st.norm.rvs(*x_parms, size=3), - st.norm.rvs(*y_parms, size=3), - alpha=alpha, - display=False).p_value) - - def test_216_TTest_equal_variance_matched_one_missing_array(self): - """Test the TTest test with one missing array""" - np.random.seed(987654321) - alpha = 0.05 - self.assertRaises(NoDataError, lambda: TTest([1.1, 1.0, 0.9, 0.8], - ["one", "two", "three", "four"], - alpha=alpha, - display=False).p_value) - - def test_217_TTest_with_vector_input(self): - """Test the TTest test with a vector object.""" - np.random.seed(987654321) - x_parms = [4, 0.75] - y_parms = [4, 0.75] - x_input = st.norm.rvs(*x_parms, size=100) - y_input = st.norm.rvs(*y_parms, size=100) - vector = Vector(x_input).append(Vector(y_input)) - alpha = 0.05 - exp = TTest(vector, display=False) - output = """ - -T Test ------- - -alpha = 0.0500 -t value = -0.2592 -p value = 0.7957 - -H0: Means are matched -""" - self.assertGreater(exp.p_value, alpha, "FAIL: TTest equal variance matched Type I error") - self.assertIsNone(exp.mu) - self.assertEqual(exp.test_type, 't_test') - self.assertAlmostEqual(exp.statistic, -0.2592, delta=0.0001) - self.assertAlmostEqual(exp.t_value, -0.2592, delta=0.0001) - self.assertAlmostEqual(exp.p_value, 0.7957, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_217_TTest_with_missing_second_arg(self): - """Test the case where the second argument is None.""" - np.random.seed(987654321) - x_parms = [4, 0.75] - x_input = st.norm.rvs(*x_parms, size=100) - self.assertRaises(AttributeError, lambda: TTest(x_input)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_twosampleks.py b/build/lib/sci_analysis/test/test_twosampleks.py deleted file mode 100644 index 1140803..0000000 --- a/build/lib/sci_analysis/test/test_twosampleks.py +++ /dev/null @@ -1,145 +0,0 @@ -import unittest -import numpy as np -import scipy.stats as st - -from ..data import Vector -from ..analysis import TwoSampleKSTest -from ..analysis.exc import MinimumSizeError, NoDataError - - -class TestTwoSampleKS(unittest.TestCase): - def test_two_sample_KS_matched(self): - """Test the Two Sample KS Test with matched samples""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=20) - y_input = st.weibull_min.rvs(*y_parms, size=20) - alpha = 0.05 - exp = TwoSampleKSTest(x_input, y_input, alpha=alpha, display=False) - output = """ - -Two Sample Kolmogorov-Smirnov Test ----------------------------------- - -alpha = 0.0500 -D value = 0.2000 -p value = 0.7710 - -H0: Both samples come from the same distribution -""" - self.assertGreater(exp.p_value, alpha, "FAIL: Two Sample KS Test Type I error") - self.assertEqual(str(exp), output) - - def test_two_sample_KS_unmatched(self): - """Test the Two Sample KS Test with unmatched samples""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [8.2] - x_input = st.weibull_min.rvs(*x_parms, size=20) - y_input = st.weibull_min.rvs(*y_parms, size=20) - alpha = 0.06 - exp = TwoSampleKSTest(x_input, y_input, alpha=alpha, display=False) - output = """ - -Two Sample Kolmogorov-Smirnov Test ----------------------------------- - -alpha = 0.0600 -D value = 0.4000 -p value = 0.0591 - -HA: Samples do not come from the same distribution -""" - self.assertLess(exp.p_value, alpha, "FAIL: Two Sample KS Test Type II error") - self.assertEqual(str(exp), output) - - def test_two_sample_KS_statistic(self): - """Test the Two Sample KS Test test statistic""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=20) - y_input = st.weibull_min.rvs(*y_parms, size=20) - alpha = 0.05 - exp = TwoSampleKSTest(x_input, y_input, alpha=alpha, display=False) - self.assertAlmostEqual(exp.statistic, 0.2, delta=0.1, msg="FAIL: Two Sample KS Test statistic") - self.assertAlmostEqual(exp.d_value, 0.2, delta=0.1, msg="FAIL: Two Sample KS Test d_value") - self.assertAlmostEqual(exp.p_value, 0.771, delta=0.001, msg="FAIL: Two Sample KS Test p_value") - - def test_two_sample_KS_matched_at_min_size(self): - """Test the Two Sample KS Test with matched samples at the minimum size""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=2) - y_input = st.weibull_min.rvs(*y_parms, size=2) - alpha = 0.05 - self.assertRaises(MinimumSizeError, lambda: TwoSampleKSTest(x_input, y_input, alpha=alpha, display=False)) - - def test_two_sample_KS_matched_just_above_min_size(self): - """Test the Two Sample KS Test with matched samples just above the minimum size""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=3) - y_input = st.weibull_min.rvs(*y_parms, size=3) - alpha = 0.05 - exp = TwoSampleKSTest(x_input, y_input, alpha=alpha, display=True) - output = """ - -Two Sample Kolmogorov-Smirnov Test ----------------------------------- - -alpha = 0.0500 -D value = 0.6667 -p value = 0.3197 - -H0: Both samples come from the same distribution -""" - self.assertAlmostEqual(exp.p_value, 0.3197, delta=0.0001) - self.assertAlmostEqual(exp.statistic, 0.6667, delta=0.0001) - self.assertEqual(str(exp), output) - - def test_two_sample_KS_matched_empty(self): - """Test the Two Sample KS Test with empty vectors""" - np.random.seed(987654321) - x_input = [np.nan, np.nan, "one", np.nan] - y_input = ["one", "two", "three", "four"] - alpha = 0.05 - self.assertRaises(NoDataError, lambda: TwoSampleKSTest(x_input, y_input, alpha=alpha, display=False)) - - def test_two_sample_KS_vector_input(self): - """Test the Two Sample KS Test with a Vector object.""" - np.random.seed(987654321) - x_parms = [1.7] - y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=20) - y_input = st.weibull_min.rvs(*y_parms, size=20) - vector = Vector(x_input).append(Vector(y_input)) - alpha = 0.05 - exp = TwoSampleKSTest(vector, alpha=alpha, display=False) - output = """ - -Two Sample Kolmogorov-Smirnov Test ----------------------------------- - -alpha = 0.0500 -D value = 0.2000 -p value = 0.7710 - -H0: Both samples come from the same distribution -""" - self.assertGreater(exp.p_value, alpha, "FAIL: Two Sample KS Test Type I error") - self.assertEqual(str(exp), output) - - def test_two_sample_KS_with_missing_second_arg(self): - """Test the case where the second argument is None.""" - np.random.seed(987654321) - x_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=20) - self.assertRaises(AttributeError, lambda: TwoSampleKSTest(x_input)) - - -if __name__ == '__main__': - unittest.main() diff --git a/build/lib/sci_analysis/test/test_vector.py b/build/lib/sci_analysis/test/test_vector.py deleted file mode 100644 index 11bfc6a..0000000 --- a/build/lib/sci_analysis/test/test_vector.py +++ /dev/null @@ -1,341 +0,0 @@ -import unittest -import numpy as np -import pandas as pd -import scipy.stats as st -from ..data import Vector, UnequalVectorLengthError - - -class MyTestCase(unittest.TestCase): - # Test vector creation - - def test_100_create_vector_mixed_list(self): - """Test vector creation from a mixed list""" - input_array = [1.0, "2", '3.0', "four", 5.65] - out_array = [1.0, 2.0, 3.0, 5.65] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_101_create_vector_missing_val(self): - """Test vector creation from a missing value list""" - input_array = ["1.0", "", 3, '4.1', ""] - out_array = [1.0, 3.0, 4.1] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_102_create_vector_empty_list(self): - """Test vector creation from an empty list""" - self.assertTrue(Vector().data.empty) - - def test_103_create_vector_2dim_array(self): - """Test vector creation from a 2dim array""" - input_array = np.array([[1, 2, 3], [1, 2, 3]]) - out_array = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_104_create_vector_dict(self): - """Test vector creation from a dict""" - input_array = {"one": 1, "two": 2.0, "three": "3", "four": "four"} - self.assertTrue(Vector(input_array).is_empty()) - - def test_105_create_vector_tuple(self): - """Test vector creation from a tuple""" - input_array = (1, 2, 3, 4, 5) - out_array = [1., 2., 3., 4., 5.] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_106_create_vector_array(self): - """Test vector creation from an array""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=100) - test_obj = Vector(input_array) - self.assertEqual(len(test_obj), 100) - self.assertIsInstance(test_obj, Vector) - self.assertIsInstance(test_obj.data, pd.Series) - self.assertEqual(test_obj.data_type, np.dtype('float64')) - - def test_107_create_vector_array_large(self): - """Test vector creation from a large array""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=1000000) - test_obj = Vector(input_array) - self.assertEqual(len(test_obj), 1000000) - self.assertIsInstance(test_obj, Vector) - self.assertIsInstance(test_obj.data, pd.Series) - self.assertEqual(test_obj.data_type, np.dtype('float64')) - - def test_108_create_vector_from_vector(self): - """Test vector creation from a vector""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=100)) - second_array = Vector(input_array) - # print(id(input_array)) - # print(id(second_array)) - self.assertEqual(second_array.data_type, np.dtype('float64')) - - def test_109_create_vector_2dim_list(self): - """Test vector creation from a 2dim list""" - input_array = [[1, 2, 3], [1, 2, 3]] - out_array = [1.0, 2.0, 3.0, 1.0, 2.0, 3.0] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_120_create_vector_none(self): - """Test vector creation from None""" - self.assertTrue(Vector(None).is_empty()) - - def test_121_vector_is_empty_empty_list(self): - """Test the vector is_empty method""" - input_array = [] - self.assertTrue(Vector(input_array).is_empty()) - - def test_122_vector_is_empty_empty_array(self): - """Test the vector is_empty method""" - input_array = np.array([]) - self.assertTrue(Vector(input_array).is_empty()) - - # Test drop nan functions - - def test_124_drop_nan(self): - """Test the drop_nan method""" - input_array = ["1.0", "", 3, '4.1', ""] - out_array = [1.0, 3.0, 4.1] - self.assertListEqual(out_array, Vector(input_array).data.tolist()) - - def test_125_drop_nan_empty(self): - """Test the drop_nan method on an empty array""" - input_array = ["one", "two", "three", "four"] - self.assertTrue(Vector(input_array).is_empty()) - - def test_126_drop_nan_intersect(self): - """Test the drop_nan_intersect method""" - input_array_1 = [1., np.nan, 3., np.nan, 5.] - input_array_2 = [11., np.nan, 13., 14., 15.] - out1 = [1., 3., 5.] - out2 = [11., 13., 15.] - vector = Vector(input_array_1, input_array_2) - self.assertListEqual(out1, vector.data.tolist()) - self.assertListEqual(out2, vector.other.tolist()) - - def test_127_drop_nan_intersect_empty(self): - """Test the drop_nan_intersect method with one empty array""" - # This test caught a bug when developing the Vector constructor refactor in 2.0.0 - input_array_2 = ["one", "two", "three", "four", "five"] - input_array_1 = [11., np.nan, 13., 14., 15.] - self.assertTrue(Vector(input_array_1, input_array_2).other.empty) - - def test_129_vector_data_prep(self): - """Test the vector data_prep method""" - np.random.seed(987654321) - input_array = st.norm.rvs(size=100) - input_array[4] = np.nan - input_array[16] = np.nan - input_array[32] = np.nan - input_array[64] = np.nan - self.assertEqual(len(Vector(input_array)), 96) - - def test_131_vector_data_prep_two_arrays(self): - """Test the vector data_prep method when there are two vectors""" - # This test caught a bug when developing the Vector constructor refactor in 2.0.0 - np.random.seed(987654321) - input_array_1 = st.norm.rvs(size=100) - input_array_2 = st.norm.rvs(size=100) - input_array_1[8] = np.nan - input_array_1[16] = np.nan - input_array_1[32] = np.nan - input_array_1[64] = np.nan - input_array_2[1] = np.nan - input_array_2[2] = np.nan - input_array_2[4] = np.nan - input_array_2[8] = np.nan - vector = Vector(input_array_1, input_array_2) - x, y = vector.data, vector.other - self.assertEqual((len(x), len(y)), (93, 93)) - - def test_132_vector_data_prep_two_unequal_arrays(self): - """Test the vector data_prep method when there are two vectors with different lengths""" - np.random.seed(987654321) - input_array_1 = st.norm.rvs(size=92) - input_array_2 = st.norm.rvs(size=100) - self.assertRaises(UnequalVectorLengthError, lambda: Vector(input_array_1, input_array_2)) - - def test_133_vector_data_prep_two_empty_arrays(self): - """Test the vector data_prep method when there are two empty vectors""" - input_array_1 = ["one", "two", "three", "four", "five"] - input_array_2 = ["three", "four", "five", "six", "seven"] - self.assertTrue(Vector(input_array_1, input_array_2).is_empty()) - - def test_134_vector_data_prep_int(self): - """Test the vector data_prep method on an int value""" - self.assertTrue(Vector(4).data.equals(pd.Series([4.], name='ind'))) - - def test_135_vector_data_prep_float(self): - """Test the vector data_prep method on an int value""" - self.assertTrue(Vector(4.0).data.equals(pd.Series([4.], name='ind'))) - - def test_136_vector_data_prep_string(self): - """Test the vector data_prep method on an int value""" - self.assertTrue(Vector("four").is_empty()) - - def test_137_basic_groupby(self): - """Test the group property produces the correct dictionary""" - ind = [1, 2, 3, 1, 2, 3, 1, 2, 3] - grp = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - groups = Vector(ind, groups=grp).groups - self.assertTrue(groups['a'].equals(pd.Series([1., 2., 3.], index=[0, 4, 8], name='a'))) - self.assertTrue(groups['b'].equals(pd.Series([2., 3., 1.], index=[1, 5, 6], name='b'))) - self.assertTrue(groups['c'].equals(pd.Series([3., 1., 2.], index=[2, 3, 7], name='c'))) - - def test_138_nan_groupby(self): - """Test the group property where certain values in data are NaN.""" - ind = [1, np.nan, 3, np.nan, 2, 3, np.nan, 2, 3] - grp = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - groups = Vector(ind, groups=grp).groups - self.assertTrue(groups['a'].equals(pd.Series([1., 2., 3.], index=[0, 4, 8], name='a'))) - self.assertTrue(groups['b'].equals(pd.Series([3.], index=[5], name='b'))) - self.assertTrue(groups['c'].equals(pd.Series([3., 2.], index=[2, 7], name='c'))) - - def test_139_nan_drop_groupby(self): - """Test the group property where certain values in data are NaN which causes a group to be dropped.""" - ind = [1, np.nan, 3, 1, 2, np.nan, np.nan, 2, 3] - grp = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - groups = Vector(ind, groups=grp).groups - self.assertTrue(groups['a'].equals(pd.Series([1., 2., 3.], index=[0, 4, 8], name='a'))) - self.assertTrue(groups['c'].equals(pd.Series([3., 1., 2.], index=[2, 3, 7], name='c'))) - self.assertNotIn('b', groups.keys()) - - def test_140_vector_groups_dtype_passed_group_names(self): - """Test to make sure the dtype of the groups column is categorical.""" - ind = [1, 2, 3, 1, 2, 3, 1, 2, 3] - grp = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - groups = Vector(ind, groups=grp) - self.assertEqual(groups.values['grp'].dtype, 'category') - - def test_141_vector_groups_dtype_passed_no_group(self): - """Test to make sure the dtype of the groups column is categorical.""" - ind = st.norm.rvs(size=1000) - groups = Vector(ind) - self.assertEqual(groups.values['grp'].dtype, 'category') - - def test_142_vector_append_existing_groups_with_new_groups(self): - """Test appending a new vector to an existing one.""" - ind1 = [1, 2, 3, 1, 2, 3, 1, 2, 3] - grp1 = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - ind2 = [1, 2, 3] - grp2 = ['d', 'd', 'd'] - input1 = Vector(ind1, groups=grp1) - input2 = Vector(ind2, groups=grp2) - new_input = input1.append(input2) - groups = new_input.groups - self.assertTrue(groups['a'].equals(pd.Series([1., 2., 3.], index=[0, 4, 8], name='a'))) - self.assertTrue(groups['b'].equals(pd.Series([2., 3., 1.], index=[1, 5, 6], name='b'))) - self.assertTrue(groups['c'].equals(pd.Series([3., 1., 2.], index=[2, 3, 7], name='c'))) - self.assertTrue(groups['d'].equals(pd.Series([1., 2., 3.], index=[9, 10, 11], name='d'))) - self.assertIn('d', groups.keys()) - - def test_143_vector_append_existing_groups_with_existing_groups(self): - """Test appending a new vector to an existing one.""" - ind1 = [1, 2, 3, 1, 2, 3, 1, 2, 3] - grp1 = ['a', 'b', 'c', 'c', 'a', 'b', 'b', 'c', 'a'] - ind2 = [1, 2, 3] - grp2 = ['a', 'b', 'c'] - input1 = Vector(ind1, groups=grp1) - input2 = Vector(ind2, groups=grp2) - new_input = input1.append(input2) - groups = new_input.groups - self.assertTrue(groups['a'].equals(pd.Series([1., 2., 3., 1.], index=[0, 4, 8, 9], name='a'))) - self.assertTrue(groups['b'].equals(pd.Series([2., 3., 1., 2.], index=[1, 5, 6, 10], name='b'))) - self.assertTrue(groups['c'].equals(pd.Series([3., 1., 2., 3.], index=[2, 3, 7, 11], name='c'))) - - def test_144_vector_append_generated_groups_1(self): - """Test appending a new vector to an existing one.""" - ind1 = [0, 1, 2, 3, 4] - ind2 = [5, 6, 7, 8, 9] - input1 = Vector(ind1) - input2 = Vector(ind2) - new_input = input1.append(input2) - groups = new_input.groups - self.assertTrue(groups[1].equals(pd.Series([0., 1., 2., 3., 4.], index=[0, 1, 2, 3, 4], name=1))) - self.assertTrue(groups[2].equals(pd.Series([5., 6., 7., 8., 9.], index=[5, 6, 7, 8, 9], name=2))) - self.assertListEqual([1, 2], list(groups.keys())) - - def test_145_vector_append_generated_groups_2(self): - """Test appending a new vector to an existing one.""" - ind1 = [0, 1, 2, 3, 4] - ind2 = [5, 6, 7, 8, 9] - ind3 = [10, 11, 12, 13, 14] - input1 = Vector(ind1) - input2 = Vector(ind2) - input3 = Vector(ind3) - new_input = input1.append(input2).append(input3) - groups = new_input.groups - self.assertTrue(groups[1].equals(pd.Series([0., 1., 2., 3., 4.], index=[0, 1, 2, 3, 4], name=1))) - self.assertTrue(groups[2].equals(pd.Series([5., 6., 7., 8., 9.], index=[5, 6, 7, 8, 9], name=2))) - self.assertTrue(groups[3].equals(pd.Series([10., 11., 12., 13., 14.], index=[10, 11, 12, 13, 14], name=3))) - self.assertListEqual([1, 2, 3], list(groups.keys())) - - def test_146_vector_append_not_a_vector(self): - """Test the error raised by appending a non-vector object.""" - input1 = [1, 2, 3, 4, 5] - input2 = [6, 7, 8, 9, 10] - self.assertRaises(ValueError, lambda: Vector(input1).append(input2)) - - def test_147_empty_vector_append_none(self): - """Test to make sure appending an empty Vector returns the original Vector.""" - input_array = [] - self.assertTrue(Vector(input_array).append(Vector(None)).data.empty) - - def test_148_vector_append_none(self): - """Test to make sure appending an empty Vector returns the original Vector.""" - input_array = [1, 2, 3, 4, 5] - self.assertTrue(Vector(input_array).append(Vector(None)).data.equals(pd.Series(input_array).astype('float'))) - - def test_149_vector_paired_groups(self): - """Test paired groups.""" - ind_x_1 = [0, 1, 2, 3, 4] - ind_y_1 = [5, 6, 7, 8, 9] - ind_x_2 = [10, 11, 12, 13, 14] - ind_y_2 = [15, 16, 17, 18, 19] - input1 = Vector(ind_x_1, other=ind_y_1) - input2 = Vector(ind_x_2, other=ind_y_2) - new_input = input1.append(input2) - groups = new_input.paired_groups - self.assertTrue(groups[1][0].equals(pd.Series([0., 1., 2., 3., 4.]))) - self.assertTrue(groups[1][1].equals(pd.Series([5., 6., 7., 8., 9.]))) - self.assertTrue(groups[2][0].equals(pd.Series([10., 11., 12., 13., 14.], index=[5, 6, 7, 8, 9]))) - self.assertTrue(groups[2][1].equals(pd.Series([15., 16., 17., 18., 19.], index=[5, 6, 7, 8, 9]))) - self.assertListEqual([1, 2], list(groups.keys())) - - def test_150_vector_flatten_singled(self): - """Test the Vector flatten method on a single vector.""" - np.random.seed(987654321) - input_array = Vector(st.norm.rvs(size=100)) - self.assertEqual(len(input_array.flatten()), 1) - self.assertTrue(input_array.data.equals(input_array.flatten()[0])) - - def test_151_vector_flatten_several_groups(self): - """Test the Vector flatten method on a a single vector with multiple groups.""" - np.random.seed(987654321) - input_array_1 = st.norm.rvs(size=100) - input_array_2 = st.norm.rvs(size=100) - input_array_3 = st.norm.rvs(size=100) - input_array = Vector(input_array_1).append(Vector(input_array_2)).append(Vector(input_array_3)) - self.assertEqual(len(input_array.flatten()), 3) - self.assertEqual(type(input_array.flatten()), tuple) - self.assertTrue(input_array.groups[1].equals(input_array.flatten()[0])) - self.assertTrue(input_array.groups[2].equals(input_array.flatten()[1])) - self.assertTrue(input_array.groups[3].equals(input_array.flatten()[2])) - - def test_152_vector_flatten_several_paired_groups(self): - """Test the Vector flatten method on a paired vector with multiple groups.""" - np.random.seed(987654321) - input_array_1 = st.norm.rvs(size=100) - input_array_2 = st.norm.rvs(size=100) - input_array_3 = st.norm.rvs(size=100) - input_array_4 = st.norm.rvs(size=100) - input_array = Vector(input_array_1, other=input_array_2).append(Vector(input_array_3, other=input_array_4)) - self.assertEqual(len(input_array.flatten()), 4) - self.assertTrue(input_array.groups[1].equals(input_array.flatten()[0])) - self.assertTrue(input_array.groups[2].equals(input_array.flatten()[1])) - self.assertTrue(input_array.paired_groups[1][1].equals(input_array.flatten()[2])) - self.assertTrue(input_array.paired_groups[2][1].equals(input_array.flatten()[3])) - - -if __name__ == '__main__': - unittest.main() diff --git a/sci_analysis.egg-info/PKG-INFO b/sci_analysis.egg-info/PKG-INFO index 8e2d0b0..ce8003d 100644 --- a/sci_analysis.egg-info/PKG-INFO +++ b/sci_analysis.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: sci-analysis -Version: 2.1.1 +Version: 2.1.2 Summary: An easy to use and powerful python-based data exploration and analysis tool Home-page: https://github.com/cmmorrow/sci-analysis Author: chris morrow From 7bdb5a35cae58504bd840f5d6c618eb5044e958b Mon Sep 17 00:00:00 2001 From: Chris Morrow Date: Fri, 20 Jul 2018 00:22:25 -0500 Subject: [PATCH 2/7] Fixed Mann Whitney min size --- sci_analysis/analysis/__init__.py | 4 ++-- sci_analysis/analysis/hypo_tests.py | 2 +- sci_analysis/test/test_analyze.py | 10 ++++++++++ sci_analysis/test/test_mannwhitney.py | 20 ++++++++++---------- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/sci_analysis/analysis/__init__.py b/sci_analysis/analysis/__init__.py index f13d546..bde07aa 100644 --- a/sci_analysis/analysis/__init__.py +++ b/sci_analysis/analysis/__init__.py @@ -165,7 +165,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if norm.p_value > alpha: TTest(xdata[0], xdata[1], alpha=alpha) tested.append('TTest') - elif len(xdata[0]) > 25 and len(xdata[1]) > 25: + elif len(xdata[0]) > 20 and len(xdata[1]) > 20: MannWhitney(xdata[0], xdata[1], alpha=alpha) tested.append('MannWhitney') else: @@ -222,7 +222,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if norm.p_value > alpha: TTest(*group_data) tested.append('TTest') - elif len(group_data[0]) > 25 and len(group_data[1]) > 25: + elif len(group_data[0]) > 20 and len(group_data[1]) > 20: MannWhitney(*group_data) tested.append('MannWhitney') else: diff --git a/sci_analysis/analysis/hypo_tests.py b/sci_analysis/analysis/hypo_tests.py index 1c976b8..0abad60 100644 --- a/sci_analysis/analysis/hypo_tests.py +++ b/sci_analysis/analysis/hypo_tests.py @@ -209,7 +209,7 @@ class MannWhitney(Test): _statistic_name = 'u value' _h0 = "H0: Locations are matched" _ha = "HA: Locations are not matched" - _min_size = 30 + _min_size = 20 def __init__(self, a, b=None, alpha=None, display=True): """ diff --git a/sci_analysis/test/test_analyze.py b/sci_analysis/test/test_analyze.py index 20a338e..d59cab4 100644 --- a/sci_analysis/test/test_analyze.py +++ b/sci_analysis/test/test_analyze.py @@ -431,6 +431,16 @@ def test_138_stacked_kw_default(self): save_to='{}test_analyze_138'.format(self.save_path)), ['Stacked Oneway', 'Kruskal']) + def test_139_stacked_two_group_mann_whitney(self): + np.random.seed(987654321) + size = 42 + df = pd.DataFrame({'input': st.weibull_max.rvs(1.2, size=size), + 'Condition': ['Group A', 'Group B'] * (size // 2)}) + self.assertEqual(analyze(df['input'], groups=df['Condition'], + debug=True, + save_to='{}test_analyze_139'.format(self.save_path)), + ['Stacked Oneway', 'MannWhitney']) + if __name__ == '__main__': unittest.main() diff --git a/sci_analysis/test/test_mannwhitney.py b/sci_analysis/test/test_mannwhitney.py index bfac7d9..575b52b 100644 --- a/sci_analysis/test/test_mannwhitney.py +++ b/sci_analysis/test/test_mannwhitney.py @@ -65,8 +65,8 @@ def test_MannWhitney_matched_just_above_min_size(self): np.random.seed(987654321) x_parms = [1.7] y_parms = [1.7] - x_input = st.weibull_min.rvs(*x_parms, size=31) - y_input = st.weibull_min.rvs(*y_parms, size=31) + x_input = st.weibull_min.rvs(*x_parms, size=21) + y_input = st.weibull_min.rvs(*y_parms, size=21) alpha = 0.05 exp = MannWhitney(x_input, y_input, alpha=alpha, display=False) output = """ @@ -75,8 +75,8 @@ def test_MannWhitney_matched_just_above_min_size(self): ------------------- alpha = 0.0500 -u value = 469.0000 -p value = 0.8769 +u value = 219.0000 +p value = 0.9799 H0: Locations are matched """ @@ -89,7 +89,7 @@ def test_MannWhitney_unmatched_just_above_min_size(self): x_parms = [1.7] y_parms = [8.2] x_input = st.weibull_min.rvs(*x_parms, size=50) - y_input = st.weibull_min.rvs(*y_parms, size=31) + y_input = st.weibull_min.rvs(*y_parms, size=21) alpha = 0.1 exp = MannWhitney(x_input, y_input, alpha=alpha, display=False) output = """ @@ -98,12 +98,12 @@ def test_MannWhitney_unmatched_just_above_min_size(self): ------------------- alpha = 0.1000 -u value = 597.0000 -p value = 0.0846 +u value = 440.0000 +p value = 0.2871 -HA: Locations are not matched +H0: Locations are matched """ - self.assertLess(exp.p_value, alpha, "FAIL: ManWhitney unmatched just above min size") + self.assertGreater(exp.p_value, alpha, "FAIL: ManWhitney unmatched just above min size") self.assertEqual(str(exp), output) def test_MannWhitney_matched_at_min_size(self): @@ -112,7 +112,7 @@ def test_MannWhitney_matched_at_min_size(self): x_parms = [1.7] y_parms = [1.7] x_input = st.weibull_min.rvs(*x_parms, size=45) - y_input = st.weibull_min.rvs(*y_parms, size=30) + y_input = st.weibull_min.rvs(*y_parms, size=20) alpha = 0.05 self.assertRaises(MinimumSizeError, lambda: MannWhitney(x_input, y_input, alpha=alpha, display=False)) From 55825881c92b3fab6803551a3f7a31f6fe3420d9 Mon Sep 17 00:00:00 2001 From: normaljosh Date: Mon, 3 Sep 2018 08:08:25 -0500 Subject: [PATCH 3/7] updated tests: ignoring futurewarning on boxplots, changed method of replacing with np.nan to .at() and changed expected results for correlation --- sci_analysis/test/test_graph_boxplots.py | 2 ++ sci_analysis/test/test_groupcorrelation.py | 12 ++++++------ sci_analysis/test/test_grouplinregress.py | 10 +++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sci_analysis/test/test_graph_boxplots.py b/sci_analysis/test/test_graph_boxplots.py index 1bbefc1..b11dcaa 100644 --- a/sci_analysis/test/test_graph_boxplots.py +++ b/sci_analysis/test/test_graph_boxplots.py @@ -1,3 +1,5 @@ +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) import unittest import numpy as np import pandas as pd diff --git a/sci_analysis/test/test_groupcorrelation.py b/sci_analysis/test/test_groupcorrelation.py index ab7ab8b..9022f43 100644 --- a/sci_analysis/test/test_groupcorrelation.py +++ b/sci_analysis/test/test_groupcorrelation.py @@ -123,7 +123,7 @@ def test_spearman_correlation_four_groups(self): exp.r_value) self.assertTupleEqual((0.007932793279327931, 0.014029402940294028, -0.12266426642664265, 0.9005940594059406), exp.statistic) - self.assertTupleEqual((0.9375641178035645, 0.8898160391011217, 0.22405419866382636, 3.0794115586718083e-37), + self.assertTupleEqual((0.9375641178035644, 0.8898160391011217, 0.22405419866382636, 3.0794115586718083e-37), exp.p_value) self.assertEqual(str(exp), output) @@ -139,11 +139,11 @@ def test_pearson_correlation_with_missing_data(self): cs_y = np.concatenate((input_1[1], input_2[1], input_3[1], input_4[1])) grp = [1] * 100 + [2] * 100 + [3] * 100 + [4] * 100 input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) - input_array['a'][24] = np.nan - input_array['a'][256] = np.nan - input_array['b'][373] = np.nan - input_array['b'][24] = np.nan - input_array['b'][128] = np.nan + input_array.at[24, 'a'] = np.nan + input_array.at[256, 'a'] = np.nan + input_array.at[24, 'b'] = np.nan + input_array.at[128, 'b'] = np.nan + input_array.at[373, 'b'] = np.nan output = """ Pearson Correlation Coefficient diff --git a/sci_analysis/test/test_grouplinregress.py b/sci_analysis/test/test_grouplinregress.py index 0362f03..25401d1 100644 --- a/sci_analysis/test/test_grouplinregress.py +++ b/sci_analysis/test/test_grouplinregress.py @@ -260,11 +260,11 @@ def test_linregress_missing_data(self): cs_y = np.concatenate((input_1[1], input_2[1], input_3[1], input_4[1])) grp = [1] * 100 + [2] * 100 + [3] * 100 + [4] * 100 input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) - input_array['a'][24] = np.nan - input_array['a'][256] = np.nan - input_array['b'][373] = np.nan - input_array['b'][24] = np.nan - input_array['b'][128] = np.nan + input_array.at[24, 'a'] = np.nan + input_array.at[256, 'a'] = np.nan + input_array.at[373, 'b'] = np.nan + input_array.at[24, 'b'] = np.nan + input_array.at[128, 'b'] = np.nan output = """ Linear Regression From 55aa9ae8bb5bb82a3ea2120b9f7b0a04f8ad7a6c Mon Sep 17 00:00:00 2001 From: normaljosh Date: Mon, 3 Sep 2018 08:46:12 -0500 Subject: [PATCH 4/7] Getting changes from Graphscatter_labels, highlighting and dropped_vals --- sci_analysis/data/numeric.py | 2 + sci_analysis/graphs/vector.py | 72 ++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/sci_analysis/data/numeric.py b/sci_analysis/data/numeric.py index 3a7c8cc..199c766 100644 --- a/sci_analysis/data/numeric.py +++ b/sci_analysis/data/numeric.py @@ -142,6 +142,7 @@ def drop_nan(self): arr : pandas.DataFrame A copy of the Numeric object's internal Series with all NaN values removed. """ + self._dropped_vals = self._values[self._ind].isnull() return self._values.dropna(how='any', subset=[self._ind]) def drop_nan_intersect(self): @@ -154,6 +155,7 @@ def drop_nan_intersect(self): arr : pandas.DataFrame A tuple of numpy Arrays corresponding to the internal Vector and seq with all nan values removed. """ + self._dropped_vals = (self._values[self._dep].isnull() | self._values[self._ind].isnull()) return self._values.dropna(how='any', subset=[self._ind, self._dep]) @property diff --git a/sci_analysis/graphs/vector.py b/sci_analysis/graphs/vector.py index 545e959..cb952dd 100644 --- a/sci_analysis/graphs/vector.py +++ b/sci_analysis/graphs/vector.py @@ -1,6 +1,7 @@ import warnings import six from math import sqrt, fabs +import pandas as pd # matplotlib imports from matplotlib.pyplot import (show, subplot, yticks, xlabel, ylabel, figure, setp, savefig, close, xticks, @@ -9,7 +10,7 @@ from matplotlib.patches import Circle # Numpy imports -from numpy import polyfit, polyval, sort, arange, array, linspace, mgrid, vstack, reshape, std, sum, mean, median +from numpy import polyfit, polyval, sort, arange, array, linspace, mgrid, vstack, reshape, std, sum, mean, median, ndarray # Scipy imports from scipy.stats import probplot, gaussian_kde, t @@ -262,12 +263,16 @@ def __init__(self, xdata, ydata=None, **kwargs): :param _points: Display the scatter points. :param _contours: Display the density contours :param _boxplot_borders: Display the boxplot borders + :param _highlight: an array-like with points to highlight based on labels + :param _labels: a vector object with the graph labels :param _title: The title of the graph. :param _save_to: Save the graph to the specified path. :return: pass """ self._fit = kwargs['fit'] if 'fit' in kwargs else True self._points = kwargs['points'] if 'points' in kwargs else True + self._labels = kwargs['labels'] if 'labels' in kwargs else [False] + self._highlight = kwargs['highlight'] if 'highlight' in kwargs else None self._contours = kwargs['contours'] if 'contours' in kwargs else False self._contour_props = (31, 1.1) self._boxplot_borders = kwargs['boxplot_borders'] if 'boxplot_borders' in kwargs else False @@ -281,7 +286,7 @@ def __init__(self, xdata, ydata=None, **kwargs): else: raise AttributeError('ydata argument cannot be None.') else: - super(GraphScatter, self).__init__(Vector(xdata, other=ydata), xname=xname, yname=yname) + super(GraphScatter, self).__init__(Vector(xdata, other=ydata), xname=xname, yname=yname) def calc_contours(self): """ @@ -333,7 +338,7 @@ def draw(self): ------- pass """ - + # Setup the grid variables x = self._data.data y = self._data.other @@ -357,6 +362,17 @@ def draw(self): else: gs = GridSpec(self._nrows, self._ncols) + #Setup highlight and labels + if pd.Series(self._labels).tolist() != [False]: + #filter out values if drop_nan was used + self._labels = self._labels[~self._data._dropped_vals] + if len(self._labels) != len(self._data): + raise AttributeError('The length or index of passed labels does not match the length or index of xdata and ydata') + #converts to series, in case passedlabels is a list or numpy array, sets index same as ind + self._labels = pd.Series(data=self._labels, index=self._data.data.index) + if self._highlight is not None and pd.Series(self._labels).tolist() == [False]: + raise AttributeError('Must include labels to highlight by') + # Draw the main graph ax2 = subplot(gs[main_plot]) @@ -364,7 +380,28 @@ def draw(self): if self._points: # A 2-D array needs to be passed to prevent matplotlib from applying the default cmap if the size < 4. color = (self.get_color(0),) - ax2.scatter(x, y, c=color, marker='o', linewidths=0, alpha=0.6, zorder=1) + alpha_trans = 0.8 + if self._highlight: + #find index of the labels which are in the highlight list + labelmask = self._labels.isin(self._highlight) + #get x and y position of those labels + x_labels = x.loc[labelmask] + y_labels = y.loc[labelmask] + x_nolabels = x.loc[~labelmask] + y_nolabels = y.loc[~labelmask] + ax2.scatter(x_labels, y_labels, c=color, marker='o', linewidths=0, alpha=.8, zorder=1) + ax2.scatter(x_nolabels, y_nolabels, c=color, marker='o', linewidths=0, alpha=.2, zorder=1) + else: + ax2.scatter(x, y, c=color, marker='o', linewidths=0, alpha=alpha_trans, zorder=1) + + # Draw the point labels + if len(self._labels) > 1: + if self._highlight: + for k in self._labels[labelmask].index: + ax2.annotate(self._labels[k], xy=(x[k], y[k]), alpha=1) + else: + for k in x.index: + ax2.annotate(self._labels[k], xy=(x[k], y[k]), alpha=1) # Draw the contours if self._contours: @@ -432,15 +469,18 @@ def __init__(self, xdata, ydata=None, groups=None, **kwargs): :param xdata: The x-axis data. :param ydata: The y-axis data. :param _fit: Display the optional line fit. + :param _highlight: Give list of groups to highlight in scatter. :param _points: Display the scatter points. :param _contours: Display the density contours :param _boxplot_borders: Display the boxplot borders + :param _labels: a vector object with the graph labels :param _title: The title of the graph. :param _save_to: Save the graph to the specified path. :return: pass """ self._fit = kwargs['fit'] if 'fit' in kwargs else True self._points = kwargs['points'] if 'points' in kwargs else True + self._labels = kwargs['labels'] if 'labels' in kwargs else [False] self._highlight = kwargs['highlight'] if 'highlight' in kwargs else None self._boxplot_borders = kwargs['boxplot_borders'] if 'boxplot_borders' in kwargs else True self._title = kwargs['title'] if 'title' in kwargs else 'Group Bivariate' @@ -480,7 +520,7 @@ def draw(self): ------- pass """ - + # Setup the grid variables x = self._data.data y = self._data.other @@ -505,6 +545,15 @@ def draw(self): else: gs = GridSpec(self._nrows, self._ncols) + #Setup highlight and labels + if pd.Series(self._labels).tolist() != [False]: + #filter out values if drop_nan was used + self._labels = self._labels[~self._data._dropped_vals] + if len(self._labels) != len(self._data): + raise AttributeError('The length of passed labels does not match the length of xdata and ydata') + #converts to series, in case passedlabels is a list or numpy array, sets index same as ind + self._labels = pd.Series(data=self._labels, index=self._data.data.index) + # Draw the main graph ax2 = subplot(gs[main_plot]) @@ -514,7 +563,7 @@ def draw(self): if self._highlight is not None: try: if grp in self._highlight: - alpha_trans = 0.6 + alpha_trans = 0.8 else: alpha_trans = 0.2 except TypeError: @@ -528,6 +577,17 @@ def draw(self): color = (self.get_color(i),) ax2.scatter(grp_x, grp_y, c=color, marker='o', linewidths=0, alpha=alpha_trans, zorder=1, label=grp) + # Draw the point labels + if len(self._labels) > 1: + if self._highlight is not None: + if grp in self._highlight: + for k in grp_x.index: + ax2.annotate(self._labels[k], xy=(grp_x[k], grp_y[k]), alpha=1) + + else: + for k in grp_x.index: + ax2.annotate(self._labels[k], xy=(grp_x[k], grp_y[k]), alpha=1) + # Draw the fit line if self._fit: fit_x, fit_y = self.calc_fit(grp_x, grp_y) From 56b6cac86e7789ae45637fb3c65a8e17799ce25b Mon Sep 17 00:00:00 2001 From: normaljosh Date: Mon, 3 Sep 2018 08:58:35 -0500 Subject: [PATCH 5/7] Getting tests from GraphScatter_labels --- sci_analysis/test/test_graph_groupscatter.py | 42 ++++++++++++++++++++ sci_analysis/test/test_graph_scatter.py | 36 ++++++++++++++++- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/sci_analysis/test/test_graph_groupscatter.py b/sci_analysis/test/test_graph_groupscatter.py index da5eb53..b43aca2 100644 --- a/sci_analysis/test/test_graph_groupscatter.py +++ b/sci_analysis/test/test_graph_groupscatter.py @@ -372,6 +372,48 @@ def test_27_scatter_two_groups_negative_corr(self): self.assertTrue(GraphGroupScatter(input_array['a'], input_array['b'], groups=input_array['c'], save_to='{}test_group_scatter_27'.format(self.save_path))) + def test_28_scatter_two_groups_labels(self): + np.random.seed(987654321) + input_1_x = st.norm.rvs(size=100) + input_1_y = [x + st.norm.rvs(0, 0.5, size=1)[0] for x in input_1_x] + input_2_x = st.norm.rvs(size=100) + input_2_y = [(x / 2) + st.norm.rvs(0, 0.2, size=1)[0] for x in input_2_x] + input_labels_array = np.random.choice(list('ABCDE'), size=(200)) + grp = [1] * 100 + [2] * 100 + cs_x = np.concatenate((input_1_x, input_2_x)) + cs_y = np.concatenate((input_1_y, input_2_y)) + input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) + self.assertTrue(GraphGroupScatter(input_array['a'], input_array['b'], groups=input_array['c'], labels=input_labels_array, + save_to='{}test_group_scatter_28'.format(self.save_path))) + + def test_29_scatter_two_groups_labels_missing_data(self): + np.random.seed(987654321) + input_1_x = st.norm.rvs(size=100) + input_1_y = [x + st.norm.rvs(0, 0.5, size=1)[0] for x in input_1_x] + input_2_x = st.norm.rvs(size=100) + input_2_y = [(x / 2) + st.norm.rvs(0, 0.2, size=1)[0] for x in input_2_x] + input_labels_array = np.random.choice(list('ABCDE'), size=(220)) + grp = [1] * 110 + [2] * 110 + cs_x = np.concatenate((input_1_x, input_2_x)) + cs_y = np.concatenate((input_1_y, input_2_y)) + indicies_x = list(np.random.randint(0, 199, 20)) + indicies_y = list(np.random.randint(0, 199, 20)) + for i in indicies_x: + cs_x = np.insert(cs_x, i, np.nan, axis=0) + for i in indicies_y: + cs_y = np.insert(cs_y, i, np.nan, axis=0) + input_array = pd.DataFrame({'a': cs_x, 'b': cs_y, 'c': grp}) + self.assertTrue(GraphGroupScatter(input_array['a'], input_array['b'], groups=input_array['c'], labels=input_labels_array, + save_to='{}test_group_scatter_29'.format(self.save_path))) + + + def test_30_groupscatter_dataframe(self): + """tests graphscater with dataframe input""" + np.random.seed(987654321) + df = pd.DataFrame(np.random.randn(100, 2), columns=list('xy')) + df['labels'] = np.random.choice(list('ABCDE'), len(df)).tolist() + df['groups'] = np.random.choice(list('XYZ'), len(df)).tolist() + self.assertTrue(GraphGroupScatter(df['x'], df['y'], groups=df['groups'], labels= df['labels'], save_to='{}test_scatter_30'.format(self.save_path))) if __name__ == '__main__': unittest.main() diff --git a/sci_analysis/test/test_graph_scatter.py b/sci_analysis/test/test_graph_scatter.py index 8f74923..ada08b5 100644 --- a/sci_analysis/test/test_graph_scatter.py +++ b/sci_analysis/test/test_graph_scatter.py @@ -2,7 +2,7 @@ import numpy as np import scipy.stats as st from os import path, getcwd - +import pandas as pd from ..graphs import GraphScatter from ..data import Vector from ..analysis.exc import NoDataError @@ -321,6 +321,40 @@ def test_132_negative_corr(self): input_y_array = np.array([3 - x + st.norm.rvs(0, 0.5, size=1) for x in input_x_array]) self.assertTrue(GraphScatter(input_x_array, input_y_array, save_to='{}test_scatter_132'.format(self.save_path))) + def test_133_missing_data_labels(self): + """Test labels where there's missing data in both arrays""" + np.random.seed(987654321) + input_x_array = st.norm.rvs(size=2000) + input_y_array = st.norm.rvs(size=2000) + input_labels_array = np.random.choice(list('ABCDE'), size=(2000)) + xmask = np.random.randint(0,2,size=input_x_array.shape).astype(np.bool) + ymask = np.random.randint(0,2,size=input_y_array.shape).astype(np.bool) + input_x_array[xmask] = np.nan + input_y_array[ymask] = np.nan + self.assertTrue(GraphScatter(input_x_array, input_y_array, labels= input_labels_array, save_to='{}test_scatter_133'.format(self.save_path))) + + def test_134_Graphscatter_labels(self): + """Generates graphscatter with labels""" + np.random.seed(987654321) + input_x_array = st.norm.rvs(size=2000) + input_y_array = st.norm.rvs(size=2000) + input_labels_array = np.random.choice(list('ABCDE'), size=(2000)) + self.assertTrue(GraphScatter(input_x_array, input_y_array, labels= input_labels_array, save_to='{}test_scatter_134'.format(self.save_path))) + + def test_135_Graphscatter_highlights(self): + """Generates graphscatter with highlights""" + np.random.seed(987654321) + input_x_array = st.norm.rvs(size=2000) + input_y_array = st.norm.rvs(size=2000) + input_labels_array = np.random.choice(list('ABCDE'), size=(2000)) + self.assertTrue(GraphScatter(input_x_array, input_y_array, labels= input_labels_array, highlight=['E'], save_to='{}test_scatter_135'.format(self.save_path))) + + def test_136_Graphscatter_dataframe(self): + """tests graphscater with dataframe input""" + np.random.seed(987654321) + df = pd.DataFrame(np.random.randn(100, 2), columns=list('xy')) + df['labels'] = np.random.choice(list('ABCDE'), len(df)).tolist() + self.assertTrue(GraphScatter(df['x'], df['y'], labels= df['labels'], save_to='{}test_scatter_136'.format(self.save_path))) if __name__ == '__main__': unittest.main() From 057babeb066152a5491785ff3a57ff52becf4cc5 Mon Sep 17 00:00:00 2001 From: normaljosh Date: Sun, 9 Sep 2018 17:24:17 -0500 Subject: [PATCH 6/7] added df_output function, to allow statistics to be output as lists of dataframes --- sci_analysis/analysis/__init__.py | 82 +++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/sci_analysis/analysis/__init__.py b/sci_analysis/analysis/__init__.py index bde07aa..78285b8 100644 --- a/sci_analysis/analysis/__init__.py +++ b/sci_analysis/analysis/__init__.py @@ -6,7 +6,7 @@ from .hypo_tests import NormTest, KSTest, TwoSampleKSTest, MannWhitney, TTest, Anova, Kruskal, EqualVariance from .comparison import LinearRegression, Correlation, GroupCorrelation, GroupLinearRegression from .stats import VectorStatistics, GroupStatistics, GroupStatisticsStacked, CategoricalStatistics - +import pandas as pd def determine_analysis_type(data, other=None, groups=None): """Attempts to determine the type of data and returns the corresponding sci_analysis Data object. @@ -62,6 +62,18 @@ def determine_analysis_type(data, other=None, groups=None): else: return Categorical(data) +def output_df(statistics_list): + df_list = [] + for stat_data in statistics_list: + #check whether to make df or series + if isinstance(stat_data._results, tuple): + df_list.append(['Overall Statistics', pd.Series(stat_data._results[0])]) + df_list.append(['Group Statistics', stat_data._results[1]]) + elif isinstance(stat_data._results, list): + df_list.append([stat_data._name, pd.DataFrame(stat_data._results)]) + else: + df_list.append([stat_data._name, pd.Series(stat_data._results)]) + return df_list def analyse(xdata, ydata=None, groups=None, **kwargs): """ @@ -132,6 +144,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): from ..data import (is_dict, is_iterable, is_group, is_dict_group, is_vector) from .exc import NoDataError debug = True if 'debug' in kwargs else False + df_out = True if 'df_out' in kwargs else False tested = list() if xdata is None: @@ -163,13 +176,13 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if len(xdata) == 2: norm = NormTest(*xdata, alpha=alpha, display=False) if norm.p_value > alpha: - TTest(xdata[0], xdata[1], alpha=alpha) + corr_data = TTest(xdata[0], xdata[1], alpha=alpha) tested.append('TTest') elif len(xdata[0]) > 20 and len(xdata[1]) > 20: - MannWhitney(xdata[0], xdata[1], alpha=alpha) + corr_data = MannWhitney(xdata[0], xdata[1], alpha=alpha) tested.append('MannWhitney') else: - TwoSampleKSTest(xdata[0], xdata[1], alpha=alpha) + corr_data = TwoSampleKSTest(xdata[0], xdata[1], alpha=alpha) tested.append('TwoSampleKSTest') else: e = EqualVariance(*xdata, alpha=alpha) @@ -177,12 +190,19 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): # If normally distributed and variances are equal, perform one-way ANOVA # Otherwise, perform a non-parametric Kruskal-Wallis test if e.test_type == 'Bartlett' and e.p_value > alpha: - Anova(*xdata, alpha=alpha) + corr_data = Anova(*xdata, alpha=alpha) tested.append('Anova') else: - Kruskal(*xdata, alpha=alpha) + corr_data = Kruskal(*xdata, alpha=alpha) tested.append('Kruskal') - return tested if debug else None + if df_out: + return output_df([out_stats, e, corr_data]) + else: + return None + if df_out: + return output_df([out_stats, corr_data]) + else: + return None if ydata is not None: _data = determine_analysis_type(xdata, other=ydata, groups=groups) @@ -196,17 +216,23 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): # Show the scatter plot, correlation and regression stats GraphGroupScatter(_data, **kwargs) - GroupLinearRegression(_data, alpha=alpha) - GroupCorrelation(_data, alpha=alpha) - return tested if debug else None + linreg_data = GroupLinearRegression(_data, alpha=alpha) + corr_data = GroupCorrelation(_data, alpha=alpha) + if df_out: + return output_df([linreg_data, corr_data]) + else: + return None else: tested.append('Bivariate') # Show the scatter plot, correlation and regression stats GraphScatter(_data, **kwargs) - LinearRegression(_data, alpha=alpha) - Correlation(_data, alpha=alpha) - return tested if debug else None + linreg_data = LinearRegression(_data, alpha=alpha) + corr_data = Correlation(_data, alpha=alpha) + if df_out: + return output_df([linreg_data, corr_data]) + else: + return None elif is_vector(_data) and len(_data.groups) > 1: # Compare Stacked Group Means and Variance tested.append('Stacked Oneway') @@ -220,23 +246,31 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if len(group_data) == 2: norm = NormTest(*group_data, alpha=alpha, display=False) if norm.p_value > alpha: - TTest(*group_data) + corr_data = TTest(*group_data) tested.append('TTest') elif len(group_data[0]) > 20 and len(group_data[1]) > 20: - MannWhitney(*group_data) + corr_data = MannWhitney(*group_data) tested.append('MannWhitney') else: - TwoSampleKSTest(*group_data) + corr_data = TwoSampleKSTest(*group_data) tested.append('TwoSampleKSTest') else: e = EqualVariance(*group_data, alpha=alpha) if e.test_type == 'Bartlett' and e.p_value > alpha: - Anova(*group_data, alpha=alpha) + corr_data = Anova(*group_data, alpha=alpha) tested.append('Anova') else: - Kruskal(*group_data, alpha=alpha) + corr_data = Kruskal(*group_data, alpha=alpha) tested.append('Kruskal') - return tested if debug else None + if df_out: + return output_df([out_stats, e, corr_data]) + else: + return None + if df_out: + return output_df([out_stats, corr_data]) + else: + return None + else: # Histogram and Basic Stats or Categories and Frequencies if is_vector(_data): @@ -262,11 +296,17 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): GraphHisto(_data, mean=out_stats.mean, std_dev=out_stats.std_dev, **kwargs) print(out_stats) print(fit) - return tested if debug else None + if df_out: + return output_df([out_stats, fit]) + else: + return None else: tested.append('Frequencies') # Show the histogram and stats GraphFrequency(_data, **kwargs) CategoricalStatistics(xdata, **kwargs) - return tested if debug else None + if df_out: + return output_df([out_stats, fit]) + else: + return None From 57c48fefa3bc56942397f4eddcd5d773d0238587 Mon Sep 17 00:00:00 2001 From: normaljosh Date: Sun, 9 Sep 2018 17:34:44 -0500 Subject: [PATCH 7/7] putting debug functionality back in --- sci_analysis/analysis/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sci_analysis/analysis/__init__.py b/sci_analysis/analysis/__init__.py index 78285b8..9c1e25d 100644 --- a/sci_analysis/analysis/__init__.py +++ b/sci_analysis/analysis/__init__.py @@ -198,11 +198,11 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([out_stats, e, corr_data]) else: - return None + return tested if debug else None if df_out: return output_df([out_stats, corr_data]) else: - return None + return tested if debug else None if ydata is not None: _data = determine_analysis_type(xdata, other=ydata, groups=groups) @@ -221,7 +221,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([linreg_data, corr_data]) else: - return None + return tested if debug else None else: tested.append('Bivariate') @@ -232,7 +232,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([linreg_data, corr_data]) else: - return None + return tested if debug else None elif is_vector(_data) and len(_data.groups) > 1: # Compare Stacked Group Means and Variance tested.append('Stacked Oneway') @@ -265,11 +265,11 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([out_stats, e, corr_data]) else: - return None + return tested if debug else None if df_out: return output_df([out_stats, corr_data]) else: - return None + return tested if debug else None else: # Histogram and Basic Stats or Categories and Frequencies @@ -299,7 +299,7 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([out_stats, fit]) else: - return None + return tested if debug else None else: tested.append('Frequencies') @@ -309,4 +309,4 @@ def analyze(xdata, ydata=None, groups=None, alpha=0.05, **kwargs): if df_out: return output_df([out_stats, fit]) else: - return None + return tested if debug else None