diff --git a/nominal/paired_two_sample_test_of_nominal_scale.py b/nominal/paired_two_sample_test_of_nominal_scale.py index 0827515..fdde029 100755 --- a/nominal/paired_two_sample_test_of_nominal_scale.py +++ b/nominal/paired_two_sample_test_of_nominal_scale.py @@ -12,6 +12,7 @@ class PairedTwoSampleTestOfNominalScale: def test(self, data): """ + There is a question which we can answer yes (1) or no (0). data = {"Before": [1,1,1,1,1,...,0], "After": [1,1,1,1,1,...,0]} Yes No Total @@ -26,10 +27,10 @@ def test(self, data): number of Yes => No: b number of No => Yes: c """ - # check data length is 2 - if len(data.keys()) != 2 and len(data[data.keys()[0]]) != len(data[data.keys()[1]]): - print "Please check the components of your data." - print "length of data should be four" + # check if the number of samples is appropriate + if len(data.keys()) != 2 or len(data[data.keys()[0]]) != len(data[data.keys()[1]]): + print ("Please check the components of your data.") + print ("the number of each data should be equal") sys.exit() else: b = 0 @@ -39,13 +40,17 @@ def test(self, data): b += 1 elif data[(data.keys())[0]][i] == 0 and data[(data.keys())[1]][i] == 1: c += 1 - # z = abs(b-c)-1 / root(b+c) - # chi2 = pow((abs(b-c)-1), 2.0) / (b+c) + # calculating chi-square value with Yates's continuity correction (イェーツの連続修正) + chi2 = pow((abs(b-c)-1), 2.0) / (b+c) + + ''' + Without Yates's continuity correction: chi2 = pow(abs(b-c), 2.0) / (b+c) + ''' p = stats.chi2.pdf(chi2, df=1) - # pdf: probability density function - # cdf: Cumulative distribution function - # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html print "chi2 value: {}".format(chi2) print "p value: {}".format(p) return p + +if __name__ == '__main__': + pass diff --git a/nominal/unpaired_two_sample_test_of_nominal_scale.py b/nominal/unpaired_two_sample_test_of_nominal_scale.py index 193f8a4..1ddd7ec 100755 --- a/nominal/unpaired_two_sample_test_of_nominal_scale.py 
+++ b/nominal/unpaired_two_sample_test_of_nominal_scale.py @@ -6,27 +6,25 @@ class UnpairedTwoSampleTestOfNominalScale: def test(self, data): - # check data length - if len(data.keys()) != 2: - print "len(data.keys()) should be two" - sys.exit() - elif len(data[(data.keys())[0]]) != len(data[(data.keys())[1]]): - print "len(data[(data.keys())[0]]) and len(data[(data.keys())[1]]) should be same" + # check if the number of samples is appropriate + if len(data.keys()) != 2 or len(data[data.keys()[0]]) != len(data[data.keys()[1]]): + print ("Please check the components of your data.") + print ("the number of each data should be equal") sys.exit() else: """ - Is there any difference between the number of people who satisfies Condition1 and Yes (a) and that of people who satisfies Condition2 and Yes (c)? + Is there any difference between the number of people who answer Yes under Condition1 (a) and the number of people who answer Yes under Condition2 (c)? data = {"Condition1": [a, b], "Condition2": [c, d]} - OrderedDict([('Illness', [52, 8]), ('Healty', [48, 42])]) + e.g. OrderedDict([('Illness', [52, 8]), ('Healthy', [48, 42])]) - Yes No Total <= sum_row + Yes No Total <= sum_row: [a+b, c+d] -------------------------------------- Condition1 a b a+b Condition2 c d c+d -------------------------------------- Total a+c b+d n (= a+b+c+d) ^ - |_ sum_column + |_ sum_column: [a+c, b+d] """ # calculate n