From cffc755f871584b8f6415bf0c56ae4e8a6c8218e Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 15:03:22 +0100 Subject: [PATCH 1/9] first implementation of the test logic, unweighted --- .coveragerc | 7 +++++++ quantipy/core/quantify/engine.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..77e140dbf --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +omit = + .tox/* + setup.py + **/test_* + */tests/* + */savReaderWriter/* diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index a2769f1e2..83295068a 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2280,6 +2280,39 @@ def _apply_base_flags(self, sigres, replace=True): sigres[res_col] = sigres[res_col] + flag return sigres + def sc_chi_sq(self, return_diagnostics=False, level=3.84): + """ + """ + q_unw = self.Quantity._copy() + q_unw.w = "@1" + counts = q_unw.count(margin=False, as_df=False).result + r_base = q_unw.rbase[1:] + c_base = q_unw.cbase[0][1:] + t_base = q_unw.rbase[0][0] + + subsample_pct = counts / c_base + sample_pct = r_base / t_base + diffs_direction = np.sign( + subsample_pct - sample_pct.repeat(counts.shape[1], axis=1) + ) + + # Compute First addend + helper_term_a = (r_base * c_base) / t_base + addend_a = (counts - helper_term_a) ** 2 / helper_term_a + # Compute second addend + helper_term_b = (t_base - r_base) * c_base / t_base + addend_b = ((c_base - counts) - helper_term_b) ** 2 / helper_term_b + # Chi^2 is the sum of addend_a and addend_b + cell_chi_sq_matrix = addend_a + addend_b + org_chi_sq_matrix = cell_chi_sq_matrix.copy() + + cell_chi_sq_matrix[cell_chi_sq_matrix < level] = np.NaN + result = pd.DataFrame(np.sign((cell_chi_sq_matrix * diffs_direction))) + result = result.replace(-1, "-") + result = result.replace(1, "+") + + return result + class Nest(object): """ Description of class... From af2a1c65871b31df4b742d82fcfa327be22ef17f Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 15:04:17 +0100 Subject: [PATCH 2/9] first implementation of the test logic, unweighted --- .coveragerc | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 77e140dbf..000000000 --- a/.coveragerc +++ /dev/null @@ -1,7 +0,0 @@ -[run] -omit = - .tox/* - setup.py - **/test_* - */tests/* - */savReaderWriter/* From 9482549f854491712a3252646de31da6fd812c93 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 16:35:03 +0100 Subject: [PATCH 3/9] adding validation and return of diagnostics --- quantipy/core/quantify/engine.py | 41 ++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index 83295068a..636efeda0 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2280,9 +2280,34 @@ def _apply_base_flags(self, sigres, replace=True): sigres[res_col] = sigres[res_col] + flag return sigres - def sc_chi_sq(self, return_diagnostics=False, level=3.84): - """ + def sc_chi_sq(self, return_diagnostics=False, level="mid"): """ + Produce a Quantum-like single-classification Chi^2 Test matrix. + + Parameters + ---------- + return_diagnostics : bool, default False + If True, a second return will also provide all interim figures used + in the computation. + level : str, default "mid" + The level of significance. The threshold map to confidence levels + of the dof = 1 Chi distribution at 90%, 95% and 99%. + + Returns + ------- + result : pd.DataFrame + The main result output of flagged differences. + diagnostics: tuple + The collection of all interim figures, i.e.: + * chi sq cell matrix + * cell counts + * sample and subsample proportions + """ + valid_levels = {"low": 2.71, "mid": 3.84, "high": 6.63} + if not level in valid_levels.keys(): + raise ValueError( + "'level' must be one of {}.".format(valid_levels.keys()) + ) q_unw = self.Quantity._copy() q_unw.w = "@1" counts = q_unw.count(margin=False, as_df=False).result @@ -2306,12 +2331,18 @@ def sc_chi_sq(self, return_diagnostics=False, level=3.84): cell_chi_sq_matrix = addend_a + addend_b org_chi_sq_matrix = cell_chi_sq_matrix.copy() - cell_chi_sq_matrix[cell_chi_sq_matrix < level] = np.NaN + cell_chi_sq_matrix[cell_chi_sq_matrix < valid_levels[level]] = np.NaN result = pd.DataFrame(np.sign((cell_chi_sq_matrix * diffs_direction))) result = result.replace(-1, "-") result = result.replace(1, "+") - - return result + if return_diagnostics: + return result, ( + org_chi_sq_matrix, + counts, r_base, c_base, t_base, + subsample_pct, sample_pct + ) + else: + return result class Nest(object): """ From e9e785c094113e91cf059af3887fe45ef48055c1 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 16:36:30 +0100 Subject: [PATCH 4/9] formatting --- quantipy/core/quantify/engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index 636efeda0..7bac05aac 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2335,6 +2335,7 @@ def sc_chi_sq(self, return_diagnostics=False, level="mid"): result = pd.DataFrame(np.sign((cell_chi_sq_matrix * diffs_direction))) result = result.replace(-1, "-") result = result.replace(1, "+") + if return_diagnostics: return result, ( org_chi_sq_matrix, From ec1a1b86f576cd811aed57f81789e3d7a3b90111 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 16:39:42 +0100 Subject: [PATCH 5/9] docstring updates --- quantipy/core/quantify/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index 7bac05aac..0a7451f73 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2282,7 +2282,7 @@ def _apply_base_flags(self, sigres, replace=True): def sc_chi_sq(self, return_diagnostics=False, level="mid"): """ - Produce a Quantum-like single-classification Chi^2 Test matrix. + Produce a Quantum-like single-classification Chi^2 Test result. Parameters ---------- From deec5fb78ea75f7748b4f43f4cfd9a9b915b1280 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 22 Jan 2024 17:01:11 +0100 Subject: [PATCH 6/9] clean up, order of kwargs --- quantipy/core/quantify/engine.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index 0a7451f73..c89e7c9ea 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2280,18 +2280,18 @@ def _apply_base_flags(self, sigres, replace=True): sigres[res_col] = sigres[res_col] + flag return sigres - def sc_chi_sq(self, return_diagnostics=False, level="mid"): + def sc_chi_sq(self, level="mid", return_diagnostics=False): """ Produce a Quantum-like single-classification Chi^2 Test result. Parameters ---------- + level : str, default "mid" + The level of significance. The thresholds map to confidence levels + of the dof = 1 Chi distribution at 90%, 95% and 99%. return_diagnostics : bool, default False If True, a second return will also provide all interim figures used in the computation. - level : str, default "mid" - The level of significance. The threshold map to confidence levels - of the dof = 1 Chi distribution at 90%, 95% and 99%. Returns ------- @@ -2303,7 +2303,11 @@ def sc_chi_sq(self, return_diagnostics=False, level="mid"): * cell counts * sample and subsample proportions """ - valid_levels = {"low": 2.71, "mid": 3.84, "high": 6.63} + valid_levels = { + "low": 2.71, # 90% + "mid": 3.84, # 95% + "high": 6.63 # 99% + } if not level in valid_levels.keys(): raise ValueError( "'level' must be one of {}.".format(valid_levels.keys()) From b78b2ebbfab70d8fab02cfe3c7c14801b9bddf50 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 6 Feb 2024 10:25:13 +0100 Subject: [PATCH 7/9] using cell figures always, using grand total row bases --- quantipy/core/quantify/engine.py | 38 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index c89e7c9ea..b2eed96b8 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2312,25 +2312,29 @@ def sc_chi_sq(self, level="mid", return_diagnostics=False): raise ValueError( "'level' must be one of {}.".format(valid_levels.keys()) ) - q_unw = self.Quantity._copy() - q_unw.w = "@1" - counts = q_unw.count(margin=False, as_df=False).result - r_base = q_unw.rbase[1:] - c_base = q_unw.cbase[0][1:] - t_base = q_unw.rbase[0][0] - - subsample_pct = counts / c_base - sample_pct = r_base / t_base - diffs_direction = np.sign( - subsample_pct - sample_pct.repeat(counts.shape[1], axis=1) + + q = self.Quantity._copy() + q._get_matrix() + + counts_w = q.count(margin=False, as_df=False).result + r_base_w = q.rbase[1:] + c_base_w = q.cbase[0][1:] + t_base_w = q.rbase[0][0] + + subsample_pct = counts_w / c_base_w + sample_pct = r_base_w / t_base_w + + diffs_direction = subsample_pct - sample_pct.repeat( + counts_w.shape[1], axis=1 ) + diffs_direction = np.sign(diffs_direction) # Compute First addend - helper_term_a = (r_base * c_base) / t_base - addend_a = (counts - helper_term_a) ** 2 / helper_term_a + helper_term_a = (r_base_w * c_base_w) / t_base_w + addend_a = (counts_w - helper_term_a) ** 2 / helper_term_a # Compute second addend - helper_term_b = (t_base - r_base) * c_base / t_base - addend_b = ((c_base - counts) - helper_term_b) ** 2 / helper_term_b + helper_term_b = (t_base_w - r_base_w) * c_base_w / t_base_w + addend_b = ((c_base_w - counts_w) - helper_term_b) ** 2 / helper_term_b # Chi^2 is the sum of addend_a and addend_b cell_chi_sq_matrix = addend_a + addend_b org_chi_sq_matrix = cell_chi_sq_matrix.copy() @@ -2340,10 +2344,12 @@ def sc_chi_sq(self, level="mid", return_diagnostics=False): result = result.replace(-1, "-") result = result.replace(1, "+") + result.index, result.columns = self.multiindex[0], self.multiindex[1] + if return_diagnostics: return result, ( org_chi_sq_matrix, - counts, r_base, c_base, t_base, + counts_w, r_base_w, c_base_w, t_base_w, subsample_pct, sample_pct ) else: From e4b69830f15a31d203ff7bd0bf99e47ef2eced80 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 19 Feb 2024 07:42:32 +0100 Subject: [PATCH 8/9] adjust local names to reference only figures, no weights --- quantipy/core/quantify/engine.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index b2eed96b8..c59f726fc 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2280,7 +2280,7 @@ def _apply_base_flags(self, sigres, replace=True): sigres[res_col] = sigres[res_col] + flag return sigres - def sc_chi_sq(self, level="mid", return_diagnostics=False): + def chi_square_sc(self, level="mid", return_diagnostics=False): """ Produce a Quantum-like single-classification Chi^2 Test result. @@ -2317,24 +2317,24 @@ def sc_chi_sq(self, level="mid", return_diagnostics=False): q._get_matrix() counts_w = q.count(margin=False, as_df=False).result - r_base_w = q.rbase[1:] - c_base_w = q.cbase[0][1:] - t_base_w = q.rbase[0][0] + r_base = q.rbase[1:] + c_base = q.cbase[0][1:] + t_base = q.rbase[0][0] - subsample_pct = counts_w / c_base_w - sample_pct = r_base_w / t_base_w + subsample_pct = counts / c_base + sample_pct = r_base / t_base diffs_direction = subsample_pct - sample_pct.repeat( - counts_w.shape[1], axis=1 + counts.shape[1], axis=1 ) diffs_direction = np.sign(diffs_direction) # Compute First addend - helper_term_a = (r_base_w * c_base_w) / t_base_w - addend_a = (counts_w - helper_term_a) ** 2 / helper_term_a + helper_term_a = (r_base * c_base) / t_base + addend_a = (counts - helper_term_a) ** 2 / helper_term_a # Compute second addend - helper_term_b = (t_base_w - r_base_w) * c_base_w / t_base_w - addend_b = ((c_base_w - counts_w) - helper_term_b) ** 2 / helper_term_b + helper_term_b = (t_base - r_base) * c_base / t_base + addend_b = ((c_base - counts) - helper_term_b) ** 2 / helper_term_b # Chi^2 is the sum of addend_a and addend_b cell_chi_sq_matrix = addend_a + addend_b org_chi_sq_matrix = cell_chi_sq_matrix.copy() @@ -2349,7 +2349,7 @@ def sc_chi_sq(self, level="mid", return_diagnostics=False): if return_diagnostics: return result, ( org_chi_sq_matrix, - counts_w, r_base_w, c_base_w, t_base_w, + counts, r_base, c_base, t_base, subsample_pct, sample_pct ) else: From 54c3e3bd728a40684e0ddd9f763589b6a1afd9ed Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 19 Feb 2024 07:44:06 +0100 Subject: [PATCH 9/9] adjust local names to reference only figures, no weights --- quantipy/core/quantify/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantipy/core/quantify/engine.py b/quantipy/core/quantify/engine.py index c59f726fc..0776da613 100644 --- a/quantipy/core/quantify/engine.py +++ b/quantipy/core/quantify/engine.py @@ -2316,7 +2316,7 @@ def chi_square_sc(self, level="mid", return_diagnostics=False): q = self.Quantity._copy() q._get_matrix() - counts_w = q.count(margin=False, as_df=False).result + counts = q.count(margin=False, as_df=False).result r_base = q.rbase[1:] c_base = q.cbase[0][1:] t_base = q.rbase[0][0]