From 3540abb34e6ec5d02135dfb7094290e788be31e0 Mon Sep 17 00:00:00 2001 From: Jacob Luke Date: Sun, 7 Jan 2024 15:46:47 +0800 Subject: [PATCH 1/5] Rewrite delta-delta Bootstrap function 1. Initiate the array using np.empty rather than np.repeat(np.nans) 2. Add solid checking for unusual cases 3. Reform the code using proper Python guideline 4. Ditch the usage of pandas for sampling --- dabest/_stats_tools/confint_2group_diff.py | 99 +++++++++------------- nbs/API/confint_2group_diff.ipynb | 99 +++++++++------------- 2 files changed, 82 insertions(+), 116 deletions(-) diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index da073e32..41b2e444 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -155,74 +155,57 @@ def compute_delta2_bootstrapped_diff( """ rng = RandomState(PCG64(random_seed)) - x1_len = len(x1) - x2_len = len(x2) - x3_len = len(x3) - x4_len = len(x4) - out_delta_g = np.repeat(np.nan, resamples) - deltadelta = np.repeat(np.nan, resamples) - - n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len - s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4) - - sd_numerator = ( - (n_a2_b1 - 1) * s_a2_b1**2 - + (n_a1_b1 - 1) * s_a1_b1**2 - + (n_a2_b2 - 1) * s_a2_b2**2 - + (n_a1_b2 - 1) * s_a1_b2**2 - ) - sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1) + + x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4]) + + # Calculating pooled sample standard deviation + stds = [np.std(x) for x in [x1, x2, x3, x4]] + ns = [len(x) for x in [x1, x2, x3, x4]] + + sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds)) + sd_denominator = sum(n - 1 for n in ns) + + # Avoid division by zero + if sd_denominator == 0: + raise ValueError("Insufficient data to compute pooled standard deviation.") + pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator) - for i in range(int(resamples)): + # Ensure 
pooled_sample_sd is not NaN or zero (to avoid division by zero later) + if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0: + raise ValueError("Pooled sample standard deviation is NaN or zero.") + + out_delta_g = np.empty(resamples) + deltadelta = np.empty(resamples) + + # Bootstrapping + for i in range(resamples): + # Paired or unpaired resampling if is_paired: - if (x1_len != x2_len) or (x3_len != x4_len): - raise ValueError("The two arrays do not have the same length.") - df_paired_1 = pd.DataFrame( - { - "value": np.concatenate([x1, x3]), - "array_id": np.repeat(["x1", "x3"], [x1_len, x3_len]), - } - ) - df_paired_2 = pd.DataFrame( - { - "value": np.concatenate([x2, x4]), - "array_id": np.repeat(["x2", "x4"], [x1_len, x3_len]), - } - ) - x_sample_index = rng.choice( - len(df_paired_1), len(df_paired_1), replace=True - ) - x_sample_1 = df_paired_1.loc[x_sample_index] - x_sample_2 = df_paired_2.loc[x_sample_index] - x1_sample = x_sample_1[x_sample_1["array_id"] == "x1"]["value"] - x2_sample = x_sample_2[x_sample_2["array_id"] == "x2"]["value"] - x3_sample = x_sample_1[x_sample_1["array_id"] == "x3"]["value"] - x4_sample = x_sample_2[x_sample_2["array_id"] == "x4"]["value"] + if len(x1) != len(x2) or len(x3) != len(x4): + raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.") + indices_1 = rng.choice(len(x1), len(x1), replace=True) + indices_2 = rng.choice(len(x3), len(x3), replace=True) + + x1_sample, x2_sample = x1[indices_1], x2[indices_1] + x3_sample, x4_sample = x3[indices_2], x4[indices_2] else: - df = pd.DataFrame( - { - "value": np.concatenate([x1, x2, x3, x4]), - "array_id": np.repeat( - ["x1", "x2", "x3", "x4"], [x1_len, x2_len, x3_len, x4_len] - ), - } - ) - x_sample_index = rng.choice(len(df), len(df), replace=True) - x_sample = df.loc[x_sample_index] - x1_sample = x_sample[x_sample["array_id"] == "x1"]["value"] - x2_sample = x_sample[x_sample["array_id"] == "x2"]["value"] - x3_sample = 
x_sample[x_sample["array_id"] == "x3"]["value"] - x4_sample = x_sample[x_sample["array_id"] == "x4"]["value"] + x1_sample = rng.choice(x1, len(x1), replace=True) + x2_sample = rng.choice(x2, len(x2), replace=True) + x3_sample = rng.choice(x3, len(x3), replace=True) + x4_sample = rng.choice(x4, len(x4), replace=True) + # Calculating deltas delta_1 = np.mean(x2_sample) - np.mean(x1_sample) delta_2 = np.mean(x4_sample) - np.mean(x3_sample) delta_delta = delta_2 - delta_1 + deltadelta[i] = delta_delta out_delta_g[i] = delta_delta / pooled_sample_sd - delta_g = ( - (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)) - ) / pooled_sample_sd + + # Empirical delta_g calculation + delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd + return out_delta_g, delta_g, deltadelta diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index 0a887588..55328833 100644 --- a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -212,74 +212,57 @@ " \"\"\"\n", "\n", " rng = RandomState(PCG64(random_seed))\n", - " x1_len = len(x1)\n", - " x2_len = len(x2)\n", - " x3_len = len(x3)\n", - " x4_len = len(x4)\n", - " out_delta_g = np.repeat(np.nan, resamples)\n", - " deltadelta = np.repeat(np.nan, resamples)\n", - "\n", - " n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len\n", - " s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)\n", - "\n", - " sd_numerator = (\n", - " (n_a2_b1 - 1) * s_a2_b1**2\n", - " + (n_a1_b1 - 1) * s_a1_b1**2\n", - " + (n_a2_b2 - 1) * s_a2_b2**2\n", - " + (n_a1_b2 - 1) * s_a1_b2**2\n", - " )\n", - " sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)\n", + "\n", + " x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])\n", + "\n", + " # Calculating pooled sample standard deviation\n", + " stds = [np.std(x) for x in [x1, x2, x3, x4]]\n", + " ns = [len(x) for x in [x1, x2, x3, x4]]\n", + "\n", + " 
sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n", + " sd_denominator = sum(n - 1 for n in ns)\n", + "\n", + " # Avoid division by zero\n", + " if sd_denominator == 0:\n", + " raise ValueError(\"Insufficient data to compute pooled standard deviation.\")\n", + "\n", " pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", "\n", - " for i in range(int(resamples)):\n", + " # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)\n", + " if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n", + " raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n", + "\n", + " out_delta_g = np.empty(resamples)\n", + " deltadelta = np.empty(resamples)\n", + "\n", + " # Bootstrapping\n", + " for i in range(resamples):\n", + " # Paired or unpaired resampling\n", " if is_paired:\n", - " if (x1_len != x2_len) or (x3_len != x4_len):\n", - " raise ValueError(\"The two arrays do not have the same length.\")\n", - " df_paired_1 = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x1, x3]),\n", - " \"array_id\": np.repeat([\"x1\", \"x3\"], [x1_len, x3_len]),\n", - " }\n", - " )\n", - " df_paired_2 = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x2, x4]),\n", - " \"array_id\": np.repeat([\"x2\", \"x4\"], [x1_len, x3_len]),\n", - " }\n", - " )\n", - " x_sample_index = rng.choice(\n", - " len(df_paired_1), len(df_paired_1), replace=True\n", - " )\n", - " x_sample_1 = df_paired_1.loc[x_sample_index]\n", - " x_sample_2 = df_paired_2.loc[x_sample_index]\n", - " x1_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x1\"][\"value\"]\n", - " x2_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x2\"][\"value\"]\n", - " x3_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x3\"][\"value\"]\n", - " x4_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x4\"][\"value\"]\n", + " if len(x1) != len(x2) or len(x3) != len(x4):\n", + " raise ValueError(\"Each control group must have the same length as its corresponding test group 
in paired analysis.\")\n", + " indices_1 = rng.choice(len(x1), len(x1), replace=True)\n", + " indices_2 = rng.choice(len(x3), len(x3), replace=True)\n", + "\n", + " x1_sample, x2_sample = x1[indices_1], x2[indices_1]\n", + " x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n", " else:\n", - " df = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x1, x2, x3, x4]),\n", - " \"array_id\": np.repeat(\n", - " [\"x1\", \"x2\", \"x3\", \"x4\"], [x1_len, x2_len, x3_len, x4_len]\n", - " ),\n", - " }\n", - " )\n", - " x_sample_index = rng.choice(len(df), len(df), replace=True)\n", - " x_sample = df.loc[x_sample_index]\n", - " x1_sample = x_sample[x_sample[\"array_id\"] == \"x1\"][\"value\"]\n", - " x2_sample = x_sample[x_sample[\"array_id\"] == \"x2\"][\"value\"]\n", - " x3_sample = x_sample[x_sample[\"array_id\"] == \"x3\"][\"value\"]\n", - " x4_sample = x_sample[x_sample[\"array_id\"] == \"x4\"][\"value\"]\n", + " x1_sample = rng.choice(x1, len(x1), replace=True)\n", + " x2_sample = rng.choice(x2, len(x2), replace=True)\n", + " x3_sample = rng.choice(x3, len(x3), replace=True)\n", + " x4_sample = rng.choice(x4, len(x4), replace=True)\n", "\n", + " # Calculating deltas\n", " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", " delta_delta = delta_2 - delta_1\n", + "\n", " deltadelta[i] = delta_delta\n", " out_delta_g[i] = delta_delta / pooled_sample_sd\n", - " delta_g = (\n", - " (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))\n", - " ) / pooled_sample_sd\n", + "\n", + " # Empirical delta_g calculation\n", + " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n", + "\n", " return out_delta_g, delta_g, deltadelta\n", "\n", "\n", From a31bf96ce391a8a53e5d8617edf7ca96eeac2329 Mon Sep 17 00:00:00 2001 From: Jacob Luke Date: Sun, 7 Jan 2024 15:46:47 +0800 Subject: [PATCH 2/5] Rewrite delta-delta Bootstrap function 1. 
Initiate the array using np.empty rather than np.repeat(np.nans) 2. Add solid checking for unusual cases 3. Reform the code using proper Python guideline 4. Ditch the usage of pandas for sampling --- dabest/_stats_tools/confint_2group_diff.py | 99 +++++++++------------- nbs/API/confint_2group_diff.ipynb | 99 +++++++++------------- 2 files changed, 82 insertions(+), 116 deletions(-) diff --git a/dabest/_stats_tools/confint_2group_diff.py b/dabest/_stats_tools/confint_2group_diff.py index da073e32..41b2e444 100644 --- a/dabest/_stats_tools/confint_2group_diff.py +++ b/dabest/_stats_tools/confint_2group_diff.py @@ -155,74 +155,57 @@ def compute_delta2_bootstrapped_diff( """ rng = RandomState(PCG64(random_seed)) - x1_len = len(x1) - x2_len = len(x2) - x3_len = len(x3) - x4_len = len(x4) - out_delta_g = np.repeat(np.nan, resamples) - deltadelta = np.repeat(np.nan, resamples) - - n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len - s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4) - - sd_numerator = ( - (n_a2_b1 - 1) * s_a2_b1**2 - + (n_a1_b1 - 1) * s_a1_b1**2 - + (n_a2_b2 - 1) * s_a2_b2**2 - + (n_a1_b2 - 1) * s_a1_b2**2 - ) - sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1) + + x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4]) + + # Calculating pooled sample standard deviation + stds = [np.std(x) for x in [x1, x2, x3, x4]] + ns = [len(x) for x in [x1, x2, x3, x4]] + + sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds)) + sd_denominator = sum(n - 1 for n in ns) + + # Avoid division by zero + if sd_denominator == 0: + raise ValueError("Insufficient data to compute pooled standard deviation.") + pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator) - for i in range(int(resamples)): + # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later) + if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0: + raise ValueError("Pooled sample standard deviation is NaN or 
zero.") + + out_delta_g = np.empty(resamples) + deltadelta = np.empty(resamples) + + # Bootstrapping + for i in range(resamples): + # Paired or unpaired resampling if is_paired: - if (x1_len != x2_len) or (x3_len != x4_len): - raise ValueError("The two arrays do not have the same length.") - df_paired_1 = pd.DataFrame( - { - "value": np.concatenate([x1, x3]), - "array_id": np.repeat(["x1", "x3"], [x1_len, x3_len]), - } - ) - df_paired_2 = pd.DataFrame( - { - "value": np.concatenate([x2, x4]), - "array_id": np.repeat(["x2", "x4"], [x1_len, x3_len]), - } - ) - x_sample_index = rng.choice( - len(df_paired_1), len(df_paired_1), replace=True - ) - x_sample_1 = df_paired_1.loc[x_sample_index] - x_sample_2 = df_paired_2.loc[x_sample_index] - x1_sample = x_sample_1[x_sample_1["array_id"] == "x1"]["value"] - x2_sample = x_sample_2[x_sample_2["array_id"] == "x2"]["value"] - x3_sample = x_sample_1[x_sample_1["array_id"] == "x3"]["value"] - x4_sample = x_sample_2[x_sample_2["array_id"] == "x4"]["value"] + if len(x1) != len(x2) or len(x3) != len(x4): + raise ValueError("Each control group must have the same length as its corresponding test group in paired analysis.") + indices_1 = rng.choice(len(x1), len(x1), replace=True) + indices_2 = rng.choice(len(x3), len(x3), replace=True) + + x1_sample, x2_sample = x1[indices_1], x2[indices_1] + x3_sample, x4_sample = x3[indices_2], x4[indices_2] else: - df = pd.DataFrame( - { - "value": np.concatenate([x1, x2, x3, x4]), - "array_id": np.repeat( - ["x1", "x2", "x3", "x4"], [x1_len, x2_len, x3_len, x4_len] - ), - } - ) - x_sample_index = rng.choice(len(df), len(df), replace=True) - x_sample = df.loc[x_sample_index] - x1_sample = x_sample[x_sample["array_id"] == "x1"]["value"] - x2_sample = x_sample[x_sample["array_id"] == "x2"]["value"] - x3_sample = x_sample[x_sample["array_id"] == "x3"]["value"] - x4_sample = x_sample[x_sample["array_id"] == "x4"]["value"] + x1_sample = rng.choice(x1, len(x1), replace=True) + x2_sample = rng.choice(x2, 
len(x2), replace=True) + x3_sample = rng.choice(x3, len(x3), replace=True) + x4_sample = rng.choice(x4, len(x4), replace=True) + # Calculating deltas delta_1 = np.mean(x2_sample) - np.mean(x1_sample) delta_2 = np.mean(x4_sample) - np.mean(x3_sample) delta_delta = delta_2 - delta_1 + deltadelta[i] = delta_delta out_delta_g[i] = delta_delta / pooled_sample_sd - delta_g = ( - (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)) - ) / pooled_sample_sd + + # Empirical delta_g calculation + delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd + return out_delta_g, delta_g, deltadelta diff --git a/nbs/API/confint_2group_diff.ipynb b/nbs/API/confint_2group_diff.ipynb index 0a887588..55328833 100644 --- a/nbs/API/confint_2group_diff.ipynb +++ b/nbs/API/confint_2group_diff.ipynb @@ -212,74 +212,57 @@ " \"\"\"\n", "\n", " rng = RandomState(PCG64(random_seed))\n", - " x1_len = len(x1)\n", - " x2_len = len(x2)\n", - " x3_len = len(x3)\n", - " x4_len = len(x4)\n", - " out_delta_g = np.repeat(np.nan, resamples)\n", - " deltadelta = np.repeat(np.nan, resamples)\n", - "\n", - " n_a1_b1, n_a2_b1, n_a1_b2, n_a2_b2 = x1_len, x2_len, x3_len, x4_len\n", - " s_a1_b1, s_a2_b1, s_a1_b2, s_a2_b2 = np.std(x1), np.std(x2), np.std(x3), np.std(x4)\n", - "\n", - " sd_numerator = (\n", - " (n_a2_b1 - 1) * s_a2_b1**2\n", - " + (n_a1_b1 - 1) * s_a1_b1**2\n", - " + (n_a2_b2 - 1) * s_a2_b2**2\n", - " + (n_a1_b2 - 1) * s_a1_b2**2\n", - " )\n", - " sd_denominator = (n_a2_b1 - 1) + (n_a1_b1 - 1) + (n_a2_b2 - 1) + (n_a1_b2 - 1)\n", + "\n", + " x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])\n", + "\n", + " # Calculating pooled sample standard deviation\n", + " stds = [np.std(x) for x in [x1, x2, x3, x4]]\n", + " ns = [len(x) for x in [x1, x2, x3, x4]]\n", + "\n", + " sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n", + " sd_denominator = sum(n - 1 for n in ns)\n", + "\n", + " # Avoid division by zero\n", + " if sd_denominator == 0:\n", + " raise 
ValueError(\"Insufficient data to compute pooled standard deviation.\")\n", + "\n", " pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", "\n", - " for i in range(int(resamples)):\n", + " # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)\n", + " if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n", + " raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n", + "\n", + " out_delta_g = np.empty(resamples)\n", + " deltadelta = np.empty(resamples)\n", + "\n", + " # Bootstrapping\n", + " for i in range(resamples):\n", + " # Paired or unpaired resampling\n", " if is_paired:\n", - " if (x1_len != x2_len) or (x3_len != x4_len):\n", - " raise ValueError(\"The two arrays do not have the same length.\")\n", - " df_paired_1 = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x1, x3]),\n", - " \"array_id\": np.repeat([\"x1\", \"x3\"], [x1_len, x3_len]),\n", - " }\n", - " )\n", - " df_paired_2 = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x2, x4]),\n", - " \"array_id\": np.repeat([\"x2\", \"x4\"], [x1_len, x3_len]),\n", - " }\n", - " )\n", - " x_sample_index = rng.choice(\n", - " len(df_paired_1), len(df_paired_1), replace=True\n", - " )\n", - " x_sample_1 = df_paired_1.loc[x_sample_index]\n", - " x_sample_2 = df_paired_2.loc[x_sample_index]\n", - " x1_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x1\"][\"value\"]\n", - " x2_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x2\"][\"value\"]\n", - " x3_sample = x_sample_1[x_sample_1[\"array_id\"] == \"x3\"][\"value\"]\n", - " x4_sample = x_sample_2[x_sample_2[\"array_id\"] == \"x4\"][\"value\"]\n", + " if len(x1) != len(x2) or len(x3) != len(x4):\n", + " raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n", + " indices_1 = rng.choice(len(x1), len(x1), replace=True)\n", + " indices_2 = rng.choice(len(x3), len(x3), replace=True)\n", + "\n", + " x1_sample, x2_sample = 
x1[indices_1], x2[indices_1]\n", + " x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n", " else:\n", - " df = pd.DataFrame(\n", - " {\n", - " \"value\": np.concatenate([x1, x2, x3, x4]),\n", - " \"array_id\": np.repeat(\n", - " [\"x1\", \"x2\", \"x3\", \"x4\"], [x1_len, x2_len, x3_len, x4_len]\n", - " ),\n", - " }\n", - " )\n", - " x_sample_index = rng.choice(len(df), len(df), replace=True)\n", - " x_sample = df.loc[x_sample_index]\n", - " x1_sample = x_sample[x_sample[\"array_id\"] == \"x1\"][\"value\"]\n", - " x2_sample = x_sample[x_sample[\"array_id\"] == \"x2\"][\"value\"]\n", - " x3_sample = x_sample[x_sample[\"array_id\"] == \"x3\"][\"value\"]\n", - " x4_sample = x_sample[x_sample[\"array_id\"] == \"x4\"][\"value\"]\n", + " x1_sample = rng.choice(x1, len(x1), replace=True)\n", + " x2_sample = rng.choice(x2, len(x2), replace=True)\n", + " x3_sample = rng.choice(x3, len(x3), replace=True)\n", + " x4_sample = rng.choice(x4, len(x4), replace=True)\n", "\n", + " # Calculating deltas\n", " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", " delta_delta = delta_2 - delta_1\n", + "\n", " deltadelta[i] = delta_delta\n", " out_delta_g[i] = delta_delta / pooled_sample_sd\n", - " delta_g = (\n", - " (np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))\n", - " ) / pooled_sample_sd\n", + "\n", + " # Empirical delta_g calculation\n", + " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n", + "\n", " return out_delta_g, delta_g, deltadelta\n", "\n", "\n", From 3c97860c3cc321064cba37295770a2c35cec9202 Mon Sep 17 00:00:00 2001 From: Jacob Luke Date: Thu, 11 Jan 2024 21:04:16 +0800 Subject: [PATCH 3/5] Changes in several documentations --- CODE_OF_CONDUCT.md | 76 ++++++++++++++ CONTRIBUTING.md | 23 +++++ README.md | 98 +++++++------------ nbs/01-getting_started.ipynb | 10 +- .../posts/robust-beautiful/four_samples.csv | 16 +++ .../robust-beautiful/robust-beautiful.ipynb 
| 2 +- nbs/read_me.ipynb | 25 +++-- nbs/tutorials/03-proportion_plot.ipynb | 20 ++-- nbs/tutorials/05-delta_delta.ipynb | 36 +++---- 9 files changed, 206 insertions(+), 100 deletions(-) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 nbs/blog/posts/robust-beautiful/four_samples.csv diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..191d1ab4 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair 
corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at joseshowh@gmail.com. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..1350d01b --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# Contributing to DABEST-Python + + +## Did you find a bug? +- Ensure the bug was not already reported by searching in [Issues](https://github.com/ACCLAB/DABEST-python/issues). Check that the bug hasn't been addressed in a closed issue. + +- If the bug isn't being addressed, open a new issue. Be sure to include a title and clear description, and a [minimally reproducible code sample](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) demonstrating the expected behavior that is not occurring. + + +## Did you write a patch that fixes a bug? +- Open a new GitHub [pull request](https://help.github.com/en/articles/about-pull-requests) (PR for short) with the patch. + +- Create the PR against the development branch, which is indicated by `v{latest version number}-dev`. + +- Clearly state the problem and solution in the PR description. Include the relevant [issue number](https://guides.github.com/features/issues/) if applicable. + + +## Do you intend to add a new feature or change an existing one? +- Suggest your change by opening an issue, and adding an Enhancement tag. +- If the maintainers and the community are in favour, create a fork and start writing code. + + +DABEST is a community tool for estimation statistics and analysis. We look forward to more robust and more elegant data visualizations from you all! 
diff --git a/README.md b/README.md index a3f3bfb0..71148289 100644 --- a/README.md +++ b/README.md @@ -53,21 +53,21 @@ DABEST is a package for **D**ata **A**nalysis using **B**ootstrap-Coupled **EST**imation. [Estimation -statistics](https://en.wikipedia.org/wiki/Estimation_statistics) is a +statistics](https://en.wikipedia.org/wiki/Estimation_statistics) are a [simple framework](https://thenewstatistics.com/itns/) that avoids the [pitfalls](https://www.nature.com/articles/nmeth.3288) of significance -testing. It uses familiar statistical concepts: means, mean differences, -and error bars. More importantly, it focuses on the effect size of one’s -experiment/intervention, as opposed to a false dichotomy engendered by -*P* values. +testing. It employs familiar statistical concepts such as means, mean +differences, and error bars. More importantly, it focuses on the effect +size of one’s experiment or intervention, rather than succumbing to a +false dichotomy engendered by *P* values. -An estimation plot has two key features. +An estimation plot comprises two key features. -1. It presents all datapoints as a swarmplot, which orders each point - to display the underlying distribution. +1. It presents all data points as a swarm plot, ordering each point to + display the underlying distribution. -2. It presents the effect size as a **bootstrap 95% confidence - interval** on a **separate but aligned axes**. +2. It illustrates the effect size as a **bootstrap 95% confidence + interval** on a **separate but aligned axis**. ![The five kinds of estimation plots](showpiece.png "The five kinds of estimation plots.") @@ -77,21 +77,24 @@ allowing everyone access to high-quality estimation plots. ## Installation -This package is tested on Python 3.6, 3.7, and 3.8. It is highly +This package is tested on Python 3.6, 3.7, 3.8 and 3.10. 
It is highly recommended to download the [Anaconda distribution](https://www.continuum.io/downloads) of Python in order to obtain the dependencies easily. You can install this package via `pip`. -To install, at the command line run + +or –\> ``` shell -pip install --upgrade dabest +pip install dabest ``` You can also @@ -110,7 +113,7 @@ pip install . import pandas as pd import dabest -# Load the iris dataset. Requires internet access. +# Load the iris dataset. This step requires internet access. iris = pd.read_csv("https://github.com/mwaskom/seaborn-data/raw/master/iris.csv") # Load the above data into `dabest`. @@ -150,50 +153,16 @@ tracker](https://github.com/ACCLAB/DABEST-python/issues/new). ## Contributing All contributions are welcome; please read the [Guidelines for -contributing](https://github.com/ACCLAB/DABEST-python/blob/master/CONTRIBUTING.md) -first. +contributing](CONTRIBUTING.md) first. -We also have a [Code of -Conduct](https://github.com/ACCLAB/DABEST-python/blob/master/CODE_OF_CONDUCT.md) -to foster an inclusive and productive space. +We also have a [Code of Conduct](CODE_OF_CONDUCT.md) to foster an +inclusive and productive space. ### A wish list for new features -Currently, DABEST offers functions to handle data traditionally analyzed -with Student’s paired and unpaired t-tests. It also offers plots for -multiplexed versions of these, and the estimation counterpart to a 1-way -analysis of variance (ANOVA), the shared-control design. While these -five functions execute a large fraction of common biomedical data -analyses, there remain three others: 2-way data, time-series group data, -and proportional data. We aim to add these new functions to both the R -and Python libraries. - -- In many experiments, four groups are investigate to isolate an - interaction, for example: a genotype × drug effect. Here, wild-type - and mutant animals are each subjected to drug or sham treatments; the - data are traditionally analysed with a 2×2 ANOVA. 
We have received - requests by email, Twitter, and GitHub to implement an estimation - counterpart to the 2-way ANOVA. To do this, we will implement - $\Delta\Delta$ plots, in which the difference of means ($\Delta$) of - two groups is subtracted from a second two-group $\Delta$. - **Implemented in v2023.02.14.** - -- Currently, DABEST can analyse multiple paired data in a single plot, - and multiple groups with a common, shared control. However, a common - design in biomedical science is to follow the same group of subjects - over multiple, successive time points. An estimation plot for this - would combine elements of the two other designs, and could be used in - place of a repeated-measures ANOVA. **Implemented in v2023.02.14** - -- We have observed that proportional data are often analyzed in - neuroscience and other areas of biomedical research. However, compared - to other data types, the charts are frequently impoverished: often, - they omit error bars, sample sizes, and even P values—let alone effect - sizes. We would like DABEST to feature proportion charts, with error - bars and a curve for the distribution of the proportional differences. - **Implemented in v2023.02.14** - -We encourage contributions for the above features. +If you have any specific comments and ideas for new features that you +would like to share with us, please fill this +[form](https://docs.google.com/forms/d/1XnF-dkPFUYDPxH9WNXK0L2uU0uv2ZDy4paiCJuk_Kn0). ## Acknowledgements @@ -206,13 +175,20 @@ Stanislav Ott. ## Testing -To test DABEST, you will need to install -[pytest](https://docs.pytest.org/en/latest). +To test DABEST, you need to install +[pytest](https://docs.pytest.org/en/latest) and +[nbdev](https://nbdev.fast.ai/). + +- Run `pytest` in the root directory of the source distribution. This + runs the test suite in the folder `dabest/tests/mpl_image_tests`. +- Run `nbdev_test` in the root directory of the source distribution. 
+ This runs the value assertion tests in the parent folder `dabest/tests`. + +The test suite ensures that the bootstrapping functions and the plotting +functions perform as expected. -Run `pytest` in the root directory of the source distribution. This runs -the test suite in the folder `dabest/tests`. The test suite will ensure -that the bootstrapping functions and the plotting functions perform as -expected. +For detailed information, please refer to the [test +folder](nbs/tests/README.md). ## DABEST in other languages diff --git a/nbs/01-getting_started.ipynb b/nbs/01-getting_started.ipynb index ad5084dd..ac0e884b 100644 --- a/nbs/01-getting_started.ipynb +++ b/nbs/01-getting_started.ipynb @@ -75,7 +75,7 @@ "Clone the [DABEST-python repo](https://github.com/ACCLAB/DABEST-python) locally (see instructions [here](https://help.github.com/articles/cloning-a-repository/)).\n", "\n", "Then, navigate to the cloned repo in the command line and run\n", - "```\n", + "``` shell\n", "$ pip install .\n", "```" ] @@ -93,9 +93,13 @@ "id": "a9f8cb3e", "metadata": {}, "source": [ - "To test DABEST, you will need to install [pytest](https://docs.pytest.org/en/latest/). \n", + "To test DABEST, you will need to install [pytest](https://docs.pytest.org/en/latest/) and [nbdev](https://nbdev.fast.ai/). \n", + "\n", + "Run ``nbdev_export && nbdev_test`` in the root directory of the source distribution. This runs the value assertion tests in the ``dabest/tests`` folder.\n", + "\n", + "Run ``pytest`` in the root directory of the source distribution. This runs the image-based tests in the ``dabest/tests/mpl_image_tests`` subfolder.\n", "\n", - "Run ``pytest`` in the root directory of the source distribution. This runs the test suite in ``dabest/tests`` folder including also the image-based tests of the ``mpl_image_tests`` sub folder. 
The test suite will ensure that the bootstrapping functions and the plotting functions perform as expected.\n", + "The test suite will ensure that the bootstrapping functions and the plotting functions perform as expected.\n", "\n" ] }, diff --git a/nbs/blog/posts/robust-beautiful/four_samples.csv b/nbs/blog/posts/robust-beautiful/four_samples.csv new file mode 100644 index 00000000..547296b5 --- /dev/null +++ b/nbs/blog/posts/robust-beautiful/four_samples.csv @@ -0,0 +1,16 @@ +A,B,C,D +8.109188439895592,9.33184689521894,9.354787823122058,12.672612419124242 +10.131749781263766,9.33184689521894,9.455474658800501,7.66146433397944 +7.178065881431648,14.342995181076896,9.577728760474645,7.66146410459298 +7.070698283373329,6.826272752289961,9.580692407448097,7.66146410459298 +14.530513949101707,11.837421038147918,8.976055745893488,7.66146410459298 +12.837160139759924,9.33184689521894,13.280120639175362,12.672612419124242 +8.98967523846707,9.33184689521894,5.284223374586355,7.66146410459298 +9.548347716611921,14.342995181076896,9.580597428100837,12.672612419124242 +10.98994063849879,9.33184689521894,9.547969576803277,7.66146410459298 +12.402350479094743,6.826272752289961,9.435510391485826,7.66146410459298 +11.694550072150143,6.826272752289961,9.277034488877351,12.672612419124242 +4.8799780463809785,9.33184689521894,9.389691597155812,12.672612419124242 +9.528364669906528,9.33184689521894,9.586309213728654,12.672612419124242 +9.392042031837274,9.33184689521894,17.540147276068026,7.66146410459298 +12.717374632226587,14.342995181076896,10.13365661827971,12.672612419124242 diff --git a/nbs/blog/posts/robust-beautiful/robust-beautiful.ipynb b/nbs/blog/posts/robust-beautiful/robust-beautiful.ipynb index e913c1aa..703526fb 100644 --- a/nbs/blog/posts/robust-beautiful/robust-beautiful.ipynb +++ b/nbs/blog/posts/robust-beautiful/robust-beautiful.ipynb @@ -69,7 +69,7 @@ "In the above figure, four different samples with wildly different\n", "distributions--as seen in the swarmplot 
on the left panel--look exactly\n", "the same when visualized with a barplot on the right panel. (You can\n", - "download the [dataset](_static/four_samples.csv) to see for yourself.)\n", + "download the [dataset](four_samples.csv) to see for yourself.)\n", "\n", "We're not the first ones (see these articles:\n", "[article 1](https://www.nature.com/articles/nmeth.2837),\n", diff --git a/nbs/read_me.ipynb b/nbs/read_me.ipynb index 9f6d0519..4a5005cb 100644 --- a/nbs/read_me.ipynb +++ b/nbs/read_me.ipynb @@ -88,13 +88,14 @@ "You can install this package via `pip`.\n", "\n", "To install, at the command line run\n", - "\n", "```shell\n", - "pip install --upgrade dabest\n", + "pip install dabest\n", "```\n", "You can also [clone](https://help.github.com/articles/cloning-a-repository) this repo locally.\n", "\n", @@ -161,9 +162,9 @@ "source": [ "## Contributing\n", "\n", - "All contributions are welcome; please read the [Guidelines for contributing](https://github.com/ACCLAB/DABEST-python/blob/master/CONTRIBUTING.md) first.\n", + "All contributions are welcome; please read the [Guidelines for contributing](CONTRIBUTING.md) first.\n", "\n", - "We also have a [Code of Conduct](https://github.com/ACCLAB/DABEST-python/blob/master/CODE_OF_CONDUCT.md) to foster an inclusive and productive space.\n" + "We also have a [Code of Conduct](CODE_OF_CONDUCT.md) to foster an inclusive and productive space.\n" ] }, { @@ -172,7 +173,7 @@ "metadata": {}, "source": [ "### A wish list for new features\n", - "If you have any specific comments and ideas for new features that you would like to share with us, please fill this form. **Add the link to a google doc form**" + "If you have any specific comments and ideas for new features that you would like to share with us, please fill this [form](https://docs.google.com/forms/d/1XnF-dkPFUYDPxH9WNXK0L2uU0uv2ZDy4paiCJuk_Kn0)." 
] }, { @@ -187,15 +188,25 @@ "\n", "## Testing\n", "\n", - "To test DABEST, you need to install [pytest](https://docs.pytest.org/en/latest).\n", + "To test DABEST, you need to install [pytest](https://docs.pytest.org/en/latest) and [nbdev](https://nbdev.fast.ai/).\n", "\n", - "Run `pytest` in the root directory of the source distribution. This runs the test suite in the folder `dabest/tests`. The test suite ensures that the bootstrapping functions and the plotting functions perform as expected.\n", + "- Run `pytest` in the root directory of the source distribution. This runs the test suite in the folder `dabest/tests/mpl_image_tests`. \n", + "- Run `nbdev_test` in the root directory of the source distribution. This runs the value assertion tests in parent folder `dabest/tests`\n", "\n", + "The test suite ensures that the bootstrapping functions and the plotting functions perform as expected.\n", + "\n", + "For detailed information, please refer to the [test folder](nbs/tests/README.md)\n", "\n", "## DABEST in other languages\n", "\n", "DABEST is also available in R ([dabestr](https://github.com/ACCLAB/dabestr)) and Matlab ([DABEST-Matlab](https://github.com/ACCLAB/DABEST-Matlab)).\n" ] + }, + { + "cell_type": "markdown", + "id": "7106313a", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/nbs/tutorials/03-proportion_plot.ipynb b/nbs/tutorials/03-proportion_plot.ipynb index 61c73919..05416336 100644 --- a/nbs/tutorials/03-proportion_plot.ipynb +++ b/nbs/tutorials/03-proportion_plot.ipynb @@ -267,7 +267,7 @@ "id": "7070baac", "metadata": {}, "source": [ - "### Convenient funtion to create a dataset for Unpaired Proportional Plot" + "### Convenient function to create a dataset for Unpaired Proportion Plot" ] }, { @@ -542,7 +542,7 @@ "id": "f8e4b193", "metadata": {}, "source": [ - "## Generating proportional plots" + "## Generating proportion plots" ] }, { @@ -758,7 +758,7 @@ "id": "67dbf66e", "metadata": {}, "source": [ - "## Generating paired 
proportion plots" + "## Generating Results" ] }, { @@ -973,7 +973,7 @@ "id": "3e649272", "metadata": {}, "source": [ - "## Generating paired proportional plots" + "## Generating Sankey plots for paired proportions and repeated-measures proportions" ] }, { @@ -982,9 +982,9 @@ "id": "e6c37cd5", "metadata": {}, "source": [ - "For the paired version of the proportional plot, we adopt the style of a Sankey Diagram. The width of each bar in each xtick represents the proportion of the corresponding label in the group, and the strip denotes the paired relationship for each observation.\n", + "For the paired version of the proportion plot, we adopt the style of a Sankey Diagram. The width of each bar in each xtick represents the proportion of the corresponding label in the group, and the strip denotes the paired relationship for each observation.\n", "\n", - "Starting from v2023.3.29, the paired version of the proportional plot receives a major upgrade We introduce the ``sankey`` and ``flow`` parameters to control the plot. By default, both ``sankey`` and ``flow`` are set to True to cater the needs of repeated measures. When ``sankey`` is set to False, DABEST will generate a bar plot with a similar aesthetic to the paired proportional plot. When ``flow`` is set to False, each group of comparsion forms a Sankey diagram that does not connect to other groups of comparison.\n", + "Starting from v2023.3.29, the paired version of the proportion plot receives a major upgrade. We introduce the ``sankey`` and ``flow`` parameters to control the plot. By default, both ``sankey`` and ``flow`` are set to True to cater to the needs of repeated measures. When ``sankey`` is set to False, DABEST will generate a bar plot with a similar aesthetic to the paired proportion plot. 
When ``flow`` is set to False, each group of comparison forms a Sankey diagram that does not connect to other groups of comparison.\n", "\n", "Similar to the unpaired version, the ``.plot()`` method is used to produce a **Gardner-Altman estimation plot**, the only difference is that\n", "the ``is_paired`` parameter is set to either ``baseline`` or ``sequential`` when loading data.\n" @@ -1028,7 +1028,7 @@ "id": "6984eaf5", "metadata": {}, "source": [ - "The paired proportional plot also supports the ``float_contrast`` parameter, which can be set to ``False`` to produce a **Cumming estimation plot**.\n" + "The Sankey plots for paired proportions also support the ``float_contrast`` parameter, which can be set to ``False`` to produce a **Cumming estimation plot**.\n" ] }, { @@ -1060,7 +1060,7 @@ "source": [ "The upper part (grey section) of the bar represents the proportion of observations in the dataset that do not belong to the category, equivalent to the proportion of 0 in the data. The lower part, conversely, represents the proportion of observations that belong to the category, synonymous with **success**, equivalent to the proportion of 1 in the data. \n", "\n", - "Repeated measures are also supported in the paired proportional plot. By adjusting the ``is_paired`` parameter, two types of plot can be generated.\n", + "Repeated measures are also supported in the Sankey plots for paired proportions. By adjusting the ``is_paired`` parameter, two types of plot can be generated.\n", "\n", "By default, the raw data plot (upper part) in both ``baseline`` and ``sequential`` repeated measures remains the same; the only difference is the lower part. For detailed information about repeated measures, please refer to :doc:`repeatedmeasures` ." ] @@ -1181,7 +1181,7 @@ "id": "f7600b4d", "metadata": {}, "source": [ - "By changing the ``sankey`` and ``flow`` parameters, you can generate different types of paired proportional plots." 
+ "By changing the ``sankey`` and ``flow`` parameters, you can generate different types of Sankey plots for paired proportions." ] }, { @@ -1246,7 +1246,7 @@ "id": "e686109e", "metadata": {}, "source": [ - "Several exclusive parameters can be provided to the ``plot()`` method to customize the paired proportional plot.\n", + "Several exclusive parameters can be provided to the ``plot()`` method to customize the Sankey plots for paired proportions.\n", "By modifying the sankey_kwargs parameter, you can customize the Sankey plot. The following parameters are supported:\n", "\n", "- **width**: The width of each Sankey bar. Default is 0.5.\n", diff --git a/nbs/tutorials/05-delta_delta.ipynb b/nbs/tutorials/05-delta_delta.ipynb index 2690b963..3920d0f5 100644 --- a/nbs/tutorials/05-delta_delta.ipynb +++ b/nbs/tutorials/05-delta_delta.ipynb @@ -607,7 +607,7 @@ } ], "source": [ - "unpaired_delta2.delta_g.plot();\n" + "unpaired_delta2.delta_g.plot();" ] }, { @@ -759,23 +759,6 @@ "unpaired_delta2.mean_diff.delta_delta" ] }, - { - "cell_type": "markdown", - "id": "3ba800cc", - "metadata": {}, - "source": [ - "The ``delta_delta`` object has its own attributes, containing various information of delta - delta.\n", - "\n", - " - ``difference``: the mean bootstrapped differences between the 2 groups of bootstrapped mean differences \n", - " - ``bootstraps``: the 2 groups of bootstrapped mean differences \n", - " - ``bootstraps_delta_delta``: the bootstrapped differences between the 2 groups of bootstrapped mean differences \n", - " - ``permutations``: the mean difference between the two groups of bootstrapped mean differences calculated based on the permutation data\n", - " - ``permutations_var``: the pooled group variances of two groups of bootstrapped mean differences calculated based on permutation data\n", - " - ``permutations_delta_delta``: the delta-delta calculated based on the permutation data\n", - "\n", - "``delta_delta.to_dict()`` will return all the attributes in a 
dictionary format." - ] - }, { "cell_type": "markdown", "id": "75dde9a4", @@ -816,6 +799,23 @@ "source": [ "unpaired_delta2.delta_g.delta_delta" ] + }, + { + "cell_type": "markdown", + "id": "3ba800cc", + "metadata": {}, + "source": [ + "The ``delta_delta`` object has its own attributes, containing various information of delta - delta.\n", + "\n", + " - ``difference``: the mean bootstrapped differences between the 2 groups of bootstrapped mean differences \n", + " - ``bootstraps``: the 2 groups of bootstrapped mean differences \n", + " - ``bootstraps_delta_delta``: the bootstrapped differences between the 2 groups of bootstrapped mean differences \n", + " - ``permutations``: the mean difference between the two groups of bootstrapped mean differences calculated based on the permutation data\n", + " - ``permutations_var``: the pooled group variances of two groups of bootstrapped mean differences calculated based on permutation data\n", + " - ``permutations_delta_delta``: the delta-delta calculated based on the permutation data\n", + "\n", + "``delta_delta.to_dict()`` will return all the attributes in a dictionary format." 
+ ] } ], "metadata": { From b73a8009bce75ff03c0a57c6423e9760a4010863 Mon Sep 17 00:00:00 2001 From: Jacob Luke Date: Fri, 12 Jan 2024 09:31:03 +0800 Subject: [PATCH 4/5] Small fixes to documentation and url config --- dabest/_modidx.py | 4 ++-- nbs/nbdev.yml | 4 ++-- nbs/tutorials/01-basics.ipynb | 4 ++-- nbs/tutorials/03-proportion_plot.ipynb | 2 +- nbs/tutorials/04-mini_meta_delta.ipynb | 2 +- settings.ini | 2 +- setup.py | 3 +++ 7 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dabest/_modidx.py b/dabest/_modidx.py index 6fa1525a..02255adb 100644 --- a/dabest/_modidx.py +++ b/dabest/_modidx.py @@ -2,8 +2,8 @@ d = { 'settings': { 'branch': 'master', 'doc_baseurl': '/DABEST-python', - 'doc_host': 'https://ZHANGROU-99.github.io', - 'git_url': 'https://github.com/ZHANGROU-99/DABEST-python', + 'doc_host': 'https://acclab.github.io', + 'git_url': 'https://github.com/acclab/DABEST-python', 'lib_path': 'dabest'}, 'syms': { 'dabest._stats_tools.confint_1group': { 'dabest._stats_tools.confint_1group.compute_1group_acceleration': ( 'API/confint_1group.html#compute_1group_acceleration', 'dabest/_stats_tools/confint_1group.py'), diff --git a/nbs/nbdev.yml b/nbs/nbdev.yml index d5ab7123..34cfc3c6 100644 --- a/nbs/nbdev.yml +++ b/nbs/nbdev.yml @@ -3,7 +3,7 @@ project: website: title: "dabest" - site-url: "https://ZHANGROU-99.github.io/DABEST-python" + site-url: "https://acclab.github.io/DABEST-python" description: "Data Analysis and Visualization using Bootstrap-Coupled Estimation." repo-branch: master - repo-url: "https://github.com/ZHANGROU-99/DABEST-python" + repo-url: "https://github.com/acclab/DABEST-python" diff --git a/nbs/tutorials/01-basics.ipynb b/nbs/tutorials/01-basics.ipynb index b391eb8b..ed2aeac2 100644 --- a/nbs/tutorials/01-basics.ipynb +++ b/nbs/tutorials/01-basics.ipynb @@ -462,7 +462,7 @@ "\"unpaired mean difference\"). 
The confidence interval is reported as:\n", "[*confidenceIntervalWidth* *LowerBound*, *UpperBound*]\n", "\n", - "This confidence interval is generated through bootstrap resampling. See [`bootstraps`](/blog/posts/bootstraps/bootstraps.html) for more details.\n", + "This confidence interval is generated through bootstrap resampling. See [`bootstraps`](/blog/posts/bootstraps/bootstraps.ipynb) for more details.\n", "\n", "Since v0.3.0, DABEST will report the p-value of the [non-parametric two-sided approximate permutation t-test](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests). This is also known as *the Monte Carlo permutation test*.\n", "\n", @@ -875,7 +875,7 @@ "source": [ "To generate a **Gardner-Altman estimation plot**, simply use the\n", "``.plot()`` method. You can learn more about its genesis and design\n", - "inspiration at [`robust-beautiful`](/blog/posts/robust-beautiful/robust-beautiful.html).\n", + "inspiration at [`robust-beautiful`](/blog/posts/robust-beautiful/robust-beautiful.ipynb).\n", "\n", "Each instance of an effect size has access to the ``.plot()`` method. This allows you to quickly create plots for different effect sizes with ease." ] diff --git a/nbs/tutorials/03-proportion_plot.ipynb b/nbs/tutorials/03-proportion_plot.ipynb index 05416336..e29a858c 100644 --- a/nbs/tutorials/03-proportion_plot.ipynb +++ b/nbs/tutorials/03-proportion_plot.ipynb @@ -1062,7 +1062,7 @@ "\n", "Repeated measures are also supported in the Sankey plots for paired proportions. By adjusting the ``is_paired`` parameter, two types of plot can be generated.\n", "\n", - "By default, the raw data plot (upper part) in both ``baseline`` and ``sequential`` repeated measures remains the same; the only difference is the lower part. For detailed information about repeated measures, please refer to :doc:`repeatedmeasures` ." 
+ "By default, the raw data plot (upper part) in both ``baseline`` and ``sequential`` repeated measures remains the same; the only difference is the lower part. For detailed information about repeated measures, please refer to [repeated measures](02-repeated_measures.ipynb) ." ] }, { diff --git a/nbs/tutorials/04-mini_meta_delta.ipynb b/nbs/tutorials/04-mini_meta_delta.ipynb index 1e49d475..4621d180 100644 --- a/nbs/tutorials/04-mini_meta_delta.ipynb +++ b/nbs/tutorials/04-mini_meta_delta.ipynb @@ -300,7 +300,7 @@ "id": "1a3bcd5c", "metadata": {}, "source": [ - "When this ``Dabest`` object is invoked, it should indicate that effect sizes will be calculated for each group, along with the weighted delta. It is important to note once again that the weighted delta will only be calculated for mean differences" + "When this `dabest` object is invoked, it should indicate that effect sizes will be calculated for each group, along with the weighted delta. It is important to note once again that the weighted delta will only be calculated for mean differences" ] }, { diff --git a/settings.ini b/settings.ini index 0bd64c4d..08e05a64 100644 --- a/settings.ini +++ b/settings.ini @@ -35,7 +35,7 @@ description = Data Analysis and Visualization using Bootstrap-Coupled Estimation keywords = nbdev jupyter notebook python language = English status = 3 -user = ZHANGROU-99 +user = acclab requirements = fastcore pandas~=1.5.0 numpy~=1.23.5 matplotlib~=3.6.3 seaborn~=0.12.2 scipy~=1.9.3 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 diff --git a/setup.py b/setup.py index 6e22e810..e8fbfbd8 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,8 @@ min_python = cfg['min_python'] lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) dev_requirements = (cfg.get('dev_requirements') or '').split() +project_urls = {} +if cfg.get('doc_host'): project_urls["Documentation"] = cfg['doc_host'] + cfg.get('doc_baseurl', '') setuptools.setup( name = 
cfg['lib_name'], @@ -51,6 +53,7 @@ 'console_scripts': cfg.get('console_scripts','').split(), 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] }, + project_urls = project_urls, **setup_cfg) From 101a00b951ba49576b89d43c59e0808463d74e34 Mon Sep 17 00:00:00 2001 From: Jacob Luke Date: Mon, 22 Jan 2024 16:37:35 +0800 Subject: [PATCH 5/5] Edit on feature request and bug report --- .github/ISSUE_TEMPLATE/bug_report.md | 34 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 23 +++++++++++++++ CONTRIBUTING.md | 4 +-- README.md | 6 ++-- nbs/read_me.ipynb | 2 +- 5 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..0f4d0257 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. +2. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Your package version (please complete the following information):** + - dabest: [e.g. 2023.3.29] + - pandas: + - numpy: + - matplotlib: + - seaborn: + - scipy: + + +**Additional context** +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..b260e30f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Is a dataset available for testing out the functionality?** +If yes, please leave a Google Drive link. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1350d01b..52b583c1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ ## Did you find a bug? - Ensure the bug was not already reported by searching in [Issues](https://github.com/ACCLAB/DABEST-python/issues). Check that the bug hasn't been addressed in a closed issue. -- If the bug isn't being addressed, open a new one. Be sure to include a title and clear description, and a [minimally reproducible code sample](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) demonstrating the expected behavior that is not occurring. +- If the bug isn't being addressed, open a new issue using the Bug report template. Be sure to fill in the necessary information, and include a [minimally reproducible code sample](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) demonstrating the expected behavior that is not occurring. ## Did you write a patch that fixes a bug? @@ -16,7 +16,7 @@ ## Do you intend to add a new feature or change an existing one? 
-- Suggest your change by opening an issue, and adding an Enhancement tag. +- Suggest your change by opening an issue using the Feature request template. - If the maintainers and the community are in favour, create a fork and start writing code. diff --git a/README.md b/README.md index 71148289..8e257ee2 100644 --- a/README.md +++ b/README.md @@ -161,8 +161,10 @@ inclusive and productive space. ### A wish list for new features If you have any specific comments and ideas for new features that you -would like to share with us, please fill this -[form](https://docs.google.com/forms/d/1XnF-dkPFUYDPxH9WNXK0L2uU0uv2ZDy4paiCJuk_Kn0). +would like to share with us, please read the [Guidelines for +contributing](CONTRIBUTING.md), create a new issue using the Feature request +template, or create a new post in [our Google +Group](https://groups.google.com/g/estimationstats). ## Acknowledgements diff --git a/nbs/read_me.ipynb b/nbs/read_me.ipynb index 4a5005cb..65fa4252 100644 --- a/nbs/read_me.ipynb +++ b/nbs/read_me.ipynb @@ -173,7 +173,7 @@ "metadata": {}, "source": [ "### A wish list for new features\n", - "If you have any specific comments and ideas for new features that you would like to share with us, please fill this [form](https://docs.google.com/forms/d/1XnF-dkPFUYDPxH9WNXK0L2uU0uv2ZDy4paiCJuk_Kn0)." + "If you have any specific comments and ideas for new features that you would like to share with us, please read the [Guidelines for contributing](CONTRIBUTING.md), create a new issue using the Feature request template, or create a new post in [our Google Group](https://groups.google.com/g/estimationstats)." ] }, {