From 81fac06cd5219dbddf347eb0957bee64e478cac4 Mon Sep 17 00:00:00 2001 From: Nui Date: Wed, 12 Mar 2025 21:49:47 +0100 Subject: [PATCH] lab - two sample hypothesis test - complete, no bonus --- your-code/main.ipynb | 770 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 727 insertions(+), 43 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..6bc6b03 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Libraries\n", + "import math \n", + "import matplotlib.pyplot as plt # viz\n", + "import scipy.stats as sts # stats\n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n" ] }, { @@ -35,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +62,155 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon.head()" ] }, { @@ -70,12 +222,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon['Legendary'].value_counts()\n" ] }, { @@ -87,11 +253,70 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Legendary
False417.213605106.760417
True637.38461560.937389
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Legendary \n", + "False 417.213605 106.760417\n", + "True 637.384615 60.937389" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", + "pokemon.groupby('Legendary')['Total'].agg(['mean', 'std'])\n", + "\n", "\n" ] }, @@ -106,12 +331,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 25.83\n", + "P-Value: 0.00000000000000000000000000000000000000000000009358\n", + "\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# filter legendary and non-legendary poekmons\n", + "legend = pokemon[pokemon[\"Legendary\"] == True]\n", + "non_legend = pokemon[pokemon[\"Legendary\"] == False]\n", + "\n", + "# perform two-sample t-test for independent samples\n", + "t_stat, p_value = sts.ttest_ind(legend['Total'], non_legend['Total'], equal_var=False)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.50f}\")\n", + "print()" ] }, { @@ -123,12 +367,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "# a very low p-value means that we can reject H0 (no difference between groups)\n", + "# magnitude of t-statistic shows that the difference between the 2 groups is large relative to variability within each group \n" ] }, { @@ -140,11 +386,386 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
Type 1
Bug696952696969696969696969
Dark313121313131313131313131
Dragon323221323232323232323232
Electric444417444444444444444444
Fairy17172171717171717171717
Fighting27277272727272727272727
Fire525224525252525252525252
Flying442444444444
Ghost323222323232323232323232
Grass707037707070707070707070
Ground323219323232323232323232
Ice242411242424242424242424
Normal989837989898989898989898
Poison282813282828282828282828
Psychic575719575757575757575757
Rock444435444444444444444444
Steel272722272727272727272727
Water11211253112112112112112112112112112
\n", + "
" + ], + "text/plain": [ + " # Name Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "Type 1 \n", + "Bug 69 69 52 69 69 69 69 69 69 \n", + "Dark 31 31 21 31 31 31 31 31 31 \n", + "Dragon 32 32 21 32 32 32 32 32 32 \n", + "Electric 44 44 17 44 44 44 44 44 44 \n", + "Fairy 17 17 2 17 17 17 17 17 17 \n", + "Fighting 27 27 7 27 27 27 27 27 27 \n", + "Fire 52 52 24 52 52 52 52 52 52 \n", + "Flying 4 4 2 4 4 4 4 4 4 \n", + "Ghost 32 32 22 32 32 32 32 32 32 \n", + "Grass 70 70 37 70 70 70 70 70 70 \n", + "Ground 32 32 19 32 32 32 32 32 32 \n", + "Ice 24 24 11 24 24 24 24 24 24 \n", + "Normal 98 98 37 98 98 98 98 98 98 \n", + "Poison 28 28 13 28 28 28 28 28 28 \n", + "Psychic 57 57 19 57 57 57 57 57 57 \n", + "Rock 44 44 35 44 44 44 44 44 44 \n", + "Steel 27 27 22 27 27 27 27 27 27 \n", + "Water 112 112 53 112 112 112 112 112 112 \n", + "\n", + " Speed Generation Legendary \n", + "Type 1 \n", + "Bug 69 69 69 \n", + "Dark 31 31 31 \n", + "Dragon 32 32 32 \n", + "Electric 44 44 44 \n", + "Fairy 17 17 17 \n", + "Fighting 27 27 27 \n", + "Fire 52 52 52 \n", + "Flying 4 4 4 \n", + "Ghost 32 32 32 \n", + "Grass 70 70 70 \n", + "Ground 32 32 32 \n", + "Ice 24 24 24 \n", + "Normal 98 98 98 \n", + "Poison 28 28 28 \n", + "Psychic 57 57 57 \n", + "Rock 44 44 44 \n", + "Steel 27 27 27 \n", + "Water 112 112 112 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", + "pokemon.groupby('Type 1').count()\n", "\n" ] }, @@ -157,12 +778,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Your code here:\n", - "\n" + "df_mean_std = pokemon.groupby('Type 1')['Total'].agg(['mean', 'std'])\n", + "water = df_mean_std.loc[['Water']]\n", + "\n", + "non_water = df_mean_std.drop('Water', axis=0)\n" ] }, { @@ -174,12 +798,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.31\n", + "P-Value: 0.76\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# perform two-sample t-test for independent samples\n", + "t_stat, p_value = sts.ttest_ind(water['mean'], non_water['mean'], equal_var=True)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.2f}\")" ] }, { @@ -191,12 +828,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# negative magnitude of t-statistic suggests that water pokemons total mean is lower than all non-water pokemons total points mean\n", + "# p-value > 0.05 suggesting that null hypothesis cannot be rejected\n" ] }, { @@ -210,12 +848,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.000017\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "from scipy.stats import ttest_rel\n", + "\n", + "\n", + "t_stat, p_value = sts.ttest_rel(pokemon['Attack'], pokemon['Defense'])\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")" ] }, { @@ -227,12 +879,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# positive t-test shows that mean of Attack is greater than mean of defense, and 4,33 magnitude suggests that the difference is large relative to the variability of the data\n", + "# p-value is very small, meaning that null hypothesis can be rejected\n" ] }, { @@ -244,12 +897,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 0.85\n", + "P-Value: 0.393369\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "t_stat, p_value = sts.ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. Def'])\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")" ] }, { @@ -261,12 +926,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# positive value of t-test meaning the mean of special attack is greater than mean of special defense, 0.85 magnitude is quite small suggesting the difference is small relative to the variability \n", + "# p-value > 0.05 suggesting that null hypothesis cannot be rejected" ] }, { @@ -280,12 +946,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.000017\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", + "from scipy.stats import ttest_1samp\n", + "\n", + "pokemon['Atk_Def_Diff'] = pokemon['Attack'] - pokemon['Defense']\n", + "\n", + "t_stat, p_value = sts.ttest_1samp(pokemon['Atk_Def_Diff'], 0)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")\n", + "\n", + "# confirmed\n", + "\n", " " ] }, @@ -302,7 +986,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -319,7 +1003,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -336,7 +1020,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -354,7 +1038,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -368,7 +1052,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.4" } }, "nbformat": 4,