From a52023837646a7d0fbe2b81d3626bc3c3b616100 Mon Sep 17 00:00:00 2001 From: martaverfer Date: Wed, 12 Mar 2025 16:35:10 +0100 Subject: [PATCH] Two hyp test Lab --- your-code/main.ipynb | 691 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 617 insertions(+), 74 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..e9406a5 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "# Libraries\n", + "import pandas as pd # manipulate dataframes\n", + "import numpy as np # numerical python\n", + "import matplotlib.pyplot as plt # viz\n", + "import scipy.stats as sts\n", + "from scipy.stats import chi2_contingency" ] }, { @@ -35,13 +40,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Run this code:\n", - "\n", - "pokemon = pd.read_csv('../pokemon.csv')" + "data = pd.read_csv('../pokemon.csv')\n", + "pokemon = data.copy()" ] }, { @@ -53,12 +58,154 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.head()" ] }, { @@ -70,12 +217,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon[\"Legendary\"].value_counts()\n" ] }, { @@ -87,12 +248,70 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Legendary
False417.21106.76
True637.3860.94
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Legendary \n", + "False 417.21 106.76\n", + "True 637.38 60.94" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "points_legend_df = pokemon.groupby('Legendary')['Total'].agg(['mean', 'std']).round(2)\n", + "points_legend_df\n" ] }, { @@ -104,14 +323,65 @@ "In the cell below, use the `ttest_ind` function in `scipy.stats` to compare the the total points for legendary and non-legendary Pokemon. Since we do not have any information about the population, assume the variances are not equal." ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# 0\n", + "Name 0\n", + "Type 1 0\n", + "Type 2 386\n", + "Total 0\n", + "HP 0\n", + "Attack 0\n", + "Defense 0\n", + "Sp. Atk 0\n", + "Sp. Def 0\n", + "Speed 0\n", + "Generation 0\n", + "Legendary 0\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.isnull().sum()" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 25.83\n", + "P-Value: 0.00000000000000000000000000000000000000000000009358\n", + "\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "# Extract total points for Legendary and non legendary Pokemon\n", + "legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n", + "non_legendary = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n", + "\n", + "# Perform two-sample t-test for independent samples\n", + "t_stat, p_value = sts.ttest_ind(legendary, non_legendary, equal_var=False) # equal_var False as we do not have any information about the population\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.50f}\")\n", + "print()" ] }, { @@ 
-123,12 +393,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for legendary and non-legendary Pokemon are significantly different.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "# We set our significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean total points for legendary and non-legendary Pokemon are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for legendary and non-legendary Pokemon are significantly different.\")" ] }, { @@ -140,12 +425,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Rock 44\n", + "Electric 44\n", + "Ground 32\n", + "Ghost 32\n", + "Dragon 32\n", + "Dark 31\n", + "Poison 28\n", + "Fighting 27\n", + "Steel 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon[\"Type 1\"].value_counts()" ] }, { @@ -157,12 +472,74 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Type1_water
False435.86121.09
True430.46113.19
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Type1_water \n", + "False 435.86 121.09\n", + "True 430.46 113.19" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "# Create a new column with True values when it is a Type 1 water Pokemon, else false\n", + "pokemon['Type1_water'] = pokemon['Type 1'] == 'Water'\n", + "\n", + "# Compare the mean and std of total points for water Pokemon to all other Pokemon\n", + "points_water_df = pokemon.groupby('Type1_water')['Total'].agg(['mean', 'std']).round(2)\n", + "points_water_df" ] }, { @@ -176,10 +553,27 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.46\n", + "P-Value: 0.64\n", + "\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "# Extract total points for Legendary and non legendary Pokemon\n", + "water = pokemon[pokemon[\"Type1_water\"] == True][\"Total\"]\n", + "non_water = pokemon[pokemon[\"Type1_water\"] == False][\"Total\"]\n", + "\n", + "# Perform two-sample t-test for independent samples\n", + "t_stat, p_value = sts.ttest_ind(water, non_water, equal_var=False) # equal_var False as we do not have any information about the population\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.2f}\")" ] }, { @@ -191,12 +585,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis: The mean total points for water and non-water type Pokemon are not significantly different.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "# We set out significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " 
print(\"Fail to Reject the Null Hypothesis: The mean total points for water and non-water type Pokemon are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for water and non-water type Pokemon are significantly different.\")" ] }, { @@ -210,12 +619,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.000017\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "# Perform paired t-test\n", + "t_stat, p_value = sts.ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")" ] }, { @@ -227,12 +648,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the Null Hypothesis: There is a significant difference between each Pokemon's defense and attack scores.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between each Pokemon's defense and attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between each Pokemon's defense and attack scores.\")" ] }, { @@ -244,12 +680,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 0.85\n", + "P-Value: 0.393369\n" + ] + } + ], 
"source": [ "# Your code here:\n", - "\n" + "# Perform paired t-test\n", + "t_stat, p_value = sts.ttest_rel(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")" ] }, { @@ -266,7 +714,14 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between each Pokemon's special defense and special attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between each Pokemon's special defense and special attack scores.\")" ] }, { @@ -282,11 +737,21 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -4.33\n", + "P-Value: 0.000017\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", - " " + "t_stat, p_value = sts.ttest_1samp(pokemon[\"Defense\"]-pokemon[\"Attack\"], popmean=0) # we want to test whether the mean of the differences is zero\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\") # We can confirm that the mean of the difference is 0" ] }, { @@ -302,12 +767,70 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Type1_waterFalseTrue
Legendary
False627108
True614
\n", + "
" + ], + "text/plain": [ + "Type1_water False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "legend_type_crosstab = pd.crosstab(pokemon['Legendary'], pokemon['Type1_water'])\n", + "legend_type_crosstab" ] }, { @@ -319,12 +842,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.08625467249550949" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "chi2_stats, chi2_pvalue, _, _ = chi2_contingency(legend_type_crosstab)\n", + "chi2_pvalue" ] }, { @@ -336,25 +871,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis: No significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\n" + ] + } + ], "source": [ "# Your answer here:\n", - "\n" + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if chi2_pvalue > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -368,7 +911,7 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.7" } }, "nbformat": 4,