From ab89d3ef5fca24fc347ee243ef52d3feb2655fe5 Mon Sep 17 00:00:00 2001 From: EmiliaHorton12 Date: Fri, 14 Mar 2025 18:21:30 +0100 Subject: [PATCH] Update main.ipynb --- your-code/main.ipynb | 487 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 428 insertions(+), 59 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..c5f5370 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 193, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import math \n", + "import pandas as pd # manipulate dataframes\n", + "import numpy as np # numerical python\n", + "import matplotlib.pyplot as plt # viz\n", + "\n", + "# New libraries\n", + "import scipy.stats as st # stats\n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n", + "from scipy.stats import chi2_contingency" ] }, { @@ -35,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 143, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +62,154 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.head()" ] }, { @@ -70,12 +221,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon['Legendary'].value_counts()" ] }, { @@ -87,12 +252,69 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Legendary
False417.213605106.760417
True637.38461560.937389
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Legendary \n", + "False 417.213605 106.760417\n", + "True 637.384615 60.937389" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.groupby('Legendary')['Total'].agg(['mean', 'std'])" ] }, { @@ -106,12 +328,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 25.8336\n", + "P-value: 0.0000\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n", + "non_legendary = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n", + "t_stat, p_value = st.ttest_ind(legendary, non_legendary, equal_var=False)\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.4f}\")" ] }, { @@ -128,7 +363,9 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# H0 = there is no difference in mean Total between Legendary and non-Legendary pokemon\n", + "# H1 = there is a difference in mean Total between Legendary and non-Legendary pokemon\n", + "# The result is statistically significant (p < 0.05), so we reject H0." 
] }, { @@ -140,12 +377,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Electric 44\n", + "Rock 44\n", + "Ghost 32\n", + "Ground 32\n", + "Dragon 32\n", + "Dark 31\n", + "Poison 28\n", + "Fighting 27\n", + "Steel 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.value_counts('Type 1')" ] }, { @@ -157,12 +424,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 430.455357\n", + "std 113.188266\n", + "Name: Total, dtype: float64" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"].agg(['mean', 'std'])\n", + "other_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"].agg(['mean', 'std'])\n", + "water_pokemon" ] }, { @@ -174,12 +456,23 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -0.0298\n", + "P-value: 0.9790\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, p_value = st.ttest_ind(water_pokemon, other_pokemon, equal_var=True)\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.4f}\")" ] }, { @@ -196,7 +489,9 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# H0 = there is no difference 
between water pokemons and the others. \n", + "# H1 = there is a difference between water pokemons and the others.\n", + "# The result is not statistically significant, so we fail to reject H0." ] }, { @@ -210,12 +505,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -4.3256\n", + "P-value: 0.0000\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "defense = pokemon['Defense']\n", + "attack = pokemon['Attack']\n", + "t_stat, p_value = st.ttest_rel(defense, attack)\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.4f}\")" ] }, { @@ -232,24 +540,39 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# H0 = there is no difference between the mean Defense and the mean Attack of pokemons\n", + "# H1 = there is a difference between the mean Defense and the mean Attack of pokemons\n", + "# The result is statistically significant (p < 0.05), so we reject H0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We are also curious about whether therer is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. " + "We are also curious about whether there is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. " ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -0.8540\n", + "P-value: 0.3934\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "sp_def = pokemon['Sp. Def']\n", + "sp_atk = pokemon['Sp. 
Atk']\n", + "t_stat, p_value = st.ttest_rel(sp_def, sp_atk)\n", + "print(f\"T-statistic: {t_stat:.4f}\")\n", + "print(f\"P-value: {p_value:.4f}\")" ] }, { @@ -266,7 +589,9 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# H0 = there is no difference between pokemons\n", + "# H1 = there is a difference between pokemons\n", + "# fail to reject H0" ] }, { @@ -280,13 +605,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 187, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "One-sample t-test on the difference:\n", + "T-statistic: -4.3256, P-value: 0.0000\n", + "\n", + "Paired t-test (standard approach):\n", + "T-statistic: -4.3256, P-value: 0.0000\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", - " " + "difference = pokemon['Defense'] - pokemon['Attack']\n", + "t_stat_1samp, p_value_1samp = st.ttest_1samp(difference, popmean=0)\n", + "\n", + "# Step 3: Perform the standard paired t-test (should match results)\n", + "t_stat_paired, p_value_paired = st.ttest_rel(pokemon['Defense'], pokemon['Attack'])\n", + "print(\"One-sample t-test on the difference:\")\n", + "print(f\"T-statistic: {t_stat_1samp:.4f}, P-value: {p_value_1samp:.4f}\")\n", + "\n", + "print(\"\\nPaired t-test (standard approach):\")\n", + "print(f\"T-statistic: {t_stat_paired:.4f}, P-value: {p_value_paired:.4f}\")" ] }, { @@ -302,12 +647,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 189, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legendary False True \n", + "Type 1 \n", + "False 627 61\n", + "True 108 4\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "tab = pd.crosstab(pokemon['Type 1'] == 'Water', pokemon['Legendary'])\n", + "print(tab)" ] }, { @@ -319,12 +676,24 @@ }, { "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], + "execution_count": 197, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.08625467249550949" + ] + }, + "execution_count": 197, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "_, chi2_pvalue, _, _ = chi2_contingency(tab)\n", + "chi2_pvalue" ] }, { @@ -341,7 +710,7 @@ "outputs": [], "source": [ "# Your answer here:\n", - "\n" + "# The result is not statistically significant (p = 0.0863). You cannot reject H0.\n" ] }, { @@ -372,5 +741,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }