From 412d5725c50227c244ebe7d05a54d7c6d030e7e0 Mon Sep 17 00:00:00 2001 From: fmalacrida Date: Thu, 12 Dec 2024 17:09:32 +0100 Subject: [PATCH] Update main.ipynb --- your-code/main.ipynb | 572 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 496 insertions(+), 76 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..242cb1b 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", - "\n" + "import pandas as pd\n", + "import numpy as np\n", + "from scipy.stats import ttest_ind\n", + "from scipy.stats import ttest_rel\n", + "from scipy.stats import ttest_1samp\n", + "from scipy.stats import chi2_contingency\n" ] }, { @@ -35,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +57,154 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.head()\n" ] }, { @@ -70,12 +216,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "legendary_counts = pokemon['Legendary'].value_counts()\n", + "legendary_counts" ] }, { @@ -87,12 +248,70 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
Legendary
False417.213605106.760417
True637.38461560.937389
\n", + "
" + ], + "text/plain": [ + " mean std\n", + "Legendary \n", + "False 417.213605 106.760417\n", + "True 637.384615 60.937389" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "stats_total = pokemon.groupby('Legendary')['Total'].agg(['mean', 'std'])\n", + "stats_total\n" ] }, { @@ -106,12 +325,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 25.8335743895517\n", + "P-value: 9.357954335957446e-47\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "legendary_total = pokemon[pokemon['Legendary'] == True]['Total']\n", + "non_legendary_total = pokemon[pokemon['Legendary'] == False]['Total']\n", + "\n", + "t_stat, p_value = ttest_ind(legendary_total, non_legendary_total, equal_var=False)\n", + "\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")" ] }, { @@ -122,13 +357,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "The high T-test means that there is a big difference between the means of both groups.\n", + "The P-value shows numbers smaller then the significance level. \n", + "Conclusion: significant difference of points between both groups, sugesting that Legendary Pokemons are powerfull then non-legendary pokemons." 
] }, { @@ -140,12 +375,44 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Electric 44\n", + "Rock 44\n", + "Dragon 32\n", + "Ground 32\n", + "Ghost 32\n", + "Dark 31\n", + "Poison 28\n", + "Steel 27\n", + "Fighting 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "type1_counts = pokemon['Type 1'].value_counts()\n", + "\n", + "type1_counts" ] }, { @@ -157,12 +424,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Water Pokémon: mean 430.455357\n", + "std 113.188266\n", + "Name: Total, dtype: float64\n", + "Other Pokémon: mean 435.859012\n", + "std 121.091682\n", + "Name: Total, dtype: float64\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "water_pokemon = pokemon[pokemon['Type 1'] == 'Water']['Total']\n", + "other_pokemon = pokemon[pokemon['Type 1'] != 'Water']['Total']\n", + "\n", + "water_stats = water_pokemon.agg(['mean', 'std'])\n", + "other_stats = other_pokemon.agg(['mean', 'std'])\n", + "\n", + "print(f\"Water Pokémon: {water_stats}\")\n", + "\n", + "print(f\"Other Pokémon: {other_stats}\")" ] }, { @@ -174,12 +462,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -0.4418547448849676\n", + "P-value: 0.6587140317488793\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, 
p_value = ttest_ind(water_pokemon, other_pokemon, equal_var=True)\n", + "\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")" ] }, { @@ -190,13 +490,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "T-statistic: close to zero, indicating almost no difference between the means of the two groups.\n", + "P-value: much larger than the significance level.\n", + "Conclusion: we fail to reject the null hypothesis; there is no significant difference in total points between Water Pokemon and all other Pokemon. " ] }, { @@ -210,12 +510,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 4.325566393330478\n", + "P-value: 1.7140303479358558e-05\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, p_value = ttest_rel(pokemon['Attack'], pokemon['Defense'])\n", + "\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")" ] }, { @@ -226,13 +538,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "T-statistic: positive, meaning the mean attack score is higher than the mean defense score; the sign gives the direction, not the significance.\n", + "P-value: far smaller than the significance level.\n", + "Conclusion: we reject the null hypothesis. There is a significant difference between the attack and defense scores of Pokemon, with attack scores being higher on average." 
] }, { @@ -244,12 +556,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: 0.853986188453353\n", + "P-value: 0.3933685997548122\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, p_value = ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. 
Def'])\n", + "\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\")" ] }, { @@ -260,13 +584,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "T-statistic: positive but small, indicating only a slight difference between the special attack and special defense of Pokemon.\n", + "P-value: larger than the significance level.\n", + "Conclusion: we fail to reject the null hypothesis; there is no significant difference between the special attack and special defense of Pokemon." ] }, { @@ -280,13 +604,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -4.325566393330478\n", + "P-value: 1.7140303479358558e-05\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", - " " + "difference = pokemon['Defense'] - pokemon['Attack']\n", + "\n", + "t_stat, p_value = ttest_1samp(difference, 0)\n", + "\n", + "print(f\"T-statistic: {t_stat}\")\n", + "print(f\"P-value: {p_value}\") " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Conclusion: there is a significant difference between defense and attack scores, with defense scores being lower than attack scores on average. This matches the paired t-test above, with the sign of the statistic flipped." ] }, { @@ -302,12 +646,71 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Type 1FalseTrue
Legendary
False627108
True614
\n", + "
" + ], + "text/plain": [ + "Type 1 False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "contingency_table = pd.crosstab(pokemon['Legendary'], pokemon['Type 1'] == 'Water')\n", + "\n", + "contingency_table" ] }, { @@ -319,12 +722,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-Squared Statistic: 2.9429200762850503\n", + "P-value: 0.08625467249550985\n", + "Degrees of Freedom: 1\n", + "Expected Frequencies:\n", + "[[632.1 102.9]\n", + " [ 55.9 9.1]]\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)\n", + "\n", + "print(f\"Chi-Squared Statistic: {chi2_stat}\")\n", + "print(f\"P-value: {p_value}\")\n", + "print(f\"Degrees of Freedom: {dof}\")\n", + "print(\"Expected Frequencies:\")\n", + "print(expected)" ] }, { @@ -335,13 +757,11 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# Your answer here:\n", - "\n" + "The Chi value and P-value sugest that there is no significant difference between both groups of Pokemon. So we should not reject the null hypothesis." ] }, { @@ -354,7 +774,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -368,9 +788,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }