diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..46b0ccf 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", - "\n" + "# Libraries\n", + "import pandas as pd # manipulate dataframes\n", + "import numpy as np # numerical python\n", + "import math # numerical python\n", + "import matplotlib.pyplot as plt # viz\n", + "\n", + "# New libraries\n", + "import scipy.stats as stats \n", + "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf" ] }, { @@ -35,13 +43,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Run this code:\n", - "\n", - "pokemon = pd.read_csv('../pokemon.csv')" + "pokemon = pd.read_csv('../pokemon.csv')\n", + "pokemon_df = pokemon.copy()" ] }, { @@ -53,12 +60,153 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon_df.head(5)" ] }, { @@ -70,12 +218,23 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Legendary Name\n", + "0 False 735\n", + "1 True 65\n" + ] + } + ], + "source": [ + "pokemon_df_legendary = pokemon_df.groupby('Legendary')['Name'].count().reset_index() \n", + "\n", + "print(round(pokemon_df_legendary,2))" ] }, { @@ -87,12 +246,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Legendary Mean\n", + "0 False 417.21\n", + "1 True 637.38\n", + " Legendary Std\n", + "0 False 106.76\n", + "1 True 60.94\n" + ] + } + ], + "source": [ + "pokemon_df_legendary_mean = pokemon_df.groupby('Legendary')['Total'].mean().reset_index(name = \"Mean\")\n", + "pokemon_df_legendary_std = pokemon_df.groupby('Legendary')['Total'].std().reset_index(name = \"Std\")\n", + "print(round(pokemon_df_legendary_mean,2))\n", + "print(round(pokemon_df_legendary_std,2))" ] }, { @@ -106,12 +280,29 @@ }, { "cell_type": "code", - "execution_count": null, 
- "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 25.83\n", + "P-Value: 0.0000\n", + "\n" + ] + } + ], + "source": [ + "# Extract Total for legendary and non legendary pokemons\n", + "legendary_pokemon = pokemon_df[pokemon_df[\"Legendary\"] == True][\"Total\"].dropna()\n", + "nonlegendary_pokemon = pokemon_df[pokemon_df[\"Legendary\"] == False][\"Total\"].dropna()\n", + "\n", + "# Perform two-sample t-test for independent samples\n", + "t_stat, p_value = stats.ttest_ind(legendary_pokemon, nonlegendary_pokemon, equal_var=False) # equal_var=False selects Welch's t-test (more robust: does not assume equal variances)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.4f}\")\n", + "print()" ] }, { @@ -123,12 +314,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your conclusions here:\n", - "\n" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for legendary and non legendary pokemon.\n" + ] + } + ], + "source": [ + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean of total points for legendary and non legendary pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for legendary and non legendary pokemon.\")" ] }, { @@ -140,12 +345,59 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 8, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Type 1 Mean\n", + "0 Bug 378.93\n", + "1 Dark 445.74\n", + "2 Dragon 550.53\n", + "3 Electric 443.41\n", + "4 Fairy 413.18\n", + "5 Fighting 416.44\n", + "6 Fire 458.08\n", + "7 Flying 485.00\n", + "8 Ghost 439.56\n", + "9 Grass 421.14\n", + "10 Ground 437.50\n", + "11 Ice 433.46\n", + "12 Normal 401.68\n", + "13 Poison 399.14\n", + "14 Psychic 475.95\n", + "15 Rock 453.75\n", + "16 Steel 487.70\n", + "17 Water 430.46\n", + " Type 1 Std\n", + "0 Bug 117.88\n", + "1 Dark 109.13\n", + "2 Dragon 146.27\n", + "3 Electric 105.72\n", + "4 Fairy 123.78\n", + "5 Fighting 102.46\n", + "6 Fire 109.76\n", + "7 Flying 161.40\n", + "8 Ghost 110.07\n", + "9 Grass 106.65\n", + "10 Ground 123.91\n", + "11 Ice 108.28\n", + "12 Normal 115.73\n", + "13 Poison 92.36\n", + "14 Psychic 139.03\n", + "15 Rock 108.06\n", + "16 Steel 115.42\n", + "17 Water 113.19\n" + ] + } + ], + "source": [ + "pokemon_df_type1_mean = pokemon_df.groupby('Type 1')['Total'].mean().reset_index(name = \"Mean\")\n", + "pokemon_df_type1_std = pokemon_df.groupby('Type 1')['Total'].std().reset_index(name = \"Std\")\n", + "print(round(pokemon_df_type1_mean,2))\n", + "print(round(pokemon_df_type1_std,2))" ] }, { @@ -157,12 +409,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# Extract Total for water and non water pokemons\n", + "water_pokemon = pokemon_df[pokemon_df[\"Type 1\"] == \"Water\"][\"Total\"].dropna()\n", + "nonwater_pokemon = pokemon_df[pokemon_df[\"Type 1\"] != \"Water\"][\"Total\"].dropna()" ] }, { @@ -174,12 +427,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.44\n", + "P-Value: 0.6587\n", + "\n" + ] + } + ], "source": [ - 
"# Your code here:\n", - "\n" + "# Perform two-sample t-test for independent samples\n", + "wt_stat, wp_value = stats.ttest_ind(water_pokemon, nonwater_pokemon, equal_var=True) # equal_var=True selects Student's pooled-variance t-test (assumes equal variances); use equal_var=False for Welch's test\n", + "print(f\"Test Statistic (t): {wt_stat:.2f}\")\n", + "print(f\"P-Value: {wp_value:.4f}\")\n", + "print()" ] }, { @@ -191,12 +457,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your conclusions here:\n", - "\n" + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis: The mean of total points for waater and non water pokemons are not significantly different.\n" + ] + } + ], + "source": [ + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if wp_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean of total points for waater and non water pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for water and non water pokemon.\")" ] }, { @@ -210,12 +490,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# Attack and defense scores\n", + "pokemon_attack = pokemon_df[\"Attack\"]\n", + "pokemon_defense = pokemon_df[\"Defense\"]" ] }, { @@ -227,12 +508,35 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your conclusions here:\n", - "\n" + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.00002\n", + "\n", + "Reject the Null Hypothesis: There is a significant difference between defense and 
attack scores.\n" + ] + } + ], + "source": [ + "# Perform paired t-test\n", + "pkp_stat, pkp_value = stats.ttest_rel(pokemon_attack, pokemon_defense)\n", + "print(f\"Test Statistic (t): {pkp_stat:.2f}\")\n", + "print(f\"P-Value: {pkp_value:.5f}\")\n", + "print()\n", + "\n", + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if pkp_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between defense and attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between defense and attack scores.\")" ] }, { @@ -244,12 +548,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 0.85\n", + "P-Value: 0.39337\n" + ] + } + ], + "source": [ + "# Special attack and special defense scores\n", + "pokemon_sp_attack = pokemon_df[\"Sp. Atk\"]\n", + "pokemon_sp_defense = pokemon_df[\"Sp. 
Def\"]\n", + "\n", + "# Perform paired t-test on the special stats\n", + "spkp_stat, spkp_value = stats.ttest_rel(pokemon_sp_attack, pokemon_sp_defense)\n", + "print(f\"Test Statistic (t): {spkp_stat:.2f}\")\n", + "print(f\"P-Value: {spkp_value:.5f}\")" ] }, { @@ -261,12 +580,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your conclusions here:\n", - "\n" + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis: No significant difference between special defense and special attack scores.\n" + ] + } + ], + "source": [ + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision-Making\n", + "if spkp_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between special defense and special attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between special defense and special attack scores.\")" ] }, { @@ -280,13 +613,35 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - " \n", - " " + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.0000\n", + "\n", + "Reject the Null Hypothesis: There is evidence to say the mean of the difference is not cero.\n" + ] + } + ], + "source": [ + "pokemon_df['pokemon_dif'] = pokemon_df[\"Attack\"] - pokemon_df[\"Defense\"]\n", + "pokemon_diff = pokemon_df[\"pokemon_dif\"]\n", + "pokemon_dif_mu = 0\n", + "\n", + "# Two-sided one-sample t-test (ttest_1samp default)\n", + "dif_t_stat, dif_p_value = stats.ttest_1samp(pokemon_diff, pokemon_dif_mu)\n", + "print(f\"Test Statistic (t): {dif_t_stat:.2f}\")\n", + "print(f\"P-Value: {dif_p_value:.4f}\")\n", + "print()\n", + "\n", + "if dif_p_value > alpha:\n", + " print(\"Fail to Reject the Null 
Hypothesis: Not enough evidence to say the mean of the difference is not cero.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is evidence to say the mean of the difference is not cero.\") " ] }, { @@ -302,12 +657,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Type1_water False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4\n" + ] + } + ], + "source": [ + "pokemon_df['Type1_water'] = pokemon_df['Type 1'] == 'Water'\n", + "crosstab_result = pd.crosstab(pokemon_df['Legendary'], pokemon_df['Type1_water'])\n", + "\n", + "print(crosstab_result)" ] }, { @@ -319,12 +687,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Chi2ContingencyResult(statistic=2.9429200762850503, pvalue=0.08625467249550985, dof=1, expected_freq=array([[632.1, 102.9],\n", + " [ 55.9, 9.1]]))" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "stats.chi2_contingency(crosstab_result)\n" ] }, { @@ -336,20 +715,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your answer here:\n", - "\n" + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to Reject the Null Hypothesis\n" + ] + } + ], + "source": [ + "pvalue_cross = stats.chi2_contingency(crosstab_result).pvalue\n", + "\n", + "if pvalue_cross > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis\") " ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ 
-368,7 +752,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.4" } }, "nbformat": 4,