diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..46b0ccf 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
- "\n"
+ "# Libraries\n",
+ "import pandas as pd # manipulate dataframes\n",
+ "import numpy as np # numerical python\n",
+ "import math # standard-library math functions\n",
+ "import matplotlib.pyplot as plt # viz\n",
+ "\n",
+ "# New libraries\n",
+ "import scipy.stats as stats \n",
+ "import statsmodels.api as sm\n",
+ "import statsmodels.formula.api as smf"
]
},
{
@@ -35,13 +43,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Run this code:\n",
- "\n",
- "pokemon = pd.read_csv('../pokemon.csv')"
+ "pokemon = pd.read_csv('../pokemon.csv')\n",
+ "pokemon_df = pokemon.copy()"
]
},
{
@@ -53,12 +60,153 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pokemon_df.head(5)"
]
},
{
@@ -70,12 +218,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Legendary Name\n",
+ "0 False 735\n",
+ "1 True 65\n"
+ ]
+ }
+ ],
+ "source": [
+ "pokemon_df_legendary = pokemon_df.groupby('Legendary')['Name'].count().reset_index() \n",
+ "\n",
+ "print(round(pokemon_df_legendary,2))"
]
},
{
@@ -87,12 +246,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Legendary Mean\n",
+ "0 False 417.21\n",
+ "1 True 637.38\n",
+ " Legendary Std\n",
+ "0 False 106.76\n",
+ "1 True 60.94\n"
+ ]
+ }
+ ],
+ "source": [
+ "pokemon_df_legendary_mean = pokemon_df.groupby('Legendary')['Total'].mean().reset_index(name = \"Mean\")\n",
+ "pokemon_df_legendary_std = pokemon_df.groupby('Legendary')['Total'].std().reset_index(name = \"Std\")\n",
+ "print(round(pokemon_df_legendary_mean,2))\n",
+ "print(round(pokemon_df_legendary_std,2))"
]
},
{
@@ -106,12 +280,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 25.83\n",
+ "P-Value: 0.0000\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Extract Total for legendary and non legendary pokemons\n",
+ "legendary_pokemon = pokemon_df[pokemon_df[\"Legendary\"] == True][\"Total\"].dropna()\n",
+ "nonlegendary_pokemon = pokemon_df[pokemon_df[\"Legendary\"] == False][\"Total\"].dropna()\n",
+ "\n",
+ "# Perform two-sample t-test for independent samples\n",
+ "t_stat, p_value = stats.ttest_ind(legendary_pokemon, nonlegendary_pokemon, equal_var=False) # equal_var=False runs Welch's t-test (more robust: does not assume equal variances)\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.4f}\")\n",
+ "print()"
]
},
{
@@ -123,12 +314,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your conclusions here:\n",
- "\n"
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for legendary and non legendary pokemon.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean of total points for legendary and non legendary pokemons are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for legendary and non legendary pokemon.\")"
]
},
{
@@ -140,12 +345,59 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Type 1 Mean\n",
+ "0 Bug 378.93\n",
+ "1 Dark 445.74\n",
+ "2 Dragon 550.53\n",
+ "3 Electric 443.41\n",
+ "4 Fairy 413.18\n",
+ "5 Fighting 416.44\n",
+ "6 Fire 458.08\n",
+ "7 Flying 485.00\n",
+ "8 Ghost 439.56\n",
+ "9 Grass 421.14\n",
+ "10 Ground 437.50\n",
+ "11 Ice 433.46\n",
+ "12 Normal 401.68\n",
+ "13 Poison 399.14\n",
+ "14 Psychic 475.95\n",
+ "15 Rock 453.75\n",
+ "16 Steel 487.70\n",
+ "17 Water 430.46\n",
+ " Type 1 Std\n",
+ "0 Bug 117.88\n",
+ "1 Dark 109.13\n",
+ "2 Dragon 146.27\n",
+ "3 Electric 105.72\n",
+ "4 Fairy 123.78\n",
+ "5 Fighting 102.46\n",
+ "6 Fire 109.76\n",
+ "7 Flying 161.40\n",
+ "8 Ghost 110.07\n",
+ "9 Grass 106.65\n",
+ "10 Ground 123.91\n",
+ "11 Ice 108.28\n",
+ "12 Normal 115.73\n",
+ "13 Poison 92.36\n",
+ "14 Psychic 139.03\n",
+ "15 Rock 108.06\n",
+ "16 Steel 115.42\n",
+ "17 Water 113.19\n"
+ ]
+ }
+ ],
+ "source": [
+ "pokemon_df_type1_mean = pokemon_df.groupby('Type 1')['Total'].mean().reset_index(name = \"Mean\")\n",
+ "pokemon_df_type1_std = pokemon_df.groupby('Type 1')['Total'].std().reset_index(name = \"Std\")\n",
+ "print(round(pokemon_df_type1_mean,2))\n",
+ "print(round(pokemon_df_type1_std,2))"
]
},
{
@@ -157,12 +409,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Extract Total for water and non water pokemons\n",
+ "water_pokemon = pokemon_df[pokemon_df[\"Type 1\"] == \"Water\"][\"Total\"].dropna()\n",
+ "nonwater_pokemon = pokemon_df[pokemon_df[\"Type 1\"] != \"Water\"][\"Total\"].dropna()"
]
},
{
@@ -174,12 +427,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): -0.44\n",
+ "P-Value: 0.6587\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Perform two-sample t-test for independent samples\n",
+ "wt_stat, wp_value = stats.ttest_ind(water_pokemon, nonwater_pokemon, equal_var=True) # equal_var=True runs Student's t-test with pooled variance (assumes equal variances)\n",
+ "print(f\"Test Statistic (t): {wt_stat:.2f}\")\n",
+ "print(f\"P-Value: {wp_value:.4f}\")\n",
+ "print()"
]
},
{
@@ -191,12 +457,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your conclusions here:\n",
- "\n"
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to Reject the Null Hypothesis: The mean of total points for water and non water pokemons are not significantly different.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if wp_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean of total points for water and non water pokemons are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean in total points are different for water and non water pokemon.\")"
]
},
{
@@ -210,12 +490,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Attack and defense scores\n",
+ "pokemon_attack = pokemon_df[\"Attack\"]\n",
+ "pokemon_defense = pokemon_df[\"Defense\"]"
]
},
{
@@ -227,12 +508,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your conclusions here:\n",
- "\n"
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 4.33\n",
+ "P-Value: 0.00002\n",
+ "\n",
+ "Reject the Null Hypothesis: There is a significant difference between defense and attack scores.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Perform paired t-test\n",
+ "pkp_stat, pkp_value = stats.ttest_rel(pokemon_attack, pokemon_defense)\n",
+ "print(f\"Test Statistic (t): {pkp_stat:.2f}\")\n",
+ "print(f\"P-Value: {pkp_value:.5f}\")\n",
+ "print()\n",
+ "\n",
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if pkp_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference between defense and attack scores.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference between defense and attack scores.\")"
]
},
{
@@ -244,12 +548,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 4.33\n",
+ "P-Value: 0.00002\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Special attack and special defense scores\n",
+ "pokemon_sp_attack = pokemon_df[\"Sp. Atk\"]\n",
+ "pokemon_sp_defense = pokemon_df[\"Sp. Def\"]\n",
+ "\n",
+ "# Perform paired t-test on the special stats (both columns come from the same rows of pokemon_df)\n",
+ "spkp_stat, spkp_value = stats.ttest_rel(pokemon_sp_attack, pokemon_sp_defense)\n",
+ "print(f\"Test Statistic (t): {spkp_stat:.2f}\")\n",
+ "print(f\"P-Value: {spkp_value:.5f}\")"
]
},
{
@@ -261,12 +580,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your conclusions here:\n",
- "\n"
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the Null Hypothesis: There is a significant difference between special defense and special attack scores.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if spkp_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference between special defense and special attack scores.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference between special defense and special attack scores.\")"
]
},
{
@@ -280,13 +613,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- " \n",
- " "
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 4.33\n",
+ "P-Value: 0.0000\n",
+ "\n",
+ "Reject the Null Hypothesis: There is evidence to say the mean of the difference is not zero.\n"
+ ]
+ }
+ ],
+ "source": [
+ "pokemon_df['pokemon_dif'] = pokemon_df[\"Attack\"] - pokemon_df[\"Defense\"]\n",
+ "pokemon_diff = pokemon_df[\"pokemon_dif\"]\n",
+ "pokemon_dif_mu = 0\n",
+ "\n",
+ "# Two-sided one-sample t-test on the Attack - Defense differences (H0: mean difference = 0)\n",
+ "dif_t_stat, dif_p_value = stats.ttest_1samp(pokemon_diff, pokemon_dif_mu)\n",
+ "print(f\"Test Statistic (t): {dif_t_stat:.2f}\")\n",
+ "print(f\"P-Value: {dif_p_value:.4f}\")\n",
+ "print()\n",
+ "\n",
+ "if dif_p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: Not enough evidence to say the mean of the difference is different from zero.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is evidence to say the mean of the difference is not zero.\") "
]
},
{
@@ -302,12 +657,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Type1_water False True \n",
+ "Legendary \n",
+ "False 627 108\n",
+ "True 61 4\n"
+ ]
+ }
+ ],
+ "source": [
+ "pokemon_df['Type1_water'] = pokemon_df['Type 1'] == 'Water'\n",
+ "crosstab_result = pd.crosstab(pokemon_df['Legendary'], pokemon_df['Type1_water'])\n",
+ "\n",
+ "print(crosstab_result)"
]
},
{
@@ -319,12 +687,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Chi2ContingencyResult(statistic=2.9429200762850503, pvalue=0.08625467249550985, dof=1, expected_freq=array([[632.1, 102.9],\n",
+ " [ 55.9, 9.1]]))"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "stats.chi2_contingency(crosstab_result)\n"
]
},
{
@@ -336,20 +715,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your answer here:\n",
- "\n"
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to Reject the Null Hypothesis\n"
+ ]
+ }
+ ],
+ "source": [
+ "pvalue_cross = stats.chi2_contingency(crosstab_result)[1] # p-value taken from the test result (index 1) instead of a pasted literal\n",
+ "\n",
+ "if pvalue_cross > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis\") "
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -368,7 +752,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.12.4"
}
},
"nbformat": 4,