diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..e9406a5 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "# Libraries\n",
+ "import pandas as pd # manipulate dataframes\n",
+ "import numpy as np # numerical python\n",
+ "import matplotlib.pyplot as plt # viz\n",
+ "import scipy.stats as sts\n",
+ "from scipy.stats import chi2_contingency"
]
},
{
@@ -35,13 +40,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Run this code:\n",
- "\n",
- "pokemon = pd.read_csv('../pokemon.csv')"
+ "data = pd.read_csv('../pokemon.csv')\n",
+ "pokemon = data.copy()"
]
},
{
@@ -53,12 +58,154 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.head()"
]
},
{
@@ -70,12 +217,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon[\"Legendary\"].value_counts()\n"
]
},
{
@@ -87,12 +248,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 417.21 | \n",
+ " 106.76 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 637.38 | \n",
+ " 60.94 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Legendary \n",
+ "False 417.21 106.76\n",
+ "True 637.38 60.94"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "points_legend_df = pokemon.groupby('Legendary')['Total'].agg(['mean', 'std']).round(2)\n",
+ "points_legend_df\n"
]
},
{
@@ -104,14 +323,65 @@
"In the cell below, use the `ttest_ind` function in `scipy.stats` to compare the the total points for legendary and non-legendary Pokemon. Since we do not have any information about the population, assume the variances are not equal."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "# 0\n",
+ "Name 0\n",
+ "Type 1 0\n",
+ "Type 2 386\n",
+ "Total 0\n",
+ "HP 0\n",
+ "Attack 0\n",
+ "Defense 0\n",
+ "Sp. Atk 0\n",
+ "Sp. Def 0\n",
+ "Speed 0\n",
+ "Generation 0\n",
+ "Legendary 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pokemon.isnull().sum()"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 25.83\n",
+ "P-Value: 0.00000000000000000000000000000000000000000000009358\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Extract total points for Legendary and non legendary Pokemon\n",
+ "legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n",
+ "non_legendary = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n",
+ "\n",
+ "# Perform two-sample t-test for independent samples\n",
+ "t_stat, p_value = sts.ttest_ind(legendary, non_legendary, equal_var=False) # equal_var False as we do not have any information about the population\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.50f}\")\n",
+ "print()"
]
},
{
@@ -123,12 +393,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for legendary and non-legendary Pokemon are significantly different.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# We set our significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean total points for legendary and non-legendary Pokemon are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for legendary and non-legendary Pokemon are significantly different.\")"
]
},
{
@@ -140,12 +425,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Type 1\n",
+ "Water 112\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Rock 44\n",
+ "Electric 44\n",
+ "Ground 32\n",
+ "Ghost 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon[\"Type 1\"].value_counts()"
]
},
{
@@ -157,12 +472,74 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Type1_water | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 435.86 | \n",
+ " 121.09 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 430.46 | \n",
+ " 113.19 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Type1_water \n",
+ "False 435.86 121.09\n",
+ "True 430.46 113.19"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Create a new column with True values when it is a Type 1 water Pokemon, else false\n",
+ "pokemon['Type1_water'] = pokemon['Type 1'] == 'Water'\n",
+ "\n",
+ "# Compare the mean and std of total points for water Pokemon to all other Pokemon\n",
+ "points_water_df = pokemon.groupby('Type1_water')['Total'].agg(['mean', 'std']).round(2)\n",
+ "points_water_df"
]
},
{
@@ -176,10 +553,27 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): -0.46\n",
+ "P-Value: 0.64\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Extract total points for water and non-water Pokemon\n",
+ "water = pokemon[pokemon[\"Type1_water\"] == True][\"Total\"]\n",
+ "non_water = pokemon[pokemon[\"Type1_water\"] == False][\"Total\"]\n",
+ "\n",
+ "# Perform two-sample t-test for independent samples\n",
+ "t_stat, p_value = sts.ttest_ind(water, non_water, equal_var=False) # equal_var False as we do not have any information about the population\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.2f}\")"
]
},
{
@@ -191,12 +585,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to Reject the Null Hypothesis: The mean total points for water and non-water type Pokemon are not significantly different.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# We set our significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The mean total points for water and non-water type Pokemon are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points for water and non-water type Pokemon are significantly different.\")"
]
},
{
@@ -210,12 +619,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 4.33\n",
+ "P-Value: 0.000017\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Perform paired t-test\n",
+ "t_stat, p_value = sts.ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.6f}\")"
]
},
{
@@ -227,12 +648,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reject the Null Hypothesis: There is a significant difference between each Pokemon's defense and attack scores.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference between each Pokemon's defense and attack scores.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference between each Pokemon's defense and attack scores.\")"
]
},
{
@@ -244,12 +680,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 0.85\n",
+ "P-Value: 0.393369\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Perform paired t-test\n",
+ "t_stat, p_value = sts.ttest_rel(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.6f}\")"
]
},
{
@@ -266,7 +714,14 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference between each Pokemon's special defense and special attack scores.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference between each Pokemon's special defense and special attack scores.\")"
]
},
{
@@ -282,11 +737,21 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): -4.33\n",
+ "P-Value: 0.000017\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- " \n",
- " "
+ "t_stat, p_value = sts.ttest_1samp(pokemon[\"Defense\"]-pokemon[\"Attack\"], popmean=0) # we want to test whether the mean of the differences is zero\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
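+ "print(f\"P-Value: {p_value:.6f}\") # Same p-value as the paired t-test on Attack vs Defense (t has the opposite sign because the difference is Defense - Attack), so the mean difference is significantly different from 0"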
]
},
{
@@ -302,12 +767,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Type1_water | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 627 | \n",
+ " 108 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 61 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Type1_water False True \n",
+ "Legendary \n",
+ "False 627 108\n",
+ "True 61 4"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "legend_type_crosstab = pd.crosstab(pokemon['Legendary'], pokemon['Type1_water'])\n",
+ "legend_type_crosstab"
]
},
{
@@ -319,12 +842,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.08625467249550949"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "chi2_stats, chi2_pvalue, _, _ = chi2_contingency(legend_type_crosstab)\n",
+ "chi2_pvalue"
]
},
{
@@ -336,25 +871,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fail to Reject the Null Hypothesis: No significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\n"
+ ]
+ }
+ ],
"source": [
"# Your answer here:\n",
- "\n"
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if chi2_pvalue > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference between a Pokemon being legenadary or not and a Pokemon being Type 1 water or not.\")"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -368,7 +911,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.12.7"
}
},
"nbformat": 4,