diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..c5f5370 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 193,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import math \n",
+ "import pandas as pd # manipulate dataframes\n",
+ "import numpy as np # numerical python\n",
+ "import matplotlib.pyplot as plt # viz\n",
+ "\n",
+ "# New libraries\n",
+ "import scipy.stats as st # stats\n",
+ "import statsmodels.api as sm\n",
+ "import statsmodels.formula.api as smf\n",
+ "from scipy.stats import chi2_contingency"
]
},
{
@@ -35,7 +44,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 143,
"metadata": {},
"outputs": [],
"source": [
@@ -53,12 +62,154 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.head()"
]
},
{
@@ -70,12 +221,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 147,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 147,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon['Legendary'].value_counts()"
]
},
{
@@ -87,12 +252,69 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 417.213605 | \n",
+ " 106.760417 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 637.384615 | \n",
+ " 60.937389 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Legendary \n",
+ "False 417.213605 106.760417\n",
+ "True 637.384615 60.937389"
+ ]
+ },
+ "execution_count": 149,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.groupby('Legendary')['Total'].agg(['mean', 'std'])"
]
},
{
@@ -106,12 +328,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 153,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "T-statistic: 25.8336\n",
+ "P-value: 0.0000\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "legendary = pokemon.loc[pokemon[\"Legendary\"], \"Total\"]  # boolean column used directly as the row mask\n",
+ "non_legendary = pokemon.loc[~pokemon[\"Legendary\"], \"Total\"]\n",
+ "t_stat, p_value = st.ttest_ind(legendary, non_legendary, equal_var=False)  # equal_var=False: Welch's t-test\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.4f}\")"
]
},
{
@@ -128,7 +363,9 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# H0: Legendary and non-Legendary Pokemon have the same mean Total.\n",
+ "# H1: the mean Total differs between Legendary and non-Legendary Pokemon.\n",
+ "# The result is statistically significant (p < 0.05), so we reject H0."
]
},
{
@@ -140,12 +377,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 157,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Type 1\n",
+ "Water 112\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Electric 44\n",
+ "Rock 44\n",
+ "Ghost 32\n",
+ "Ground 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 157,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.value_counts('Type 1')"
]
},
{
@@ -157,12 +424,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mean 430.455357\n",
+ "std 113.188266\n",
+ "Name: Total, dtype: float64"
+ ]
+ },
+ "execution_count": 167,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "water_pokemon = pokemon.loc[pokemon[\"Type 1\"].eq(\"Water\"), \"Total\"].agg(['mean', 'std'])  # [mean, std] summary, not the sample\n",
+ "other_pokemon = pokemon.loc[pokemon[\"Type 1\"].ne(\"Water\"), \"Total\"].agg(['mean', 'std'])  # same summary for every other type\n",
+ "water_pokemon"
]
},
{
@@ -174,12 +456,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 171,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "T-statistic: -0.0298\n",
+ "P-value: 0.9790\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "t_stat, p_value = st.ttest_ind(pokemon.loc[pokemon[\"Type 1\"] == \"Water\", \"Total\"], pokemon.loc[pokemon[\"Type 1\"] != \"Water\", \"Total\"], equal_var=True)  # test the raw Total samples, not their [mean, std] summaries\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.4f}\")"
]
},
{
@@ -196,7 +489,9 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# H0: Water-type Pokemon and all other Pokemon have the same mean Total.\n",
+ "# H1: the mean Total differs between Water-type Pokemon and the others.\n",
+ "# The result is not statistically significant, so we fail to reject H0."
]
},
{
@@ -210,12 +505,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 174,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "T-statistic: -4.3256\n",
+ "P-value: 0.0000\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "defense = pokemon.loc[:, 'Defense']\n",
+ "attack = pokemon.loc[:, 'Attack']\n",
+ "t_stat, p_value = st.ttest_rel(a=defense, b=attack)  # paired test: both values come from the same row\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.4f}\")"
]
},
{
@@ -232,24 +540,39 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
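+ "# H0: the mean Defense equals the mean Attack (the mean paired difference is 0).\n",
+ "# H1: the mean Defense differs from the mean Attack.\n",
+ "# The result is statistically significant (p < 0.05), so we reject H0."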
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "We are also curious about whether therer is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. "
+ "We are also curious about whether there is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. "
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 176,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "T-statistic: -0.8540\n",
+ "P-value: 0.3934\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "sp_def = pokemon.loc[:, 'Sp. Def']\n",
+ "sp_atk = pokemon.loc[:, 'Sp. Atk']\n",
+ "t_stat, p_value = st.ttest_rel(a=sp_def, b=sp_atk)  # paired test: both values come from the same row\n",
+ "print(f\"T-statistic: {t_stat:.4f}\")\n",
+ "print(f\"P-value: {p_value:.4f}\")"
]
},
{
@@ -266,7 +589,9 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
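+ "# H0: the mean Sp. Def equals the mean Sp. Atk (the mean paired difference is 0).\n",
+ "# H1: the mean Sp. Def differs from the mean Sp. Atk.\n",
+ "# The result is not statistically significant (p = 0.3934), so we fail to reject H0."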
]
},
{
@@ -280,13 +605,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 187,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "One-sample t-test on the difference:\n",
+ "T-statistic: -4.3256, P-value: 0.0000\n",
+ "\n",
+ "Paired t-test (standard approach):\n",
+ "T-statistic: -4.3256, P-value: 0.0000\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- " \n",
- " "
+ "difference = pokemon['Defense'].sub(pokemon['Attack'])\n",
+ "t_stat_1samp, p_value_1samp = st.ttest_1samp(difference, popmean=0)\n",
+ "\n",
+ "# Cross-check: the paired t-test on the two original columns should report the same numbers\n",
+ "t_stat_paired, p_value_paired = st.ttest_rel(pokemon['Defense'], pokemon['Attack'])\n",
+ "print(\"One-sample t-test on the difference:\")\n",
+ "print(f\"T-statistic: {t_stat_1samp:.4f}, P-value: {p_value_1samp:.4f}\")\n",
+ "\n",
+ "print(\"\\nPaired t-test (standard approach):\")\n",
+ "print(f\"T-statistic: {t_stat_paired:.4f}, P-value: {p_value_paired:.4f}\")"
]
},
{
@@ -302,12 +647,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 189,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Legendary False True \n",
+ "Type 1 \n",
+ "False 627 61\n",
+ "True 108 4\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "tab = pd.crosstab(index=pokemon['Type 1'].eq('Water'), columns=pokemon['Legendary'])  # rows: is Water type; columns: Legendary flag\n",
+ "print(tab)"
]
},
{
@@ -319,12 +676,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 197,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.08625467249550949"
+ ]
+ },
+ "execution_count": 197,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "chi2_pvalue = chi2_contingency(tab)[1]  # element 1 of the result is the p-value; avoids rebinding `_`, IPython's last-output variable\n",
+ "chi2_pvalue"
]
},
{
@@ -341,7 +710,7 @@
"outputs": [],
"source": [
"# Your answer here:\n",
- "\n"
+ "# The result is not statistically significant at the 5% level (p = 0.0863), so we fail to reject H0 that being a Water type and being Legendary are independent.\n"
]
},
{
@@ -372,5 +741,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}