From ab89d3ef5fca24fc347ee243ef52d3feb2655fe5 Mon Sep 17 00:00:00 2001
From: EmiliaHorton12 <emiliavictoriah@gmail.com>
Date: Fri, 14 Mar 2025 18:21:30 +0100
Subject: [PATCH] Update main.ipynb

---
 your-code/main.ipynb | 487 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 428 insertions(+), 59 deletions(-)
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..c5f5370 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 193,
    "metadata": {},
    "outputs": [],
    "source": [
     "# import numpy and pandas\n",
-    "\n"
+    "import math \n",
+    "import pandas as pd # manipulate dataframes\n",
+    "import numpy as np # numerical python\n",
+    "import matplotlib.pyplot as plt # viz\n",
+    "\n",
+    "# New libraries\n",
+    "import scipy.stats as st # stats\n",
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf\n",
+    "from scipy.stats import chi2_contingency"
    ]
   },
   {
@@ -35,7 +44,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 143,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,12 +62,154 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>#</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Type 1</th>\n",
+       "      <th>Type 2</th>\n",
+       "      <th>Total</th>\n",
+       "      <th>HP</th>\n",
+       "      <th>Attack</th>\n",
+       "      <th>Defense</th>\n",
+       "      <th>Sp. Atk</th>\n",
+       "      <th>Sp. Def</th>\n",
+       "      <th>Speed</th>\n",
+       "      <th>Generation</th>\n",
+       "      <th>Legendary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Bulbasaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>318</td>\n",
+       "      <td>45</td>\n",
+       "      <td>49</td>\n",
+       "      <td>49</td>\n",
+       "      <td>65</td>\n",
+       "      <td>65</td>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Ivysaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>405</td>\n",
+       "      <td>60</td>\n",
+       "      <td>62</td>\n",
+       "      <td>63</td>\n",
+       "      <td>80</td>\n",
+       "      <td>80</td>\n",
+       "      <td>60</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>525</td>\n",
+       "      <td>80</td>\n",
+       "      <td>82</td>\n",
+       "      <td>83</td>\n",
+       "      <td>100</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>VenusaurMega Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>625</td>\n",
+       "      <td>80</td>\n",
+       "      <td>100</td>\n",
+       "      <td>123</td>\n",
+       "      <td>122</td>\n",
+       "      <td>120</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Charmander</td>\n",
+       "      <td>Fire</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>309</td>\n",
+       "      <td>39</td>\n",
+       "      <td>52</td>\n",
+       "      <td>43</td>\n",
+       "      <td>60</td>\n",
+       "      <td>50</td>\n",
+       "      <td>65</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   #                   Name Type 1  Type 2  Total  HP  Attack  Defense  \\\n",
+       "0  1              Bulbasaur  Grass  Poison    318  45      49       49   \n",
+       "1  2                Ivysaur  Grass  Poison    405  60      62       63   \n",
+       "2  3               Venusaur  Grass  Poison    525  80      82       83   \n",
+       "3  3  VenusaurMega Venusaur  Grass  Poison    625  80     100      123   \n",
+       "4  4             Charmander   Fire     NaN    309  39      52       43   \n",
+       "\n",
+       "   Sp. Atk  Sp. Def  Speed  Generation  Legendary  \n",
+       "0       65       65     45           1      False  \n",
+       "1       80       80     60           1      False  \n",
+       "2      100      100     80           1      False  \n",
+       "3      122      120     80           1      False  \n",
+       "4       60       50     65           1      False  "
+      ]
+     },
+     "execution_count": 145,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "pokemon.head()"
    ]
   },
   {
@@ -70,12 +221,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 147,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Legendary\n",
+       "False    735\n",
+       "True      65\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 147,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "pokemon['Legendary'].value_counts()"
    ]
   },
   {
@@ -87,12 +252,69 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 149,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mean</th>\n",
+       "      <th>std</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Legendary</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>False</th>\n",
+       "      <td>417.213605</td>\n",
+       "      <td>106.760417</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>True</th>\n",
+       "      <td>637.384615</td>\n",
+       "      <td>60.937389</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                 mean         std\n",
+       "Legendary                        \n",
+       "False      417.213605  106.760417\n",
+       "True       637.384615   60.937389"
+      ]
+     },
+     "execution_count": 149,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "pokemon.groupby('Legendary')['Total'].agg(['mean', 'std'])"
    ]
   },
   {
@@ -106,12 +328,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 153,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 25.8336\n",
+      "P-value: 0.0000\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n",
+    "non_legendary = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n",
+    "t_stat, p_value = st.ttest_ind(legendary, non_legendary, equal_var=False)\n",
+    "print(f\"T-statistic: {t_stat:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")"
    ]
   },
   {
@@ -128,7 +363,9 @@
    "outputs": [],
    "source": [
     "# Your conclusions here:\n",
-    "\n"
+    "# H0 = there is no difference in mean Total between Legendary and non-legendary pokemon\n",
+    "# H1 = there is a difference in mean Total between Legendary and non-legendary pokemon\n",
+    "# The result is statistically significant (p < 0.05), so we reject H0."
    ]
   },
   {
@@ -140,12 +377,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 157,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Type 1\n",
+       "Water       112\n",
+       "Normal       98\n",
+       "Grass        70\n",
+       "Bug          69\n",
+       "Psychic      57\n",
+       "Fire         52\n",
+       "Electric     44\n",
+       "Rock         44\n",
+       "Ghost        32\n",
+       "Ground       32\n",
+       "Dragon       32\n",
+       "Dark         31\n",
+       "Poison       28\n",
+       "Fighting     27\n",
+       "Steel        27\n",
+       "Ice          24\n",
+       "Fairy        17\n",
+       "Flying        4\n",
+       "Name: count, dtype: int64"
+      ]
+     },
+     "execution_count": 157,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "pokemon.value_counts('Type 1')"
    ]
   },
   {
@@ -157,12 +424,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 167,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mean    430.455357\n",
+       "std     113.188266\n",
+       "Name: Total, dtype: float64"
+      ]
+     },
+     "execution_count": 167,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"].agg(['mean', 'std'])\n",
+    "other_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"].agg(['mean', 'std'])\n",
+    "water_pokemon"
    ]
   },
   {
@@ -174,12 +456,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 171,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: -0.0298\n",
+      "P-value: 0.9790\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "t_stat, p_value = st.ttest_ind(water_pokemon, other_pokemon, equal_var=True)\n",
+    "print(f\"T-statistic: {t_stat:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")"
    ]
   },
   {
@@ -196,7 +489,9 @@
    "outputs": [],
    "source": [
     "# Your conclusions here:\n",
-    "\n"
+    "# H0 = there is no difference between water pokemons and the others. \n",
+    "# H1 = there is a difference between water pokemons and the others.\n",
+    "# The result is not statistically significant, so we fail to reject H0."
    ]
   },
   {
@@ -210,12 +505,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 174,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: -4.3256\n",
+      "P-value: 0.0000\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "defense = pokemon['Defense']\n",
+    "attack = pokemon['Attack']\n",
+    "t_stat, p_value = st.ttest_rel(defense, attack)\n",
+    "print(f\"T-statistic: {t_stat:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")"
    ]
   },
   {
@@ -232,24 +540,39 @@
    "outputs": [],
    "source": [
     "# Your conclusions here:\n",
-    "\n"
+    "# H0 = there is no difference between the mean Defense and the mean Attack of pokemon\n",
+    "# H1 = there is a difference between the mean Defense and the mean Attack of pokemon\n",
+    "# p < 0.05, so we reject H0"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We are also curious about whether therer is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. "
+    "We are also curious about whether there is a significant difference between the mean of special defense and the mean of special attack. Perform the hypothesis test in the cell below. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 176,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: -0.8540\n",
+      "P-value: 0.3934\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "sp_def = pokemon['Sp. Def']\n",
+    "sp_atk = pokemon['Sp. Atk']\n",
+    "t_stat, p_value = st.ttest_rel(sp_def, sp_atk)\n",
+    "print(f\"T-statistic: {t_stat:.4f}\")\n",
+    "print(f\"P-value: {p_value:.4f}\")"
    ]
   },
   {
@@ -266,7 +589,9 @@
    "outputs": [],
    "source": [
     "# Your conclusions here:\n",
-    "\n"
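+    "# H0 = there is no difference between the mean Sp. Def and the mean Sp. Atk of pokemon\n",
+    "# H1 = there is a difference between the mean Sp. Def and the mean Sp. Atk of pokemon\n",
+    "# p = 0.3934 > 0.05, so we fail to reject H0"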
    ]
   },
   {
@@ -280,13 +605,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 187,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "One-sample t-test on the difference:\n",
+      "T-statistic: -4.3256, P-value: 0.0000\n",
+      "\n",
+      "Paired t-test (standard approach):\n",
+      "T-statistic: -4.3256, P-value: 0.0000\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "    \n",
-    "    "
+    "difference = pokemon['Defense'] - pokemon['Attack']\n",
+    "t_stat_1samp, p_value_1samp = st.ttest_1samp(difference, popmean=0)\n",
+    "\n",
+    "# Cross-check: the standard paired t-test on the same columns should give matching results\n",
+    "t_stat_paired, p_value_paired = st.ttest_rel(pokemon['Defense'], pokemon['Attack'])\n",
+    "print(\"One-sample t-test on the difference:\")\n",
+    "print(f\"T-statistic: {t_stat_1samp:.4f}, P-value: {p_value_1samp:.4f}\")\n",
+    "\n",
+    "print(\"\\nPaired t-test (standard approach):\")\n",
+    "print(f\"T-statistic: {t_stat_paired:.4f}, P-value: {p_value_paired:.4f}\")"
    ]
   },
   {
@@ -302,12 +647,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 189,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Legendary  False  True \n",
+      "Type 1                 \n",
+      "False        627     61\n",
+      "True         108      4\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "tab = pd.crosstab(pokemon['Type 1'] == 'Water', pokemon['Legendary'])\n",
+    "print(tab)"
    ]
   },
   {
@@ -319,12 +676,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 197,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.08625467249550949"
+      ]
+     },
+     "execution_count": 197,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "_, chi2_pvalue, _, _ = chi2_contingency(tab)\n",
+    "chi2_pvalue"
    ]
   },
   {
@@ -341,7 +710,7 @@
    "outputs": [],
    "source": [
     "# Your answer here:\n",
-    "\n"
+    "# The result is not statistically significant (p = 0.0863). You cannot reject H0.\n"
    ]
   },
   {
@@ -372,5 +741,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }

	#	Name	Type 1	Type 2	Total	HP	Attack	Defense	Sp. Atk	Sp. Def	Speed	Generation	Legendary
0	1	Bulbasaur	Grass	Poison	318	45	49	49	65	65	45	1	False
1	2	Ivysaur	Grass	Poison	405	60	62	63	80	80	60	1	False
2	3	Venusaur	Grass	Poison	525	80	82	83	100	100	80	1	False
3	3	VenusaurMega Venusaur	Grass	Poison	625	80	100	123	122	120	80	1	False
4	4	Charmander	Fire	NaN	309	39	52	43	60	50	65	1	False