From 89e8d1fb04a08c661bdb1eb451996c28b406b0af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20S=C3=A1nchez?= <jsctrad@gmail.com>
Date: Thu, 8 Jun 2023 15:35:09 +0100
Subject: [PATCH] [lab-hypothesis-testing-2] Javier Sanchez Camacho

---
 your-code/challenge-1.ipynb | 254 ++++++++++++++++++++++++++++++++----
 your-code/challenge-2.ipynb | 201 +++++++++++++++++++++++++---
 2 files changed, 407 insertions(+), 48 deletions(-)
diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
index c1bb43d..f2fa2c1 100755
--- a/your-code/challenge-1.ipynb
+++ b/your-code/challenge-1.ipynb
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -38,11 +38,119 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>#</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Type 1</th>\n",
+       "      <th>Type 2</th>\n",
+       "      <th>Total</th>\n",
+       "      <th>HP</th>\n",
+       "      <th>Attack</th>\n",
+       "      <th>Defense</th>\n",
+       "      <th>Sp. Atk</th>\n",
+       "      <th>Sp. Def</th>\n",
+       "      <th>Speed</th>\n",
+       "      <th>Generation</th>\n",
+       "      <th>Legendary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Bulbasaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>318</td>\n",
+       "      <td>45</td>\n",
+       "      <td>49</td>\n",
+       "      <td>49</td>\n",
+       "      <td>65</td>\n",
+       "      <td>65</td>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Ivysaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>405</td>\n",
+       "      <td>60</td>\n",
+       "      <td>62</td>\n",
+       "      <td>63</td>\n",
+       "      <td>80</td>\n",
+       "      <td>80</td>\n",
+       "      <td>60</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>525</td>\n",
+       "      <td>80</td>\n",
+       "      <td>82</td>\n",
+       "      <td>83</td>\n",
+       "      <td>100</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   #       Name Type 1  Type 2  Total  HP  Attack  Defense  Sp. Atk  Sp. Def  \\\n",
+       "0  1  Bulbasaur  Grass  Poison    318  45      49       49       65       65   \n",
+       "1  2    Ivysaur  Grass  Poison    405  60      62       63       80       80   \n",
+       "2  3   Venusaur  Grass  Poison    525  80      82       83      100      100   \n",
+       "\n",
+       "   Speed  Generation  Legendary  \n",
+       "0     45           1      False  \n",
+       "1     60           1      False  \n",
+       "2     80           1      False  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "pokemon = pd.read_csv('Pokemon.csv')\n",
+    "pokemon.head(3)\n"
    ]
   },
   {
@@ -58,10 +166,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<function __main__.t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total'])>"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
+    "from scipy import stats\n",
+    "\n",
     "def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n",
     "    \"\"\"Test means of a feature set of two samples\n",
     "    \n",
@@ -73,11 +194,11 @@
     "    Returns:\n",
     "        dict: a dictionary of t-test scores for each feature where the feature name is the key and the p-value is the value\n",
     "    \"\"\"\n",
-    "    results = {}\n",
-    "\n",
-    "    # Your code here\n",
+    "    results = {k:stats.ttest_ind(s1[k], s2[k])[1] for k in features}\n",
     "    \n",
-    "    return results"
+    "    return results\n",
+    "\n",
+    "t_test_features"
    ]
   },
   {
@@ -101,11 +222,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 3.330647684846191e-15,\n",
+       " 'Attack': 7.827253003205333e-24,\n",
+       " 'Defense': 1.5842226094427255e-12,\n",
+       " 'Sp. Atk': 6.314915770427266e-41,\n",
+       " 'Sp. Def': 1.8439809580409594e-26,\n",
+       " 'Speed': 2.3540754436898437e-21,\n",
+       " 'Total': 3.0952457469652825e-52}"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "legendary = t_test_features(pokemon[pokemon['Legendary'] == True], pokemon[pokemon['Legendary'] == False])\n",
+    "legendary\n"
    ]
   },
   {
@@ -121,7 +260,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# Yes: every p-value is far below 0.05, so all features differ significantly; the strongest differences are in Total and Sp. Atk"
    ]
   },
   {
@@ -133,11 +272,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 0.13791881412813622,\n",
+       " 'Attack': 0.24050968418101457,\n",
+       " 'Defense': 0.5407630349194362,\n",
+       " 'Sp. Atk': 0.14119788176331508,\n",
+       " 'Sp. Def': 0.16781226231606386,\n",
+       " 'Speed': 0.0028356954812578704,\n",
+       " 'Total': 0.5599140649014442}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "generation = t_test_features(pokemon[pokemon['Generation'] == 1], pokemon[pokemon['Generation'] == 2])\n",
+    "generation\n"
    ]
   },
   {
@@ -153,7 +310,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# Most features show no significant difference between Generation 1 and 2 (p > 0.05),\n",
+    "# Speed is the only feature that differs significantly (p ~ 0.003)"
    ]
   },
   {
@@ -165,11 +323,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 0.11060643144431842,\n",
+       " 'Attack': 0.00015741395666164396,\n",
+       " 'Defense': 3.250594205757004e-08,\n",
+       " 'Sp. Atk': 0.0001454917404035147,\n",
+       " 'Sp. Def': 0.00010893304795534396,\n",
+       " 'Speed': 0.024051410794037463,\n",
+       " 'Total': 1.1749035008828752e-07}"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "types = t_test_features(pokemon[pokemon['Type 2'].isnull() == False], pokemon[pokemon['Type 2'].isnull() == True])\n",
+    "types\n"
    ]
   },
   {
@@ -185,7 +361,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
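+    "# Every feature except HP differs significantly (p < 0.05) between pokemon with 2 types and with 1 type; Defense differs most strongly (compare the group means to confirm it is higher with 2 types)"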
    ]
   },
   {
@@ -199,11 +375,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "# Both stats are measured on the same pokemon (same population: all types, legendaries, etc.), so the samples are paired and a related t-test applies\n",
+    "att_def = stats.ttest_rel(pokemon['Attack'], pokemon['Defense'])\n",
+    "sp_att_def = stats.ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. Def'])\n",
+    "display(att_def)\n",
+    "display(sp_att_def)"
    ]
   },
   {
@@ -219,7 +418,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "\"\"\" Because of the high p-value of sp_att_def (0.39) we cannot reject the null hypothesis that Sp. Atk and Sp. Def have the same mean,\n",
+    "but because of the very low p-value of att_def (1.7e-05) we can reject it for Attack and Defense: their means differ significantly\"\"\""
    ]
   }
  ],
@@ -239,7 +439,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,
diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb
index 1f0e335..28e52e0 100755
--- a/your-code/challenge-2.ipynb
+++ b/your-code/challenge-2.ipynb
@@ -17,21 +17,129 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import libraries\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from scipy import stats"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>#</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Type 1</th>\n",
+       "      <th>Type 2</th>\n",
+       "      <th>Total</th>\n",
+       "      <th>HP</th>\n",
+       "      <th>Attack</th>\n",
+       "      <th>Defense</th>\n",
+       "      <th>Sp. Atk</th>\n",
+       "      <th>Sp. Def</th>\n",
+       "      <th>Speed</th>\n",
+       "      <th>Generation</th>\n",
+       "      <th>Legendary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Bulbasaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>318</td>\n",
+       "      <td>45</td>\n",
+       "      <td>49</td>\n",
+       "      <td>49</td>\n",
+       "      <td>65</td>\n",
+       "      <td>65</td>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Ivysaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>405</td>\n",
+       "      <td>60</td>\n",
+       "      <td>62</td>\n",
+       "      <td>63</td>\n",
+       "      <td>80</td>\n",
+       "      <td>80</td>\n",
+       "      <td>60</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>525</td>\n",
+       "      <td>80</td>\n",
+       "      <td>82</td>\n",
+       "      <td>83</td>\n",
+       "      <td>100</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   #       Name Type 1  Type 2  Total  HP  Attack  Defense  Sp. Atk  Sp. Def  \\\n",
+       "0  1  Bulbasaur  Grass  Poison    318  45      49       49       65       65   \n",
+       "1  2    Ivysaur  Grass  Poison    405  60      62       63       80       80   \n",
+       "2  3   Venusaur  Grass  Poison    525  80      82       83      100      100   \n",
+       "\n",
+       "   Speed  Generation  Legendary  \n",
+       "0     45           1      False  \n",
+       "1     60           1      False  \n",
+       "2     80           1      False  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Load the data:\n"
+    "pokemon = pd.read_csv('Pokemon.csv')\n",
+    "pokemon.head(3)\n"
    ]
   },
   {
@@ -58,13 +166,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',\n",
+       "       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',\n",
+       "       'Dragon', 'Dark', 'Steel', 'Flying', nan], dtype=object)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "unique_types = pd.concat([pokemon['Type 1'], pokemon['Type 2']]).unique()\n",
+    "unique_types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "19"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n",
-    "\n",
-    "\n",
     "len(unique_types) # you should see 19"
    ]
   },
@@ -85,13 +224,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "18"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "pokemon_totals = []\n",
-    "\n",
-    "# Your code here\n",
+    "pokemon_totals = [pokemon.loc[pokemon['Type 1'] == x, 'Total'] for x in unique_types if pd.isna(x) == False]\n",
     "\n",
     "len(pokemon_totals) # you should see 18"
    ]
@@ -111,11 +259,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "F_onewayResult(statistic=4.63876748166055, pvalue=2.077215448842098e-09)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "stats.f_oneway(*pokemon_totals)\n"
    ]
   },
   {
@@ -131,7 +290,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# Yes: the F statistic is 4.64 and the p-value (2.1e-09) is far below 0.05, so we reject the null hypothesis that all types share the same mean Total"
    ]
   }
  ],
@@ -151,7 +310,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,

	#	Name	Type 1	Type 2	Total	HP	Attack	Defense	Sp. Atk	Sp. Def	Speed	Generation	Legendary
0	1	Bulbasaur	Grass	Poison	318	45	49	49	65	65	45	1	False
1	2	Ivysaur	Grass	Poison	405	60	62	63	80	80	60	1	False
2	3	Venusaur	Grass	Poison	525	80	82	83	100	100	80	1	False