ta-data-lis · HenrikSoeder · Nov 22, 2023
diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
@@ -19,12 +19,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import libraries\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import scipy.stats as st"
    ]
   },
   {
@@ -38,11 +39,138 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>#</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Type 1</th>\n",
+       "      <th>Type 2</th>\n",
+       "      <th>Total</th>\n",
+       "      <th>HP</th>\n",
+       "      <th>Attack</th>\n",
+       "      <th>Defense</th>\n",
+       "      <th>Sp. Atk</th>\n",
+       "      <th>Sp. Def</th>\n",
+       "      <th>Speed</th>\n",
+       "      <th>Generation</th>\n",
+       "      <th>Legendary</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Bulbasaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>318</td>\n",
+       "      <td>45</td>\n",
+       "      <td>49</td>\n",
+       "      <td>49</td>\n",
+       "      <td>65</td>\n",
+       "      <td>65</td>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Ivysaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>405</td>\n",
+       "      <td>60</td>\n",
+       "      <td>62</td>\n",
+       "      <td>63</td>\n",
+       "      <td>80</td>\n",
+       "      <td>80</td>\n",
+       "      <td>60</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>525</td>\n",
+       "      <td>80</td>\n",
+       "      <td>82</td>\n",
+       "      <td>83</td>\n",
+       "      <td>100</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>VenusaurMega Venusaur</td>\n",
+       "      <td>Grass</td>\n",
+       "      <td>Poison</td>\n",
+       "      <td>625</td>\n",
+       "      <td>80</td>\n",
+       "      <td>100</td>\n",
+       "      <td>123</td>\n",
+       "      <td>122</td>\n",
+       "      <td>120</td>\n",
+       "      <td>80</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   #                   Name Type 1  Type 2  Total  HP  Attack  Defense  \\\n",
+       "0  1              Bulbasaur  Grass  Poison    318  45      49       49   \n",
+       "1  2                Ivysaur  Grass  Poison    405  60      62       63   \n",
+       "2  3               Venusaur  Grass  Poison    525  80      82       83   \n",
+       "3  3  VenusaurMega Venusaur  Grass  Poison    625  80     100      123   \n",
+       "\n",
+       "   Sp. Atk  Sp. Def  Speed  Generation  Legendary  \n",
+       "0       65       65     45           1      False  \n",
+       "1       80       80     60           1      False  \n",
+       "2      100      100     80           1      False  \n",
+       "3      122      120     80           1      False  "
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "pokemon = pd.read_csv(\"Pokemon.csv\")\n",
+    "pokemon.head(4)"
    ]
   },
   {
@@ -58,11 +186,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n",
+    "    \n",
     "    \"\"\"Test means of a feature set of two samples\n",
     "    \n",
     "    Args:\n",
@@ -75,7 +204,19 @@
     "    \"\"\"\n",
     "    results = {}\n",
     "\n",
-    "    # Your code here\n",
+    "    for x in features:\n",
+    "    \n",
+    "        sample_1  = s1[x]\n",
+    "        sample_2  = s2[x]\n",
+    "\n",
+    "        # H0 = sample 1 mu  = sample2 mu\n",
+    "        # H1 = sample 1 mu != sample2 mu\n",
+    "        output = st.ttest_ind(sample_1, sample_2, equal_var = False)\n",
+    "        p_value = output[1]\n",
+    "\n",
+    "\n",
+    "        results[x] = p_value\n",
+    "\n",
     "    \n",
     "    return results"
    ]
@@ -101,11 +242,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 1.0026911708035284e-13,\n",
+       " 'Attack': 2.5203724492366553e-16,\n",
+       " 'Defense': 4.8269984949193316e-11,\n",
+       " 'Sp. Atk': 1.5514614112239705e-21,\n",
+       " 'Sp. Def': 2.294932786405291e-15,\n",
+       " 'Speed': 1.0490163118824585e-18,\n",
+       " 'Total': 9.357954335957446e-47}"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "df1 = pokemon[pokemon[\"Legendary\"] == True]\n",
+    "df2 = pokemon[pokemon[\"Legendary\"] == False]\n",
+    "\n",
+    "t_test_features(df1, df2)"
    ]
   },
   {
@@ -121,7 +282,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# the null hypothesis was that Legendary and non-Legendary Pokemon have the same mean for each feature;\n",
+    "# every p-value is far below the conventional 0.05 level, so it is rejected: the two groups have significantly different means"
    ]
   },
   {
@@ -133,11 +295,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 0.14551697834219632,\n",
+       " 'Attack': 0.24721958967217725,\n",
+       " 'Defense': 0.5677711011725426,\n",
+       " 'Sp. Atk': 0.12332165977104392,\n",
+       " 'Sp. Def': 0.18829872292645758,\n",
+       " 'Speed': 0.0023926593731213508,\n",
+       " 'Total': 0.5631377907941676}"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "df1 = pokemon[pokemon[\"Generation\"] == 1]\n",
+    "df2 = pokemon[pokemon[\"Generation\"] == 2]\n",
+    "\n",
+    "t_test_features(df1, df2)\n"
    ]
   },
   {
@@ -153,7 +335,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# for every feature except Speed the p-value is above 0.05, so we can NOT reject the hypothesis that Generation 1 and 2 have equal means:\n",
+    "# they might have similar values, but we DON'T KNOW; only Speed (p = 0.0024) shows a significant difference"
    ]
   },
   {
@@ -165,11 +348,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 56,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'HP': 0.11314389855379418,\n",
+       " 'Attack': 0.00014932578145948305,\n",
+       " 'Defense': 2.797854041151469e-08,\n",
+       " 'Sp. Atk': 0.00013876216585667901,\n",
+       " 'Sp. Def': 0.00010730610934512777,\n",
+       " 'Speed': 0.024217032818190928,\n",
+       " 'Total': 1.1157056505229961e-07}"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "one_type = pokemon[pokemon[\"Type 2\"].isnull() == True]\n",
+    "two_type = pokemon[pokemon[\"Type 2\"].isnull() == False]\n",
+    "\n",
+    "df1 = one_type\n",
+    "df2 = two_type\n",
+    "\n",
+    "t_test_features(df1, df2)"
    ]
   },
   {
@@ -185,7 +391,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
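+    "# we can obtain from the \"Total\" column (p = 1.1e-07, where the hypothesis of equal means got rejected),\n",
+    "# that single-type and dual-type Pokemon indeed have different mean values; only HP (p = 0.113) shows no significant difference"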
    ]
   },
   {
@@ -199,11 +406,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 61,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "p_value for attack and defense:  0.0012123980547317907\n",
+      "p_value for Sp. attack and Sp. defense:  0.5458436328840434\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "attack_defense = st.f_oneway(pokemon[\"Attack\"], pokemon[\"Defense\"])\n",
+    "print(\"p_value for attack and defense: \",attack_defense[1])\n",
+    "\n",
+    "spatt_spdef = st.f_oneway(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n",
+    "print(\"p_value for Sp. attack and Sp. defense: \",spatt_spdef[1])"
    ]
   },
   {
@@ -219,13 +439,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# there is a significant difference between the means of \"Attack\" and \"Defense\" (p = 0.0012 < 0.05)\n",
+    "# we cannot say there is a difference between the means of \"Sp. Atk\" and \"Sp. Def\" (p = 0.546, equal means not rejected)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -239,7 +460,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,