Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 251 additions & 24 deletions your-code/challenge-1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import libraries\n",
"import pandas as pd"
"import pandas as pd\n",
"import scipy.stats as st"
]
},
{
Expand All @@ -38,11 +39,155 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>#</th>\n",
" <th>Name</th>\n",
" <th>Type 1</th>\n",
" <th>Type 2</th>\n",
" <th>Total</th>\n",
" <th>HP</th>\n",
" <th>Attack</th>\n",
" <th>Defense</th>\n",
" <th>Sp. Atk</th>\n",
" <th>Sp. Def</th>\n",
" <th>Speed</th>\n",
" <th>Generation</th>\n",
" <th>Legendary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Bulbasaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>318</td>\n",
" <td>45</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>65</td>\n",
" <td>65</td>\n",
" <td>45</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Ivysaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>405</td>\n",
" <td>60</td>\n",
" <td>62</td>\n",
" <td>63</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Venusaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>525</td>\n",
" <td>80</td>\n",
" <td>82</td>\n",
" <td>83</td>\n",
" <td>100</td>\n",
" <td>100</td>\n",
" <td>80</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>VenusaurMega Venusaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>625</td>\n",
" <td>80</td>\n",
" <td>100</td>\n",
" <td>123</td>\n",
" <td>122</td>\n",
" <td>120</td>\n",
" <td>80</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>Charmander</td>\n",
" <td>Fire</td>\n",
" <td>NaN</td>\n",
" <td>309</td>\n",
" <td>39</td>\n",
" <td>52</td>\n",
" <td>43</td>\n",
" <td>60</td>\n",
" <td>50</td>\n",
" <td>65</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" # Name Type 1 Type 2 Total HP Attack Defense \\\n",
"0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
"1 2 Ivysaur Grass Poison 405 60 62 63 \n",
"2 3 Venusaur Grass Poison 525 80 82 83 \n",
"3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
"4 4 Charmander Fire NaN 309 39 52 43 \n",
"\n",
" Sp. Atk Sp. Def Speed Generation Legendary \n",
"0 65 65 45 1 False \n",
"1 80 80 60 1 False \n",
"2 100 100 80 1 False \n",
"3 122 120 80 1 False \n",
"4 60 50 65 1 False "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Your code here:\n"
"# Your code here:\n",
"df = pd.read_csv('Pokemon.csv')\n",
"df.head()"
]
},
{
Expand All @@ -58,11 +203,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n",
"\n",
" \"\"\"Test means of a feature set of two samples\n",
" \n",
" Args:\n",
Expand All @@ -76,7 +222,9 @@
" results = {}\n",
"\n",
" # Your code here\n",
" \n",
" for feature in features:\n",
" t_stat, p_value = st.ttest_ind(s1[feature], s2[feature], equal_var=False)\n",
" results[feature] = {'t_statistic': t_stat, 'p_value': p_value}\n",
" return results"
]
},
Expand All @@ -101,11 +249,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': 8.981370483625046, 'p_value': 1.0026911708035284e-13}, 'Attack': {'t_statistic': 10.438133539322203, 'p_value': 2.520372449236646e-16}, 'Defense': {'t_statistic': 7.637078164784618, 'p_value': 4.826998494919331e-11}, 'Sp. Atk': {'t_statistic': 13.417449984138461, 'p_value': 1.5514614112239816e-21}, 'Sp. Def': {'t_statistic': 10.015696613114878, 'p_value': 2.2949327864052826e-15}, 'Speed': {'t_statistic': 11.47504444631443, 'p_value': 1.0490163118824507e-18}, 'Total': {'t_statistic': 25.8335743895517, 'p_value': 9.357954335957444e-47}}\n"
]
}
],
"source": [
"# Your code here\n"
"# Your code here\n",
"legendary = df[df['Legendary'] == True]\n",
"non_legendary = df[df['Legendary'] == False]\n",
"\n",
"t_test_results = t_test_features(legendary, non_legendary, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -121,7 +282,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# the p-values are less than 0.05; at a confidence level of 95% we therefore reject the null hypothesis for every feature"
]
},
{
Expand All @@ -133,11 +294,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': -1.4609700002846653, 'p_value': 0.14551697834219626}, 'Attack': {'t_statistic': 1.1603052805533747, 'p_value': 0.24721958967217725}, 'Defense': {'t_statistic': -0.5724173235153119, 'p_value': 0.5677711011725426}, 'Sp. Atk': {'t_statistic': 1.54608675231508, 'p_value': 0.12332165977104388}, 'Sp. Def': {'t_statistic': -1.3203746053318755, 'p_value': 0.18829872292645752}, 'Speed': {'t_statistic': 3.069594374071931, 'p_value': 0.00239265937312135}, 'Total': {'t_statistic': 0.579073329450271, 'p_value': 0.5631377907941676}}\n"
]
}
],
"source": [
"# Your code here\n"
"generation_1 = df[df['Generation'] == 1]\n",
"generation_2 = df[df['Generation'] == 2]\n",
"\n",
"t_test_results = t_test_features(generation_1, generation_2, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -153,7 +326,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# the p-values are higher than 0.05 for all features except Speed (p ~ 0.002); therefore, at a confidence level of 95% we fail to reject the null hypothesis for those features, but reject it for Speed"
]
},
{
Expand All @@ -165,11 +338,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': -1.586088850338319, 'p_value': 0.11314389855379413}, 'Attack': {'t_statistic': -3.810556219950897, 'p_value': 0.00014932578145948305}, 'Defense': {'t_statistic': -5.60979416640793, 'p_value': 2.7978540411514693e-08}, 'Sp. Atk': {'t_statistic': -3.828976815384819, 'p_value': 0.00013876216585667907}, 'Sp. Def': {'t_statistic': -3.892991138685155, 'p_value': 0.00010730610934512779}, 'Speed': {'t_statistic': -2.258014040079978, 'p_value': 0.02421703281819093}, 'Total': {'t_statistic': -5.355678438759113, 'p_value': 1.1157056505229964e-07}}\n"
]
}
],
"source": [
"# Your code here\n"
"# Your code here\n",
"df['Type Count'] = (df['Type 2'].notnull()).astype(int)\n",
"\n",
"single_type = df[df['Type Count'] == 0] \n",
"two_types = df[df['Type Count'] == 1] \n",
"\n",
"t_test_results = t_test_features(single_type, two_types, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -185,7 +373,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# Fail to reject the null hypothesis for 'HP'.\n",
"# Reject the null hypothesis for 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', and 'Total'."
]
},
{
Expand All @@ -199,11 +388,48 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy.stats import ttest_rel\n",
"#H0: There is no significant difference between the Attack and Defense.\n",
"#H1: There is a significant difference between the Attack and Defense.\n",
"\n",
"ttest_rel(df[\"Attack\"], df[\"Defense\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Your code here\n"
"#H0: There is no significant difference between the Sp. Atk and Sp. Def.\n",
"#H1: There is a significant difference between the Sp. Atk and Sp. Def.\n",
"ttest_rel(df[\"Sp. Atk\"], df[\"Sp. Def\"])"
]
},
{
Expand All @@ -219,7 +445,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# Reject the null hypothesis for the `Attack` vs `Defense` test, indicating a significant difference.\n",
"# Fail to reject the null hypothesis for `Sp. Atk` vs `Sp. Def` test, suggesting no significant difference."
]
}
],
Expand All @@ -239,7 +466,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
Loading