ta-data-lis · Patchila · Aug 10, 2023
diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
@@ -19,12 +19,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Import libraries\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "from scipy import stats\n",
+    "from scipy.stats import ttest_ind\n",
+    "from scipy.stats import ttest_rel"
    ]
   },
   {
@@ -38,11 +41,94 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   #                   Name Type 1  Type 2  Total  HP  Attack  Defense  \\\n",
+      "0  1              Bulbasaur  Grass  Poison    318  45      49       49   \n",
+      "1  2                Ivysaur  Grass  Poison    405  60      62       63   \n",
+      "2  3               Venusaur  Grass  Poison    525  80      82       83   \n",
+      "3  3  VenusaurMega Venusaur  Grass  Poison    625  80     100      123   \n",
+      "4  4             Charmander   Fire     NaN    309  39      52       43   \n",
+      "\n",
+      "   Sp. Atk  Sp. Def  Speed  Generation  Legendary  \n",
+      "0       65       65     45           1      False  \n",
+      "1       80       80     60           1      False  \n",
+      "2      100      100     80           1      False  \n",
+      "3      122      120     80           1      False  \n",
+      "4       60       50     65           1      False  \n",
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 800 entries, 0 to 799\n",
+      "Data columns (total 13 columns):\n",
+      " #   Column      Non-Null Count  Dtype \n",
+      "---  ------      --------------  ----- \n",
+      " 0   #           800 non-null    int64 \n",
+      " 1   Name        800 non-null    object\n",
+      " 2   Type 1      800 non-null    object\n",
+      " 3   Type 2      414 non-null    object\n",
+      " 4   Total       800 non-null    int64 \n",
+      " 5   HP          800 non-null    int64 \n",
+      " 6   Attack      800 non-null    int64 \n",
+      " 7   Defense     800 non-null    int64 \n",
+      " 8   Sp. Atk     800 non-null    int64 \n",
+      " 9   Sp. Def     800 non-null    int64 \n",
+      " 10  Speed       800 non-null    int64 \n",
+      " 11  Generation  800 non-null    int64 \n",
+      " 12  Legendary   800 non-null    bool  \n",
+      "dtypes: bool(1), int64(9), object(3)\n",
+      "memory usage: 75.9+ KB\n",
+      "None\n",
+      "                #      Total          HP      Attack     Defense     Sp. Atk  \\\n",
+      "count  800.000000  800.00000  800.000000  800.000000  800.000000  800.000000   \n",
+      "mean   362.813750  435.10250   69.258750   79.001250   73.842500   72.820000   \n",
+      "std    208.343798  119.96304   25.534669   32.457366   31.183501   32.722294   \n",
+      "min      1.000000  180.00000    1.000000    5.000000    5.000000   10.000000   \n",
+      "25%    184.750000  330.00000   50.000000   55.000000   50.000000   49.750000   \n",
+      "50%    364.500000  450.00000   65.000000   75.000000   70.000000   65.000000   \n",
+      "75%    539.250000  515.00000   80.000000  100.000000   90.000000   95.000000   \n",
+      "max    721.000000  780.00000  255.000000  190.000000  230.000000  194.000000   \n",
+      "\n",
+      "          Sp. Def       Speed  Generation  \n",
+      "count  800.000000  800.000000   800.00000  \n",
+      "mean    71.902500   68.277500     3.32375  \n",
+      "std     27.828916   29.060474     1.66129  \n",
+      "min     20.000000    5.000000     1.00000  \n",
+      "25%     50.000000   45.000000     2.00000  \n",
+      "50%     70.000000   65.000000     3.00000  \n",
+      "75%     90.000000   90.000000     5.00000  \n",
+      "max    230.000000  180.000000     6.00000  \n",
+      "                    #       Total         HP      Attack    Defense  \\\n",
+      "Legendary                                                             \n",
+      "False      353.315646  417.213605  67.182313   75.669388  71.559184   \n",
+      "True       470.215385  637.384615  92.738462  116.676923  99.661538   \n",
+      "\n",
+      "              Sp. Atk     Sp. Def       Speed  Generation  \n",
+      "Legendary                                                  \n",
+      "False       68.454422   68.892517   65.455782    3.284354  \n",
+      "True       122.184615  105.938462  100.184615    3.769231  \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/wj/9yy2qymx1q58s3n2gkl01vj00000gn/T/ipykernel_3956/559280183.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
+      "  legendary_vs_normal = data.groupby(\"Legendary\").mean()\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Load the Pokemon dataset and take a first look at its structure.\n",
+    "data = pd.read_csv(\"pokemon.csv\")\n",
+    "print(data.head())\n",
+    "data.info()  # prints its own summary and returns None, so it must not be wrapped in print()\n",
+    "print(data.describe())\n",
+    "legendary_vs_normal = data.groupby(\"Legendary\").mean(numeric_only=True)  # text columns have no mean\n",
+    "print(legendary_vs_normal)"
    ]
   },
   {
@@ -58,7 +144,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -75,7 +161,16 @@
     "    \"\"\"\n",
     "    results = {}\n",
     "\n",
-    "    # Your code here\n",
+    "    for feature in features:\n",
+    "        \n",
+    "        s1_values = s1[feature]\n",
+    "        s2_values = s2[feature]\n",
+    "        \n",
+    "      \n",
+    "        t_statistic, p_value = ttest_ind(s1_values, s2_values)\n",
+    "        \n",
+    "        \n",
+    "        results[feature] = p_value\n",
     "    \n",
     "    return results"
    ]
@@ -101,11 +196,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 50,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'HP': 3.330647684846191e-15, 'Attack': 7.827253003205333e-24, 'Defense': 1.5842226094427255e-12, 'Sp. Atk': 6.314915770427266e-41, 'Sp. Def': 1.8439809580409594e-26, 'Speed': 2.3540754436898437e-21, 'Total': 3.0952457469652825e-52}\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "# 'Legendary' is already a boolean column, so the column itself serves as the row mask.\n",
+    "legendary_pokemon = data.loc[data['Legendary']]\n",
+    "non_legendary_pokemon = data.loc[~data['Legendary']]\n",
+    "\n",
+    "# One p-value per stat for Legendary vs. non-Legendary Pokemon.\n",
+    "results_legendary_vs_non_legendary = t_test_features(legendary_pokemon, non_legendary_pokemon)\n",
+    "print(results_legendary_vs_non_legendary)"
    ]
   },
   {
@@ -121,7 +230,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
+    "# The p-values are far below common significance levels (e.g., 0.05) for every stat, so we reject the null hypothesis of equal means: the differences observed in the stats (HP, Attack, Defense, etc.) between Legendary and Non-Legendary Pokémon are unlikely to be due to random chance."
    ]
   },
   {
@@ -133,11 +242,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 51,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'HP': 0.13791881412813622, 'Attack': 0.24050968418101445, 'Defense': 0.5407630349194362, 'Sp. Atk': 0.14119788176331508, 'Sp. Def': 0.1678122623160639, 'Speed': 0.0028356954812578704, 'Total': 0.5599140649014442}\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "generation_1_pokemon = data.loc[data['Generation'].eq(1)]  # rows from the first generation\n",
+    "generation_2_pokemon = data.loc[data['Generation'].eq(2)]  # rows from the second generation\n",
+    "results_gen1_vs_gen2 = t_test_features(generation_1_pokemon, generation_2_pokemon)\n",
+    "print(results_gen1_vs_gen2)"
    ]
   },
   {
@@ -153,7 +273,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
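+    "# Each p-value is the probability of seeing a difference at least this large if Generation 1 and Generation 2 really had the same mean. At the 0.05 significance level only Speed (p ≈ 0.003) differs significantly; for HP, Attack, Defense, Sp. Atk, Sp. Def and Total (all p > 0.05) we cannot reject the null hypothesis. The conclusion depends on the significance level you choose, so fix it before interpreting the results."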
    ]
   },
   {
@@ -165,11 +285,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 52,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'HP': 0.11060643144431842, 'Attack': 0.00015741395666164396, 'Defense': 3.250594205757004e-08, 'Sp. Atk': 0.0001454917404035147, 'Sp. Def': 0.00010893304795534396, 'Speed': 0.02405141079403746, 'Total': 1.1749035008828752e-07}\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "single_type_pokemon = data.loc[data['Type 2'].isna()]  # no secondary type recorded\n",
+    "dual_type_pokemon = data.loc[data['Type 2'].notna()]  # secondary type present\n",
+    "results_single_type_vs_dual_type = t_test_features(single_type_pokemon, dual_type_pokemon)\n",
+    "print(results_single_type_vs_dual_type)"
    ]
   },
   {
@@ -185,7 +316,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
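+    "# Each p-value is the probability of seeing a difference at least this large if single-type and dual-type Pokémon really had the same mean. At the 0.05 significance level Attack, Defense, Sp. Atk, Sp. Def, Speed and Total all differ significantly; only HP (p ≈ 0.11) does not."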
    ]
   },
   {
@@ -199,11 +330,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 54,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Attack vs Defense:\n",
+      "T-Statistic: 4.325566393330478\n",
+      "P-Value: 1.7140303479358558e-05\n",
+      "\n",
+      "Sp. Atk vs Sp. Def:\n",
+      "T-Statistic: 0.853986188453353\n",
+      "P-Value: 0.3933685997548122\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here\n"
+    "# Paired t-tests: every Pokemon supplies both values of a pair, so the two samples are dependent.\n",
+    "attack_values, defense_values = data['Attack'], data['Defense']\n",
+    "sp_atk_values, sp_def_values = data['Sp. Atk'], data['Sp. Def']\n",
+    "\n",
+    "attack_vs_defense_t_statistic, attack_vs_defense_p_value = ttest_rel(attack_values, defense_values)\n",
+    "sp_atk_vs_sp_def_t_statistic, sp_atk_vs_sp_def_p_value = ttest_rel(sp_atk_values, sp_def_values)\n",
+    "\n",
+    "# (heading, t-statistic, p-value) for each comparison; the second heading keeps its leading\n",
+    "# newline so the two reports stay separated by a blank line.\n",
+    "reports = [\n",
+    "    (\"Attack vs Defense:\", attack_vs_defense_t_statistic, attack_vs_defense_p_value),\n",
+    "    (\"\\nSp. Atk vs Sp. Def:\", sp_atk_vs_sp_def_t_statistic, sp_atk_vs_sp_def_p_value),\n",
+    "]\n",
+    "for heading, t_value, p_value in reports:\n",
+    "    print(heading)\n",
+    "    print(\"T-Statistic:\", t_value)\n",
+    "    print(\"P-Value:\", p_value)"
    ]
   },
   {
@@ -219,13 +380,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your comment here"
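+    "# Sp. Atk vs Sp. Def: with p ≈ 0.39 you don't have strong evidence to conclude that there is a significant difference between the Special Attack and Special Defense means. Attack vs Defense: with p ≈ 1.7e-05 and a positive t-statistic, the mean Attack is significantly higher than the mean Defense."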
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -239,7 +400,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,