From 8d0f6c4e78a8a5133e8406ac149fb649916f74f7 Mon Sep 17 00:00:00 2001
From: leticiademarchiferreira
<127995157+leticiademarchiferreira@users.noreply.github.com>
Date: Mon, 22 May 2023 22:36:50 +0200
Subject: [PATCH] C:\Users\Leticia Demarchi\Documents\Ironhack Lisbon
April\LABS\5 Week\lab-hypothesis-testing-2\your-code
---
your-code/challenge-1.ipynb | 437 ++++++++++++++++++++++++++++++++++--
your-code/challenge-2.ipynb | 91 ++++++--
2 files changed, 493 insertions(+), 35 deletions(-)
diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
index c1bb43d..185826e 100755
--- a/your-code/challenge-1.ipynb
+++ b/your-code/challenge-1.ipynb
@@ -1,6 +1,7 @@
{
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -19,15 +20,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
- "# Import libraries\n",
- "import pandas as pd"
+ "import pandas as pd \n",
+ "import numpy as np\n",
+ "import scipy.stats as st \n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "import statistics as stats"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -38,14 +44,269 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 795 | \n",
+ " 719 | \n",
+ " Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 600 | \n",
+ " 50 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 100 | \n",
+ " 150 | \n",
+ " 50 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 796 | \n",
+ " 719 | \n",
+ " DiancieMega Diancie | \n",
+ " Rock | \n",
+ " Fairy | \n",
+ " 700 | \n",
+ " 50 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 160 | \n",
+ " 110 | \n",
+ " 110 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 797 | \n",
+ " 720 | \n",
+ " HoopaHoopa Confined | \n",
+ " Psychic | \n",
+ " Ghost | \n",
+ " 600 | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 60 | \n",
+ " 150 | \n",
+ " 130 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 798 | \n",
+ " 720 | \n",
+ " HoopaHoopa Unbound | \n",
+ " Psychic | \n",
+ " Dark | \n",
+ " 680 | \n",
+ " 80 | \n",
+ " 160 | \n",
+ " 60 | \n",
+ " 170 | \n",
+ " 130 | \n",
+ " 80 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 799 | \n",
+ " 721 | \n",
+ " Volcanion | \n",
+ " Fire | \n",
+ " Water | \n",
+ " 600 | \n",
+ " 80 | \n",
+ " 110 | \n",
+ " 120 | \n",
+ " 130 | \n",
+ " 90 | \n",
+ " 70 | \n",
+ " 6 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
800 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \\\n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ ".. ... ... ... ... ... .. ... ... \n",
+ "795 719 Diancie Rock Fairy 600 50 100 150 \n",
+ "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n",
+ "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n",
+ "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n",
+ "799 721 Volcanion Fire Water 600 80 110 120 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False \n",
+ ".. ... ... ... ... ... \n",
+ "795 100 150 50 6 True \n",
+ "796 160 110 110 6 True \n",
+ "797 150 130 70 6 True \n",
+ "798 170 130 80 6 True \n",
+ "799 130 90 70 6 True \n",
+ "\n",
+ "[800 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "pokemon_dataset = pd.read_csv(\"Pokemon.csv\")\n",
+ "pokemon_dataset"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -58,7 +319,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -75,12 +336,33 @@
" \"\"\"\n",
" results = {}\n",
"\n",
- " # Your code here\n",
" \n",
" return results"
]
},
{
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'c1_sample_f' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[37], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m st\u001b[39m.\u001b[39mttest_1samp(c1_sample_f,alternative \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgreater\u001b[39m\u001b[39m\"\u001b[39m)\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'c1_sample_f' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "st.ttest_1samp(pokemon_dataset.loc[pokemon_dataset['Legendary'], 'Total'], popmean=pokemon_dataset['Total'].mean(), alternative=\"greater\")  # H1: Legendary pokemon have a higher mean Total than the dataset as a whole"
+ ]
+ },
+ {
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -105,10 +387,37 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your code here\n"
+ "# H0 (per stat): Legendary and non-Legendary pokemon share the same mean.\n",
+ "# H1 (per stat): the two means differ (two-sided test).\n",
+ "\n",
+ "# Significance level shared by every test in this cell.\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Split the frame loaded above on its boolean 'Legendary' column.\n",
+ "is_legendary = pokemon_dataset['Legendary']\n",
+ "legendary = pokemon_dataset[is_legendary]\n",
+ "non_legendary = pokemon_dataset[~is_legendary]\n",
+ "\n",
+ "# Stat columns compared between the two groups.\n",
+ "features = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n",
+ "\n",
+ "# Welch's two-sample t-test per stat: equal_var=False avoids assuming that\n",
+ "# both groups share one variance.\n",
+ "legendary_results = {\n",
+ "    feature: st.ttest_ind(\n",
+ "        legendary[feature], non_legendary[feature], equal_var=False\n",
+ "    ).pvalue\n",
+ "    for feature in features\n",
+ "}\n",
+ "\n",
+ "# Report each p-value with the decision it implies at the chosen alpha.\n",
+ "for feature, p_value in legendary_results.items():\n",
+ "    verdict = 'reject H0' if p_value < alpha else 'fail to reject H0'\n",
+ "    print(f\"{feature}: p-value = {p_value} -> {verdict}\")"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -125,6 +434,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -133,14 +443,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "HP: p-value = 0.13791881412813622\n",
+ "Attack: p-value = 0.24050968418101457\n",
+ "Defense: p-value = 0.5407630349194362\n",
+ "Sp. Atk: p-value = 0.14119788176331508\n",
+ "Sp. Def: p-value = 0.16781226231606386\n",
+ "Speed: p-value = 0.00283569548125787\n",
+ "Total: p-value = 0.5599140649014442\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Independent two-sample t-test, Generation 1 vs Generation 2, one test per stat column.\n",
+ "generation1 = pokemon_dataset.query('Generation == 1')\n",
+ "generation2 = pokemon_dataset.query('Generation == 2')\n",
+ "\n",
+ "features = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n",
+ "\n",
+ "# Map each stat to the p-value of its test (scipy's default equal-variance form).\n",
+ "results = {\n",
+ "    column: st.ttest_ind(generation1[column], generation2[column]).pvalue\n",
+ "    for column in features\n",
+ "}\n",
+ "\n",
+ "print('\\n'.join(f\"{column}: p-value = {p}\" for column, p in results.items()))"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -157,6 +494,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -165,14 +503,46 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 44,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "HP: p-value = 0.00449543553844474\n",
+ "Attack: p-value = 0.000370355609965253\n",
+ "Defense: p-value = 6.45113287201131e-09\n",
+ "Sp. Atk: p-value = 0.00039957189426682263\n",
+ "Sp. Def: p-value = 2.386335925903074e-05\n",
+ "Speed: p-value = 0.010305499382423\n",
+ "Total: p-value = 1.760520006413763e-07\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Single-type pokemon have no 'Type 2' value; everything else is dual-type.\n",
+ "has_second_type = pokemon_dataset['Type 2'].notna()\n",
+ "single_type_pokemon = pokemon_dataset[~has_second_type]\n",
+ "two_type_pokemon = pokemon_dataset[has_second_type]\n",
+ "\n",
+ "features = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n",
+ "\n",
+ "# Two-sided Mann-Whitney U test per stat. Uses the `st` alias from the imports cell so\n",
+ "# the name `stats` keeps pointing at the `statistics` module imported there.\n",
+ "results = {\n",
+ "    feature: st.mannwhitneyu(\n",
+ "        single_type_pokemon[feature], two_type_pokemon[feature], alternative='two-sided'\n",
+ "    ).pvalue\n",
+ "    for feature in features\n",
+ "}\n",
+ "\n",
+ "for feature, p_value in results.items():\n",
+ "    print(f\"{feature}: p-value = {p_value}\")\n"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -189,6 +559,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -199,14 +570,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Attack vs Defense:\n",
+ "T-Statistic: 4.325566393330478\n",
+ "P-Value: 1.7140303479358558e-05\n",
+ "\n",
+ "Sp. Atk vs Sp. Def:\n",
+ "T-Statistic: 0.853986188453353\n",
+ "P-Value: 0.3933685997548122\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Paired (dependent) t-tests: both columns of each pair are measured on the same pokemon.\n",
+ "paired_columns = [('Attack', 'Defense'), ('Sp. Atk', 'Sp. Def')]\n",
+ "\n",
+ "for position, (left, right) in enumerate(paired_columns):\n",
+ "    outcome = st.ttest_rel(pokemon_dataset[left], pokemon_dataset[right])\n",
+ "\n",
+ "    # A blank line separates consecutive reports (none before the first one).\n",
+ "    heading = f\"{left} vs {right}:\"\n",
+ "    if position:\n",
+ "        heading = \"\\n\" + heading\n",
+ "\n",
+ "    print(heading)\n",
+ "    print(\"T-Statistic:\", outcome.statistic)\n",
+ "    print(\"P-Value:\", outcome.pvalue)\n"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -239,7 +638,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.2"
}
},
"nbformat": 4,
diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb
index 1f0e335..b5f3b98 100755
--- a/your-code/challenge-2.ipynb
+++ b/your-code/challenge-2.ipynb
@@ -1,6 +1,7 @@
{
"cells": [
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,24 +18,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "# Import libraries\n",
+ "import scipy.stats as st\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "# Load the data:\n"
+ "pokemon_dataset = pd.read_csv(\"Pokemon.csv\")\n"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -48,6 +50,7 @@
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -58,17 +61,46 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 Grass\n",
+ "1 Fire\n",
+ "2 Water\n",
+ "3 Bug\n",
+ "4 Normal\n",
+ "5 Poison\n",
+ "6 Electric\n",
+ "7 Ground\n",
+ "8 Fairy\n",
+ "9 Fighting\n",
+ "10 Psychic\n",
+ "11 Rock\n",
+ "12 Ghost\n",
+ "13 Ice\n",
+ "14 Dragon\n",
+ "15 Dark\n",
+ "16 Steel\n",
+ "17 Flying\n",
+ "dtype: object\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here\n",
+ "type1_values = pokemon_dataset['Type 1'].unique()\n",
+ "type2_values = pokemon_dataset['Type 2'].unique()\n",
"\n",
+ "unique_types = pd.Series([*type1_values, *type2_values]).dropna().drop_duplicates().reset_index(drop=True)  # NaN marks a missing second type\n",
"\n",
- "len(unique_types) # you should see 19"
+ "print(unique_types)"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -85,18 +117,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "18\n"
+ ]
+ }
+ ],
"source": [
"pokemon_totals = []\n",
"\n",
- "# Your code here\n",
+ "# One array of 'Total' scores per type name; a pokemon counts for its primary and its secondary type.\n",
+ "type_columns = pokemon_dataset[['Type 1', 'Type 2']]\n",
+ "for type_name in filter(lambda value: isinstance(value, str), unique_types):\n",
+ "    pokemon_totals.append(pokemon_dataset.loc[type_columns.eq(type_name).any(axis=1), 'Total'].to_numpy())\n",
"\n",
- "len(pokemon_totals) # you should see 18"
+ "print(len(pokemon_totals))"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -111,14 +155,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "F-value: 6.6175382960055344\n",
+ "p-value: 2.6457458815984803e-15\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# One-way ANOVA across the per-type 'Total' groups collected in pokemon_totals.\n",
+ "# `st` is the scipy.stats alias from the imports cell, so no second import is needed.\n",
+ "f_value, p_value = st.f_oneway(*pokemon_totals)\n",
+ "\n",
+ "print(\"F-value:\", f_value)\n",
+ "print(\"p-value:\", p_value)\n"
]
},
{
+ "attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -151,7 +210,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.2"
}
},
"nbformat": 4,