From 8d0f6c4e78a8a5133e8406ac149fb649916f74f7 Mon Sep 17 00:00:00 2001 From: leticiademarchiferreira <127995157+leticiademarchiferreira@users.noreply.github.com> Date: Mon, 22 May 2023 22:36:50 +0200 Subject: [PATCH] C:\Users\Leticia Demarchi\Documents\Ironhack Lisbon April\LABS\5 Week\lab-hypothesis-testing-2\your-code --- your-code/challenge-1.ipynb | 437 ++++++++++++++++++++++++++++++++++-- your-code/challenge-2.ipynb | 91 ++++++-- 2 files changed, 493 insertions(+), 35 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..185826e 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -19,15 +20,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "# Import libraries\n", - "import pandas as pd" + "import pandas as pd \n", + "import numpy as np\n", + "import scipy.stats as st \n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import statistics as stats" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -38,14 +44,269 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
..........................................
795719DiancieRockFairy60050100150100150506True
796719DiancieMega DiancieRockFairy700501601101601101106True
797720HoopaHoopa ConfinedPsychicGhost6008011060150130706True
798720HoopaHoopa UnboundPsychicDark6808016060170130806True
799721VolcanionFireWater6008011012013090706True
\n", + "

800 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \\\n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + ".. ... ... ... ... ... .. ... ... \n", + "795 719 Diancie Rock Fairy 600 50 100 150 \n", + "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n", + "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n", + "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n", + "799 721 Volcanion Fire Water 600 80 110 120 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + ".. ... ... ... ... ... \n", + "795 100 150 50 6 True \n", + "796 160 110 110 6 True \n", + "797 150 130 70 6 True \n", + "798 170 130 80 6 True \n", + "799 130 90 70 6 True \n", + "\n", + "[800 rows x 13 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "pokemon_dataset = pd.read_csv(\"Pokemon.csv\")\n", + "pokemon_dataset" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -58,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -75,12 +336,33 @@ " \"\"\"\n", " results = {}\n", "\n", - " # Your code here\n", " \n", " return results" ] }, { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'c1_sample_f' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[37], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m st\u001b[39m.\u001b[39mttest_1samp(c1_sample_f,alternative \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mgreater\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[1;31mNameError\u001b[0m: name 'c1_sample_f' is not defined" + ] + } + ], + "source": [ + "st.ttest_1samp(c1_sample_f,alternative = \"greater\")" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -105,10 +387,37 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "# 2. Choose significance\n", + "\n", + "alpha = 0.05\n", + "\n", + "\n", + "# 3. Sample\n", + "\n", + "c3_sample = data[data['Legendary'] == False].sample(30)\n", + "c3_sample\n", + "\n", + "c3_sample_f = c3_sample['Fare']\n", + "c3_sample_f\n", + "\n", + "\n", + "# 4. Compute statistic\n", + "\n", + "#(X - mu)/ (s/np.sqrt(n))\n", + "\n", + "mean = c3_sample_f.mean()\n", + "std = c3_sample_f.std(ddof=1)\n", + "#display(mean)\n", + "#display(std)\n", + "\n", + "stat = (mean - 17)/ (std/ np.sqrt(30))\n", + "stat\n", + "\n", + "\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -125,6 +434,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -133,14 +443,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HP: p-value = 0.13791881412813622\n", + "Attack: p-value = 0.24050968418101457\n", + "Defense: p-value = 0.5407630349194362\n", + "Sp. Atk: p-value = 0.14119788176331508\n", + "Sp. Def: p-value = 0.16781226231606386\n", + "Speed: p-value = 0.00283569548125787\n", + "Total: p-value = 0.5599140649014442\n" + ] + } + ], "source": [ - "# Your code here\n" + "generation1 = pokemon_dataset[pokemon_dataset['Generation'] == 1]\n", + "generation2 = pokemon_dataset[pokemon_dataset['Generation'] == 2]\n", + "\n", + "features = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n", + "\n", + "results = {}\n", + "\n", + "for feature in features:\n", + " t_statistic, p_value = st.ttest_ind(generation1[feature], generation2[feature])\n", + " results[feature] = p_value\n", + "\n", + "for feature, p_value in results.items():\n", + " print(f\"{feature}: p-value = {p_value}\")" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,6 +494,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,14 +503,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HP: p-value = 0.00449543553844474\n", + "Attack: p-value = 0.000370355609965253\n", + "Defense: p-value = 6.45113287201131e-09\n", + "Sp. Atk: p-value = 0.00039957189426682263\n", + "Sp. Def: p-value = 2.386335925903074e-05\n", + "Speed: p-value = 0.010305499382423\n", + "Total: p-value = 1.760520006413763e-07\n" + ] + } + ], "source": [ - "# Your code here\n" + "import scipy.stats as stats\n", + "import pandas as pd\n", + "\n", + "\n", + "single_type_pokemon = pokemon_dataset[pokemon_dataset['Type 2'].isnull()]\n", + "two_type_pokemon = pokemon_dataset[~pokemon_dataset['Type 2'].isnull()]\n", + "\n", + "features = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n", + "\n", + "results = {}\n", + "\n", + "for feature in features:\n", + " stat, p_value = stats.mannwhitneyu(single_type_pokemon[feature], two_type_pokemon[feature], alternative='two-sided')\n", + " results[feature] = p_value\n", + "\n", + "\n", + "for feature, p_value in results.items():\n", + " print(f\"{feature}: p-value = {p_value}\")\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -189,6 +559,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -199,14 +570,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attack vs Defense:\n", + "T-Statistic: 4.325566393330478\n", + "P-Value: 1.7140303479358558e-05\n", + "\n", + "Sp. Atk vs Sp. Def:\n", + "T-Statistic: 0.853986188453353\n", + "P-Value: 0.3933685997548122\n" + ] + } + ], "source": [ - "# Your code here\n" + "attack = pokemon_dataset['Attack']\n", + "defense = pokemon_dataset['Defense']\n", + "sp_atk = pokemon_dataset['Sp. Atk']\n", + "sp_def = pokemon_dataset['Sp. Def']\n", + "\n", + "t_statistic1, p_value1 = st.ttest_rel(attack, defense)\n", + "t_statistic2, p_value2 = st.ttest_rel(sp_atk, sp_def)\n", + "\n", + "print(\"Attack vs Defense:\")\n", + "print(\"T-Statistic:\", t_statistic1)\n", + "print(\"P-Value:\", p_value1)\n", + "print(\"\\nSp. Atk vs Sp. Def:\")\n", + "print(\"T-Statistic:\", t_statistic2)\n", + "print(\"P-Value:\", p_value2)\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -239,7 +638,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.2" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..b5f3b98 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -17,24 +18,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "# Import libraries\n", + "import scipy.stats as st\n", "import pandas as pd" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Load the data:\n" + "pokemon_dataset = pd.read_csv(\"Pokemon.csv\")\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -48,6 +50,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -58,17 +61,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 Grass\n", + "1 Fire\n", + "2 Water\n", + "3 Bug\n", + "4 Normal\n", + "5 Poison\n", + "6 Electric\n", + "7 Ground\n", + "8 Fairy\n", + "9 Fighting\n", + "10 Psychic\n", + "11 Rock\n", + "12 Ghost\n", + "13 Ice\n", + "14 Dragon\n", + "15 Dark\n", + "16 Steel\n", + "17 Flying\n", + "dtype: object\n" + ] + } + ], "source": [ - "# Your code here\n", + "type1_values = pokemon_dataset['Type 1'].unique()\n", + "type2_values = pokemon_dataset['Type 2'].unique()\n", "\n", + "unique_types = pd.Series(pd.concat([pd.Series(type1_values), pd.Series(type2_values)]).dropna().unique())\n", "\n", - "len(unique_types) # you should see 19" + "print(unique_types)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -85,18 +117,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18\n" + ] + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "for pokemon_type in unique_types:\n", + " if isinstance(pokemon_type, str): \n", + " type_total = pokemon_dataset.loc[(pokemon_dataset['Type 1'] == pokemon_type) | (pokemon_dataset['Type 2'] == pokemon_type), 'Total']\n", + " pokemon_totals.append(type_total.values)\n", "\n", - "len(pokemon_totals) # you should see 18" + "print(len(pokemon_totals))" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -111,14 +155,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F-value: 6.6175382960055344\n", + "p-value: 2.6457458815984803e-15\n" + ] + } + ], "source": [ - "# Your code here\n" + "import scipy.stats as stats\n", + "\n", + "f_value, p_value = stats.f_oneway(*pokemon_totals)\n", + "\n", + "print(\"F-value:\", f_value)\n", + "print(\"p-value:\", p_value)\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -151,7 +210,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.2" } }, "nbformat": 4,