From a078d38d81e5332c33b114814e95fdc95add43d8 Mon Sep 17 00:00:00 2001 From: Dulce-04 <136611956+Dulce-04@users.noreply.github.com> Date: Fri, 25 Aug 2023 19:12:12 +0100 Subject: [PATCH] lab-hypothesis-testing-2 --- your-code/challenge-1.ipynb | 433 +++++++++++++++++++++++++++++++++--- your-code/challenge-2.ipynb | 331 +++++++++++++++++++++++++-- 2 files changed, 718 insertions(+), 46 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..0bd87ea 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,12 +19,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy as stats\n", + "from scipy.stats import ttest_rel\n", + "from scipy.stats import ttest_ind\n" ] }, { @@ -38,11 +42,303 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
..........................................
795719DiancieRockFairy60050100150100150506True
796719DiancieMega DiancieRockFairy700501601101601101106True
797720HoopaHoopa ConfinedPsychicGhost6008011060150130706True
798720HoopaHoopa UnboundPsychicDark6808016060170130806True
799721VolcanionFireWater6008011012013090706True
\n", + "

800 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + ".. ... ... ... ... ... .. ... ... \n", + "795 719 Diancie Rock Fairy 600 50 100 150 \n", + "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n", + "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n", + "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n", + "799 721 Volcanion Fire Water 600 80 110 120 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + ".. ... ... ... ... ... \n", + "795 100 150 50 6 True \n", + "796 160 110 110 6 True \n", + "797 150 130 70 6 True \n", + "798 170 130 80 6 True \n", + "799 130 90 70 6 True \n", + "\n", + "[800 rows x 13 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon = pd.read_csv(r\"C:\\Users\\dulce\\OneDrive\\Documentos\\Ironhack git\\Ironhack\\Labs\\Labs week 5\\lab-hypothesis-testing-2\\your-code\\Pokemon.csv\")\n", + "pokemon" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 800 entries, 0 to 799\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 # 800 non-null int64 \n", + " 1 Name 800 non-null object\n", + " 2 Type 1 800 non-null object\n", + " 3 Type 2 414 non-null object\n", + " 4 Total 800 non-null int64 \n", + " 5 HP 800 non-null int64 \n", + " 6 Attack 800 non-null int64 \n", + " 7 Defense 800 non-null int64 \n", + " 8 Sp. 
Atk 800 non-null int64 \n", + " 9 Sp. Def 800 non-null int64 \n", + " 10 Speed 800 non-null int64 \n", + " 11 Generation 800 non-null int64 \n", + " 12 Legendary 800 non-null bool \n", + "dtypes: bool(1), int64(9), object(3)\n", + "memory usage: 75.9+ KB\n" + ] + } + ], "source": [ - "# Your code here:\n" + "pokemon.columns\n", + "\n", + "pokemon.info()" ] }, { @@ -58,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -72,12 +368,7 @@ " \n", " Returns:\n", " dict: a dictionary of t-test scores for each feature where the feature name is the key and the p-value is the value\n", - " \"\"\"\n", - " results = {}\n", - "\n", - " # Your code here\n", - " \n", - " return results" + " \"\"\"" ] }, { @@ -101,11 +392,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 3.330647684846191e-15, 'Attack': 7.827253003205333e-24, 'Defense': 1.5842226094427255e-12, 'Sp. Atk': 6.314915770427266e-41, 'Sp. Def': 1.8439809580409594e-26, 'Speed': 2.3540754436898437e-21, 'Total': 3.0952457469652825e-52}\n" + ] + } + ], "source": [ - "# Your code here\n" + "import pandas as pd\n", + "import scipy.stats as stats\n", + "\n", + "def t_test_features(feature, s1, s2):\n", + " legendary = feature[feature[s2] == True][s1]\n", + " non_legendary = feature[feature[s2] == False][s1]\n", + " t_statistic, p_value = stats.ttest_ind(legendary, non_legendary)\n", + " return p_value\n", + "\n", + "\n", + "feature_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. 
Def', 'Speed', 'Total']\n", + "t_test_results = {}\n", + "\n", + "for feature_column in feature_columns:\n", + " p_value = t_test_features(pokemon, feature_column, 'Legendary')\n", + " t_test_results[feature_column] = p_value\n", + "\n", + "print(t_test_results)\n" ] }, { @@ -121,7 +437,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# It is possible to conclude that Legendary and non-Legendary Pokémon have significantly \n", + "# different statistics for each feature. This suggests that there is a clear distinction \n", + "# in stats between the two groups, with Legendary Pokémon having higher \n", + "# stats compared to non-Legendary Pokémon." ] }, { @@ -133,11 +452,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 0.13791881412813622, 'Attack': 0.24050968418101457, 'Defense': 0.5407630349194362, 'Sp. Atk': 0.14119788176331508, 'Sp. Def': 0.16781226231606386, 'Speed': 0.0028356954812578704, 'Total': 0.5599140649014442}\n" + ] + } + ], "source": [ - "# Your code here\n" + "feature_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n", + "t_test_results = {}\n", + "\n", + "feature_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']\n", + "t_test_results = {}\n", + "\n", + "for feature_column in feature_columns:\n", + " gen1 = pokemon[pokemon['Generation'] == 1][feature_column]\n", + " gen2 = pokemon[pokemon['Generation'] == 2][feature_column]\n", + " t_statistic, p_value = stats.ttest_ind(gen1, gen2)\n", + " t_test_results[feature_column] = p_value\n", + "\n", + "print(t_test_results)" ] }, { @@ -153,7 +492,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Generation 1 and Generation 2 Pokémon do not differ significantly on most stats; \n", + "# the only statistically significant difference (p-value below 0.05) is in Speed." 
] }, { @@ -165,11 +505,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': 0.11060643144431842, 'Attack': 0.00015741395666164396, 'Defense': 3.250594205757004e-08, 'Sp. Atk': 0.0001454917404035147, 'Sp. Def': 0.00010893304795534396, 'Speed': 0.024051410794037463, 'Total': 1.1749035008828752e-07}\n" + ] + } + ], "source": [ - "# Your code here\n" + "\n", + "\n" ] }, { @@ -199,11 +548,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attack vs Defense:\n", + "t-statistic: 4.325566393330478\n", + "p-value: 1.7140303479358558e-05\n", + "\n", + "Sp. Atk vs Sp. Def:\n", + "t-statistic: 0.853986188453353\n", + "p-value: 0.3933685997548122\n" + ] + } + ], "source": [ - "# Your code here\n" + "import scipy.stats as stats\n", + "\n", + "t_statistic_ad, p_value_ad = stats.ttest_rel(pokemon['Attack'], pokemon['Defense'])\n", + "\n", + "t_statistic_ss, p_value_ss = stats.ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. Def'])\n", + "\n", + "print(\"Attack vs Defense:\")\n", + "print(f\"t-statistic: {t_statistic_ad}\")\n", + "print(f\"p-value: {p_value_ad}\")\n", + "\n", + "print(\"\\nSp. Atk vs Sp. Def:\")\n", + "print(f\"t-statistic: {t_statistic_ss}\")\n", + "print(f\"p-value: {p_value_ss}\")" ] }, { @@ -219,7 +594,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Attack and Defense differ significantly: the paired t-test p-value (about 1.7e-05) is below 0.05. \n", + "# There is no significant difference between the Special Attack and Special Defense stats \n", + "# of all Pokémon: the p-value (about 0.39) is greater than 0.05, so that difference is not statistically significant." 
] } ], @@ -239,7 +616,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..b24049f 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,277 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
..........................................
795719DiancieRockFairy60050100150100150506True
796719DiancieMega DiancieRockFairy700501601101601101106True
797720HoopaHoopa ConfinedPsychicGhost6008011060150130706True
798720HoopaHoopa UnboundPsychicDark6808016060170130806True
799721VolcanionFireWater6008011012013090706True
\n", + "

800 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + ".. ... ... ... ... ... .. ... ... \n", + "795 719 Diancie Rock Fairy 600 50 100 150 \n", + "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n", + "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n", + "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n", + "799 721 Volcanion Fire Water 600 80 110 120 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + ".. ... ... ... ... ... \n", + "795 100 150 50 6 True \n", + "796 160 110 110 6 True \n", + "797 150 130 70 6 True \n", + "798 170 130 80 6 True \n", + "799 130 90 70 6 True \n", + "\n", + "[800 rows x 13 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "pokemon = pd.read_csv(r\"C:\\Users\\dulce\\OneDrive\\Documentos\\Ironhack git\\Labs\\Labs week 5\\lab-hypothesis-testing-2\\your-code\\Pokemon.csv\")\n", + "pokemon" ] }, { @@ -58,14 +314,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n", + "combined_types = pd.concat([pokemon['Type 1'], pokemon['Type 2']])\n", "\n", + "unique_types = combined_types.unique()\n", "\n", - "len(unique_types) # you should see 19" + "len(unique_types)" ] }, { @@ -85,15 +353,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 
11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "for pokemon_type in unique_types:\n", + " if isinstance(pokemon_type, str):\n", + " \n", + " type_total = pokemon[pokemon['Type 1'] == pokemon_type]['Total']\n", + " pokemon_totals.append(type_total.tolist())\n", "\n", - "len(pokemon_totals) # you should see 18" + "len(pokemon_totals)" ] }, { @@ -111,11 +394,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ANOVA p-value: 2.077215448842098e-09\n" + ] + } + ], "source": [ - "# Your code here\n" + "from scipy import stats\n", + "\n", + "anova_result = stats.f_oneway(*pokemon_totals)\n", + "\n", + "print(\"ANOVA p-value:\", anova_result.pvalue)" ] }, { @@ -131,7 +426,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# There are significant variations in the total values between the different groups of Pokemon types." ] } ], @@ -151,7 +446,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,