From 89e8d1fb04a08c661bdb1eb451996c28b406b0af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20S=C3=A1nchez?= Date: Thu, 8 Jun 2023 15:35:09 +0100 Subject: [PATCH] [lab-hypothesis-testing-2] Javier Sanchez Camacho --- your-code/challenge-1.ipynb | 254 ++++++++++++++++++++++++++++++++---- your-code/challenge-2.ipynb | 201 +++++++++++++++++++++++++--- 2 files changed, 407 insertions(+), 48 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..f2fa2c1 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -38,11 +38,119 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "pokemon = pd.read_csv('Pokemon.csv')\n", + "pokemon.head(3)\n" ] }, { @@ -58,10 +166,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "from scipy import stats\n", + "\n", "def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n", " \"\"\"Test means of a feature set of two samples\n", " \n", @@ -73,11 +194,11 @@ " Returns:\n", " dict: a dictionary of t-test scores for each feature where the feature name is the key and the p-value is the value\n", " \"\"\"\n", - " results = {}\n", - "\n", - " # Your code here\n", + " results = {k:stats.ttest_ind(s1[k], s2[k])[1] for k in features}\n", " \n", - " return results" + " return results\n", + "\n", + "t_test_features" ] }, { @@ -101,11 +222,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 3.330647684846191e-15,\n", + " 'Attack': 7.827253003205333e-24,\n", + " 'Defense': 1.5842226094427255e-12,\n", + " 'Sp. Atk': 6.314915770427266e-41,\n", + " 'Sp. 
Def': 1.8439809580409594e-26,\n", + " 'Speed': 2.3540754436898437e-21,\n", + " 'Total': 3.0952457469652825e-52}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "legendary = t_test_features(pokemon[pokemon['Legendary'] == True], pokemon[pokemon['Legendary'] == False])\n", + "legendary\n" ] }, { @@ -121,7 +260,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Yes: every p-value is far below 0.05, so Legendary and non-Legendary pokemon differ significantly on all features, especially Total and Sp. Atk" ] }, { @@ -133,11 +272,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.13791881412813622,\n", + " 'Attack': 0.24050968418101457,\n", + " 'Defense': 0.5407630349194362,\n", + " 'Sp. Atk': 0.14119788176331508,\n", + " 'Sp. Def': 0.16781226231606386,\n", + " 'Speed': 0.0028356954812578704,\n", + " 'Total': 0.5599140649014442}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "generation = t_test_features(pokemon[pokemon['Generation'] == 1], pokemon[pokemon['Generation'] == 2])\n", + "generation\n" ] }, { @@ -153,7 +310,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Generations 1 and 2 show no significant difference on most features (p > 0.05),\n", + " # Speed is the only feature whose means differ significantly (p = 0.0028)" ] }, { @@ -165,11 +323,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.11060643144431842,\n", + " 'Attack': 0.00015741395666164396,\n", + " 'Defense': 3.250594205757004e-08,\n", + " 'Sp. Atk': 0.0001454917404035147,\n", + " 'Sp. 
Def': 0.00010893304795534396,\n", + " 'Speed': 0.024051410794037463,\n", + " 'Total': 1.1749035008828752e-07}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "types = t_test_features(pokemon[pokemon['Type 2'].isnull() == False], pokemon[pokemon['Type 2'].isnull() == True])\n", + "types\n" ] }, { @@ -185,7 +361,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# We see that pokemon with 2 types have higher defense" ] }, { @@ -199,11 +375,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# Your code here\n" + "# if it's all pokemons it's the same population, types, legendaries, etc.\n", + "att_def = stats.ttest_rel(pokemon['Attack'], pokemon['Defense'])\n", + "sp_att_def = stats.ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. 
Def'])\n", + "display(att_def)\n", + "display(sp_att_def)" ] }, { @@ -219,7 +418,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "\"\"\" Because of the low values of sp_att_sp_df we cannot reject the null hypothesis that the values are different,\n", + "but because of the higher values of att_def, we can reject it\"\"\"" ] } ], @@ -239,7 +439,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..28e52e0 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "pokemon = pd.read_csv('Pokemon.csv')\n", + "pokemon.head(3)\n" ] }, { @@ -58,13 +166,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',\n", + " 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',\n", + " 'Dragon', 'Dark', 'Steel', 'Flying', nan], dtype=object)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_types = pd.concat([pokemon['Type 1'], pokemon['Type 2']]).unique()\n", + "unique_types" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n", - "\n", - "\n", "len(unique_types) # you should see 19" ] }, @@ -85,13 +224,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "pokemon_totals = []\n", - "\n", - "# Your code here\n", + "pokemon_totals = [pokemon.loc[pokemon['Type 1'] == x, 'Total'] for x in unique_types if pd.isna(x) == False]\n", "\n", "len(pokemon_totals) # you should 
see 18" ] @@ -111,11 +259,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "F_onewayResult(statistic=4.63876748166055, pvalue=2.077215448842098e-09)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "stats.f_oneway(*pokemon_totals)\n" ] }, { @@ -131,7 +290,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Yes: the F-statistic is about 4.64 and the p-value (2.1e-09) is far below 0.05, so we reject the null hypothesis that all types share the same mean Total" ] } ], @@ -151,7 +310,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,