From a43690589989ec29427ab711b9bef1d4f2f4bd93 Mon Sep 17 00:00:00 2001 From: HenrikSoeder Date: Wed, 22 Nov 2023 14:02:05 +0000 Subject: [PATCH] Lab Done --- your-code/challenge-1.ipynb | 275 +++++++++++++++++++++--- your-code/challenge-2.ipynb | 407 ++++++++++++++++++++++++++++++++++-- 2 files changed, 633 insertions(+), 49 deletions(-) diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..3a4db5a 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,12 +19,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st" ] }, { @@ -38,11 +39,138 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "pokemon = pd.read_csv(\"Pokemon.csv\")\n", + "pokemon.head(4)" ] }, { @@ -58,11 +186,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n", + " \n", " \"\"\"Test means of a feature set of two samples\n", " \n", " Args:\n", @@ -75,7 +204,19 @@ " \"\"\"\n", " results = {}\n", "\n", - " # Your code here\n", + " for x in features:\n", + " \n", + " sample_1 = s1[x]\n", + " sample_2 = s2[x]\n", + "\n", + " # H0 = sample 1 mu = sample2 mu\n", + " # H1 = sample 1 mu != sample2 mu\n", + " output = st.ttest_ind(sample_1, sample_2, equal_var = False)\n", + " p_value = output[1]\n", + "\n", + "\n", + " results[x] = p_value\n", + "\n", " \n", " return results" ] @@ -101,11 +242,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 1.0026911708035284e-13,\n", + " 'Attack': 2.5203724492366553e-16,\n", + " 'Defense': 4.8269984949193316e-11,\n", + " 'Sp. Atk': 1.5514614112239705e-21,\n", + " 'Sp. 
Def': 2.294932786405291e-15,\n", + " 'Speed': 1.0490163118824585e-18,\n", + " 'Total': 9.357954335957446e-47}" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "df1 = pokemon[pokemon[\"Legendary\"] == True]\n", + "df2 = pokemon[pokemon[\"Legendary\"] == False]\n", + "\n", + "t_test_features(df1, df2)" ] }, { @@ -121,7 +282,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# since the null hypothesis was that both groups have the same mean values, and it was rejected for every feature,\n", + "# the conclusion can be made that Legendary and non-Legendary pokemon have different mean values" ] }, { @@ -133,11 +295,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.14551697834219632,\n", + " 'Attack': 0.24721958967217725,\n", + " 'Defense': 0.5677711011725426,\n", + " 'Sp. Atk': 0.12332165977104392,\n", + " 'Sp. Def': 0.18829872292645758,\n", + " 'Speed': 0.0023926593731213508,\n", + " 'Total': 0.5631377907941676}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "df1 = pokemon[pokemon[\"Generation\"] == 1]\n", + "df2 = pokemon[pokemon[\"Generation\"] == 2]\n", + "\n", + "t_test_features(df1, df2)\n" ] }, { @@ -153,7 +335,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# except for Speed (p = 0.002), we can NOT reject the null hypothesis that the two generations have equal means\n", + "# therefore the other features might have similar values, but we DON'T KNOW" ] }, { @@ -165,11 +348,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.11314389855379418,\n", + " 'Attack': 0.00014932578145948305,\n", + " 'Defense': 2.797854041151469e-08,\n", + " 'Sp. 
Atk': 0.00013876216585667901,\n", + " 'Sp. Def': 0.00010730610934512777,\n", + " 'Speed': 0.024217032818190928,\n", + " 'Total': 1.1157056505229961e-07}" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "one_type = pokemon[pokemon[\"Type 2\"].isnull() == True]\n", + "two_type = pokemon[pokemon[\"Type 2\"].isnull() == False]\n", + "\n", + "df1 = one_type\n", + "df2 = two_type\n", + "\n", + "t_test_features(df1, df2)" ] }, { @@ -185,7 +391,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# we can obtain from the \"total\" column (where the hypothesis got rejected),\n", + "# that they indeed have different values" ] }, { @@ -199,11 +406,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p_value for attack and defense: 0.0012123980547317907\n", + "p_value for Sp. attack and Sp. defense: 0.5458436328840434\n" + ] + } + ], "source": [ - "# Your code here\n" + "attack_defense = st.f_oneway(pokemon[\"Attack\"], pokemon[\"Defense\"])\n", + "print(\"p_value for attack and defense: \",attack_defense[1])\n", + "\n", + "spatt_spdef = st.f_oneway(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n", + "print(\"p_value for Sp. attack and Sp. defense: \",spatt_spdef[1])" ] }, { @@ -219,13 +439,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# there are significant differences between \"Attack\" and \"Defense\"\n", + "# we can not say if there are differences between \"Sp. Attack\" and \"Sp. 
defense\"" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -239,7 +460,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..19cc59e 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,317 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ - "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "from scipy.stats import stats\n", + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "pokemon = pd.read_csv(\"Pokemon.csv\")\n", + "pokemon.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"Pokemon.csv\")\n", + "df.head()" ] }, { @@ -58,14 +354,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n", + "#getting the types \n", + "type_1 = list(pokemon[\"Type 1\"].unique())\n", + "type_2 = list(pokemon[\"Type 2\"].unique())\n", + "\n", + "#concatinating and not considering duplicates\n", + "unique_types = list(set(type_1 + type_2))\n", "\n", "\n", - "len(unique_types) # you should see 19" + "len(unique_types) " ] }, { @@ -85,15 +397,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 65, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "for x in unique_types:\n", + " if type(x) == str:\n", + " \n", + " try: \n", + " total = pokemon[\"Total\"][pokemon[\"Type 1\"] == x]\n", + " pokemon_totals.append(total)\n", + " except: \n", + " total = pokemon[\"Total\"][pokemon[\"Type 2\"] == x]\n", + " 
pokemon_totals.append(total)\n", "\n", - "len(pokemon_totals) # you should see 18" + "len(pokemon_totals)" ] }, { @@ -111,11 +444,41 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 72, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/bk/02hh56sx1b7622qtqhym98tc0000gn/T/ipykernel_45328/4063958140.py:1: DeprecationWarning: Please use `f_oneway` from the `scipy.stats` namespace, the `scipy.stats.stats` namespace is deprecated.\n", + " output = stats.f_oneway(*pokemon_totals)\n" + ] + } + ], + "source": [ + "output = stats.f_oneway(*pokemon_totals)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the p value is: 2.077215448842098e-09\n" + ] + } + ], "source": [ - "# Your code here\n" + "print(\"the p value is:\",output[1])" ] }, { @@ -131,13 +494,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# the null hypothesis that every type has the same mean Total is rejected (p = 2.08e-09), so at least one type differs significantly" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -151,7 +514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.4" } }, "nbformat": 4,