diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..39d93b5 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,12 +19,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import scipy.stats as st" ] }, { @@ -38,11 +39,155 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "df = pd.read_csv('Pokemon.csv')\n", + "df.head()" ] }, { @@ -58,11 +203,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n", + "\n", " \"\"\"Test means of a feature set of two samples\n", " \n", " Args:\n", @@ -76,7 +222,9 @@ " results = {}\n", "\n", " # Your code here\n", - " \n", + " for feature in features:\n", + " t_stat, p_value = st.ttest_ind(s1[feature], s2[feature], equal_var=False)\n", + " results[feature] = {'t_statistic': t_stat, 'p_value': p_value}\n", " return results" ] }, @@ -101,11 +249,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': {'t_statistic': 8.981370483625046, 'p_value': 1.0026911708035284e-13}, 'Attack': {'t_statistic': 10.438133539322203, 'p_value': 2.520372449236646e-16}, 'Defense': {'t_statistic': 7.637078164784618, 'p_value': 4.826998494919331e-11}, 'Sp. Atk': {'t_statistic': 13.417449984138461, 'p_value': 1.5514614112239816e-21}, 'Sp. 
Def': {'t_statistic': 10.015696613114878, 'p_value': 2.2949327864052826e-15}, 'Speed': {'t_statistic': 11.47504444631443, 'p_value': 1.0490163118824507e-18}, 'Total': {'t_statistic': 25.8335743895517, 'p_value': 9.357954335957444e-47}}\n" + ] + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "legendary = df[df['Legendary'] == True]\n", + "non_legendary = df[df['Legendary'] == False]\n", + "\n", + "t_test_results = t_test_features(legendary, non_legendary, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n", + "print(t_test_results)\n" ] }, { @@ -121,7 +282,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# The p-value is below 0.05 for every feature, so at a 95% confidence level we reject the null hypothesis of equal means" ] }, { @@ -133,11 +294,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': {'t_statistic': -1.4609700002846653, 'p_value': 0.14551697834219626}, 'Attack': {'t_statistic': 1.1603052805533747, 'p_value': 0.24721958967217725}, 'Defense': {'t_statistic': -0.5724173235153119, 'p_value': 0.5677711011725426}, 'Sp. Atk': {'t_statistic': 1.54608675231508, 'p_value': 0.12332165977104388}, 'Sp. Def': {'t_statistic': -1.3203746053318755, 'p_value': 0.18829872292645752}, 'Speed': {'t_statistic': 3.069594374071931, 'p_value': 0.00239265937312135}, 'Total': {'t_statistic': 0.579073329450271, 'p_value': 0.5631377907941676}}\n" + ] + } + ], "source": [ - "# Your code here\n" + "generation_1 = df[df['Generation'] == 1]\n", + "generation_2 = df[df['Generation'] == 2]\n", + "\n", + "t_test_results = t_test_features(generation_1, generation_2, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. 
Def\", \"Speed\", \"Total\"])\n", + "print(t_test_results)\n" ] }, { @@ -153,7 +326,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# For every feature except Speed the p-value is higher than 0.05, so at a 95% confidence level we fail to reject the null hypothesis; for Speed (p = 0.0024) we reject it" ] }, { @@ -165,11 +338,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'HP': {'t_statistic': -1.586088850338319, 'p_value': 0.11314389855379413}, 'Attack': {'t_statistic': -3.810556219950897, 'p_value': 0.00014932578145948305}, 'Defense': {'t_statistic': -5.60979416640793, 'p_value': 2.7978540411514693e-08}, 'Sp. Atk': {'t_statistic': -3.828976815384819, 'p_value': 0.00013876216585667907}, 'Sp. Def': {'t_statistic': -3.892991138685155, 'p_value': 0.00010730610934512779}, 'Speed': {'t_statistic': -2.258014040079978, 'p_value': 0.02421703281819093}, 'Total': {'t_statistic': -5.355678438759113, 'p_value': 1.1157056505229964e-07}}\n" + ] + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "df['Type Count'] = (df['Type 2'].notnull()).astype(int)\n", + "\n", + "single_type = df[df['Type Count'] == 0] \n", + "two_types = df[df['Type Count'] == 1] \n", + "\n", + "t_test_results = t_test_features(single_type, two_types, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n", + "print(t_test_results)\n" ] }, { @@ -185,7 +373,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Fail to reject the null hypothesis for 'HP.'\n", + "# Reject the null hypothesis for 'Attack,' 'Defense,' 'Sp. Atk,' 'Sp. 
Def,' 'Speed,' and 'Total.'" ] }, { @@ -199,11 +388,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.stats import ttest_rel\n", + "#H0: There is no significant difference between the Attack and Defense.\n", + "#H1: There is a significant difference between the Attack and Defense.\n", + "\n", + "ttest_rel(df[\"Attack\"], df[\"Defense\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "#H0: There is no significant difference between the Sp. Atk and Sp. Def.\n", + "#H1: There is a significant difference between the Sp. Atk and Sp. Def.\n", + "ttest_rel(df[\"Sp. Atk\"], df[\"Sp. Def\"])" ] }, { @@ -219,7 +445,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Reject the null hypothesis for the `Attack` vs `Defense` test, indicating a significant difference.\n", + "# Fail to reject the null hypothesis for `Sp. Atk` vs `Sp. Def` test, suggesting no significant difference." 
] } ], @@ -239,7 +466,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..024c51b 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,277 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "from scipy.stats import f_oneway" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
..........................................
795719DiancieRockFairy60050100150100150506True
796719DiancieMega DiancieRockFairy700501601101601101106True
797720HoopaHoopa ConfinedPsychicGhost6008011060150130706True
798720HoopaHoopa UnboundPsychicDark6808016060170130806True
799721VolcanionFireWater6008011012013090706True
\n", + "

800 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + ".. ... ... ... ... ... .. ... ... \n", + "795 719 Diancie Rock Fairy 600 50 100 150 \n", + "796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n", + "797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n", + "798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n", + "799 721 Volcanion Fire Water 600 80 110 120 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False \n", + ".. ... ... ... ... ... \n", + "795 100 150 50 6 True \n", + "796 160 110 110 6 True \n", + "797 150 130 70 6 True \n", + "798 170 130 80 6 True \n", + "799 130 90 70 6 True \n", + "\n", + "[800 rows x 13 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "# Load the data:\n", + "df = pd.read_csv('Pokemon.csv')\n", + "df\n" ] }, { @@ -58,12 +314,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Grass' 'Fire' 'Water' 'Bug' 'Normal' 'Poison' 'Electric' 'Ground'\n", + " 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Ghost' 'Ice' 'Dragon' 'Dark' 'Steel'\n", + " 'Flying' nan]\n" + ] + }, + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here\n", + "unique_types = pd.concat([df['Type 1'], df['Type 2']]).unique()\n", "\n", + "print(unique_types)\n", "\n", "len(unique_types) # you should see 19" 
] @@ -85,13 +363,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "melted_df = pd.melt(df, id_vars='Total', value_vars=['Type 1', 'Type 2'], var_name='Type')\n", + "\n", + "pokemon_totals = melted_df.dropna().groupby('value')['Total'].sum().tolist()\n", "\n", "len(pokemon_totals) # you should see 18" ] @@ -111,11 +402,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'int' object has no attribute 'flatten'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m pokemon_totals \u001b[38;5;241m=\u001b[39m [arr\u001b[38;5;241m.\u001b[39mflatten() \u001b[38;5;28;01mfor\u001b[39;00m arr \u001b[38;5;129;01min\u001b[39;00m pokemon_totals]\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Now, perform the one-way ANOVA\u001b[39;00m\n\u001b[0;32m 4\u001b[0m anova_result \u001b[38;5;241m=\u001b[39m f_oneway(\u001b[38;5;241m*\u001b[39mpokemon_totals)\n", + "Cell \u001b[1;32mIn[9], line 1\u001b[0m, in \u001b[0;36m\u001b[1;34m(.0)\u001b[0m\n\u001b[1;32m----> 1\u001b[0m pokemon_totals \u001b[38;5;241m=\u001b[39m [arr\u001b[38;5;241m.\u001b[39mflatten() \u001b[38;5;28;01mfor\u001b[39;00m arr \u001b[38;5;129;01min\u001b[39;00m pokemon_totals]\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Now, perform the one-way ANOVA\u001b[39;00m\n\u001b[0;32m 4\u001b[0m anova_result \u001b[38;5;241m=\u001b[39m 
f_oneway(\u001b[38;5;241m*\u001b[39mpokemon_totals)\n", + "\u001b[1;31mAttributeError\u001b[0m: 'int' object has no attribute 'flatten'" + ] + } + ], "source": [ - "# Your code here\n" + "\n", + "pokemon_totals = [arr.flatten() for arr in pokemon_totals]\n", + "\n", + "# Now, perform the one-way ANOVA\n", + "anova_result = f_oneway(*pokemon_totals)\n", + "print(anova_result)\n", + "\n", + "# Could not resolve this error: pokemon_totals holds one summed Total (an int) per type, not an array of values, so .flatten() fails" ] }, { @@ -131,7 +442,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# No ANOVA result to interpret: the cell above raises AttributeError, so the test never ran" ] } ], @@ -151,7 +462,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.5" } }, "nbformat": 4,