diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb index c1bb43d..6aa1f7a 100755 --- a/your-code/challenge-1.ipynb +++ b/your-code/challenge-1.ipynb @@ -19,12 +19,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "from scipy.stats import ttest_1samp\n", + "import scipy.stats as st" ] }, { @@ -38,11 +41,155 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "pokemon = pd.read_csv(\"Pokemon.csv\")\n", + "pokemon.head()" ] }, { @@ -58,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -73,10 +220,13 @@ " Returns:\n", " dict: a dictionary of t-test scores for each feature where the feature name is the key and the p-value is the value\n", " \"\"\"\n", + " \n", " results = {}\n", "\n", - " # Your code here\n", - " \n", + " for feature in features:\n", + " stat, p_value = st.ttest_ind(s1[feature], s2[feature], equal_var = False)\n", + " results[feature] = p_value\n", + " \n", " return results" ] }, @@ -101,11 +251,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 1.0026911708035284e-13,\n", + " 'Attack': 2.520372449236646e-16,\n", + " 'Defense': 4.8269984949193316e-11,\n", + " 'Sp. Atk': 1.5514614112239812e-21,\n", + " 'Sp. 
Def': 2.2949327864052826e-15,\n", + " 'Speed': 1.049016311882451e-18,\n", + " 'Total': 9.357954335957446e-47}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "s1 = pokemon[pokemon[\"Legendary\"]==True] #legendary pokemons\n", + "s2 = pokemon[pokemon[\"Legendary\"]==False] #non legendary pokemons\n", + "\n", + "t_test_features(s1,s2)" ] }, { @@ -121,7 +292,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Your comment here\n", + "# the results indicate that the means of the features are significantly different between the two groups of Pokémon. \n", + "# This can provide insights into the characteristics that tend to be more distinct between Legendary and non-Legendary Pokémon." ] }, { @@ -133,11 +306,150 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "pokemon.head(3)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.14551697834219623,\n", + " 'Attack': 0.24721958967217725,\n", + " 'Defense': 0.5677711011725426,\n", + " 'Sp. Atk': 0.12332165977104388,\n", + " 'Sp. Def': 0.18829872292645752,\n", + " 'Speed': 0.00239265937312135,\n", + " 'Total': 0.5631377907941676}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Hypothesis\n", + "#H0: The mean of the features between Generation 1 and Generation 2 pokémons is equal.\n", + "#H1 : The mean of the features between Generation 1 and Generation 2 pokémons is different.\n", + "alpha = 0.05\n", + "s1 = pokemon[pokemon[\"Generation\"]==1] #generation 1 pokemons\n", + "s2 = pokemon[pokemon[\"Generation\"]==2] #generation 2 pokemons\n", + "t_test_features(s1,s2)" ] }, { @@ -153,7 +465,77 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "#the only feature that shows a statistically significant difference between Generation 1 and Generation 2 Pokémon is the Speed attribute. \n", + "# This suggests that Generation 1 and Generation 2 Pokémon have different average Speed values, while other attributes do not show significant differences between the two generations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic (Attack vs Defense): 4.325566393330478\n", + "P-value (Attack vs Defense): 1.7140303479358558e-05\n", + "Reject the null hypothesis for Attack vs Defense\n", + "\n", + "T-statistic (Sp. Atk vs Sp. Def): 0.853986188453353\n", + "P-value (Sp. Atk vs Sp. Def): 0.3933685997548122\n", + "Do not reject the null hypothesis for Sp. Atk vs Sp. Def\n", + "\n", + "T-statistic (HP - Generation 1 vs 2): 0.14551697834219623\n", + "Do not reject the null hypothesis for HP - Generation 1 vs 2\n", + "\n", + "T-statistic (Attack - Generation 1 vs 2): 0.24721958967217725\n", + "Do not reject the null hypothesis for Attack - Generation 1 vs 2\n", + "\n", + "T-statistic (Defense - Generation 1 vs 2): 0.5677711011725426\n", + "Do not reject the null hypothesis for Defense - Generation 1 vs 2\n", + "\n", + "T-statistic (Sp. Atk - Generation 1 vs 2): 0.12332165977104388\n", + "Do not reject the null hypothesis for Sp. Atk - Generation 1 vs 2\n", + "\n", + "T-statistic (Sp. Def - Generation 1 vs 2): 0.18829872292645752\n", + "Do not reject the null hypothesis for Sp. Def - Generation 1 vs 2\n", + "\n", + "T-statistic (Speed - Generation 1 vs 2): 0.00239265937312135\n", + "Reject the null hypothesis for Speed - Generation 1 vs 2\n", + "\n", + "T-statistic (Total - Generation 1 vs 2): 0.5631377907941676\n", + "Do not reject the null hypothesis for Total - Generation 1 vs 2\n", + "\n" + ] + } + ], + "source": [ + "print(f\"T-statistic (Attack vs Defense): {t_statistic1}\")\n", + "print(f\"P-value (Attack vs Defense): {p_value1}\")\n", + "if p_value1 < alpha:\n", + " print(\"Reject the null hypothesis for Attack vs Defense\")\n", + "else:\n", + " print(\"Do not reject the null hypothesis for Attack vs Defense\")\n", + "print(\"\")\n", + "\n", + "print(f\"T-statistic (Sp. Atk vs Sp. 
Def): {t_statistic2}\")\n", + "print(f\"P-value (Sp. Atk vs Sp. Def): {p_value2}\")\n", + "if p_value2 < alpha:\n", + " print(\"Reject the null hypothesis for Sp. Atk vs Sp. Def\")\n", + "else:\n", + " print(\"Do not reject the null hypothesis for Sp. Atk vs Sp. Def\")\n", + "print(\"\")\n", + "\n", + "# Hypothesis for Generation 1 vs Generation 2\n", + "\n", + "for feature, p_value in results_generation.items():\n", + " print(f\"T-statistic ({feature} - Generation 1 vs 2): {p_value}\")\n", + " if p_value < alpha:\n", + " print(f\"Reject the null hypothesis for {feature} - Generation 1 vs 2\")\n", + " else:\n", + " print(f\"Do not reject the null hypothesis for {feature} - Generation 1 vs 2\")\n", + " print(\"\")" ] }, { @@ -165,11 +547,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'HP': 0.11314389855379414,\n", + " 'Attack': 0.00014932578145948305,\n", + " 'Defense': 2.7978540411514693e-08,\n", + " 'Sp. Atk': 0.00013876216585667907,\n", + " 'Sp. 
Def': 0.00010730610934512779,\n", + " 'Speed': 0.02421703281819093,\n", + " 'Total': 1.1157056505229961e-07}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "#I chose the Type 2 column with null values to distinguish between Pokémon with a single type and those with two types\n", + "#If a Pokémon has only one type, the \"Type 2\" column will be null for it\n", + "#If a Pokémon has two types, the \"Type 2\" column will have a non-null value, indicating the Pokémon's second type\n", + "\n", + "\n", + "s1 = pokemon[pokemon[\"Type 2\"].isnull()] # Pokémon with single type\n", + "s2 = pokemon[pokemon[\"Type 2\"].notnull()] # Pokémon with two types\n", + "\n", + "t_test_results = t_test_features(s1, s2)\n", + "t_test_results" ] }, { @@ -185,7 +593,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "#The results indicate that several features (Attack, Defense, Sp. Atk, Sp. Def, and Total) show statistically significant differences between Pokémon with a single type and those with two types. The differences in Speed are also statistically significant, but less strongly so (its p-value is closer to 0.05). \n", + "# HP, however, does not show a significant difference between the two groups.\n", + "\n" ] }, { @@ -199,18 +609,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results\n", + "\n", + "T-statistic (Attack vs Defense): 4.325566393330478\n", + "P-value (Attack vs Defense): 1.7140303479358558e-05\n", + "T-statistic (Sp. Atk vs Sp. Def): 0.853986188453353\n", + "P-value (Sp. Atk vs Sp. Def): 0.3933685997548122\n", + "\n", + "Reject the null hypothesis for Attack vs Defense\n", + "T-statistic (Sp. Atk vs Sp. Def): 0.853986188453353\n", + "P-value (Sp. Atk vs Sp. 
Def): 0.3933685997548122\n", + "Do not to reject the null hypothesis for Sp. Atk vs Sp. Def\n" + ] + } + ], "source": [ - "# Your code here\n" + "#Hypothesis\n", + "#Attack and Defense\n", + "#H0 = The mean difference between \"Attack\" and \"Defense\" = 0\n", + "#H1 = The mean difference between \"Attack\" and \"Defense\" !=0\n", + "\n", + "#Sp. Atk and Sp.Def\n", + "#H0 = The mean difference between \"Sp.Atk\" and \"Sp.Def\" = 0\n", + "#H1 = The mean difference between \"Sp.Atk\" and \"Sp.Def\" !=0\n", + "\n", + "alpha = 0.05\n", + "\n", + "#compute and testing Hypothesis\n", + "pokemon[\"Attack_Defense_Diff\"] = pokemon[\"Attack\"] - pokemon[\"Defense\"] #Difference between Attack and Defense\n", + "pokemon[\"Sp_Atk_Sp_Def_Diff\"] = pokemon[\"Sp. Atk\"] - pokemon[\"Sp. Def\"] #Difference between Sp. Atk and Sp.Def\n", + "t_statistic1, p_value1 = st.ttest_rel(pokemon[\"Attack_Defense_Diff\"], np.zeros(len(pokemon)))\n", + "t_statistic2, p_value2 = st.ttest_rel(pokemon[\"Sp_Atk_Sp_Def_Diff\"], np.zeros(len(pokemon)))\n", + "\n", + "print(\"Results\")\n", + "print(\"\")\n", + "print(f\"T-statistic (Attack vs Defense): {t_statistic1}\")\n", + "print(f\"P-value (Attack vs Defense): {p_value1}\")\n", + "print(f\"T-statistic (Sp. Atk vs Sp. Def): {t_statistic2}\")\n", + "print(f\"P-value (Sp. Atk vs Sp. Def): {p_value2}\")\n", + "\n", + "print(\"\")\n", + "if p_value1 < alpha:\n", + " print(\"Reject the null hypothesis for Attack vs Defense\")\n", + "else:\n", + " print(\"Do not to reject the null hypothesis for Attack vs Defense\")\n", + "\n", + "print(f\"T-statistic (Sp. Atk vs Sp. Def): {t_statistic2}\")\n", + "print(f\"P-value (Sp. Atk vs Sp. Def): {p_value2}\")\n", + "if p_value2 < alpha:\n", + " print(\"Reject the null hypothesis for Sp. Atk vs Sp. Def\")\n", + "else:\n", + " print(\"Do not to reject the null hypothesis for Sp. Atk vs Sp. Def\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### What conclusions can you make?" 
+ "#### What conclusions can you make?\n" ] }, { @@ -219,7 +682,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# We can reject the null hypothesis for Attack vs Defense but not for Sp. Atk vs Sp. Def" ] } ], @@ -239,7 +702,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb index 1f0e335..baa3c36 100755 --- a/your-code/challenge-2.ipynb +++ b/your-code/challenge-2.ipynb @@ -17,21 +17,174 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st\n", + "from scipy.stats import bernoulli\n", + "from scipy.stats import binom\n", + "from scipy.stats import geom\n", + "from scipy.stats import poisson\n", + "from scipy.stats import uniform\n", + "from scipy.stats import expon\n", + "from scipy.stats import norm" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Load the data:\n" + "# Load the data:\n", + "pokemon = pd.read_csv(\"Pokemon.csv\")\n", + "pokemon.head()" ] }, { @@ -58,12 +211,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here\n", - "\n", + "unique_types = pd.concat([pokemon[\"Type 1\"], pokemon[\"Type 2\"]]).unique()\n", "\n", "len(unique_types) # you should see 19" ] @@ -85,13 +249,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "18" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pokemon_totals = []\n", "\n", - "# Your code here\n", + "for pokemon_type in unique_types:\n", + " if type(pokemon_type) is str:\n", + " pokemon_totals.append(pokemon[(pokemon[\"Type 1\"]==pokemon_type) | (pokemon[\"Type 2\"]==pokemon_type)][\"Total\"])\n", "\n", "len(pokemon_totals) # you should see 18" ] @@ -111,11 +288,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "ANOVA Result:\n", + "F-value: 6.6175382960055344\n", + "p-value: 2.6457458815984803e-15\n" + ] + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "anova_result = st.f_oneway(*pokemon_totals)\n", + "print(\"ANOVA Result:\")\n", + "print(\"F-value:\", anova_result.statistic)\n", + "print(\"p-value:\", anova_result.pvalue)\n" ] }, { @@ -131,7 +322,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# The p-value is significantly smaller than the typical significance level of 0.05. \n", + "# This indicates that the p-value is well below the threshold, which suggests strong evidence against the null hypothesis. In this case, we can conclude that there are significant differences in the 'Total' values among the various types of Pokémon.\n", + "# Therefore, based on the extremely low p-value, we can reject the null hypothesis and conclude that there are significant differences in the 'Total' values among the different types of Pokémon." ] } ], @@ -151,7 +344,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,