diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..33c46af 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd\n", + "import scipy.stats as sts" ] }, { @@ -35,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +55,205 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(800, 13)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# 0\n", + "Name 0\n", + "Type 1 0\n", + "Type 2 386\n", + "Total 0\n", + "HP 0\n", + "Attack 0\n", + "Defense 0\n", + "Sp. Atk 0\n", + "Sp. Def 0\n", + "Speed 0\n", + "Generation 0\n", + "Legendary 0\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# 721\n", + "Name 800\n", + "Type 1 18\n", + "Type 2 18\n", + "Total 200\n", + "HP 94\n", + "Attack 111\n", + "Defense 103\n", + "Sp. Atk 105\n", + "Sp. 
Def 92\n", + "Speed 108\n", + "Generation 6\n", + "Legendary 2\n", + "dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.nunique()" ] }, { @@ -70,12 +265,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Your code here:\n", - "\n" + "n_legendary = pokemon[pokemon[\"Legendary\"] == True]\n", + "n_regular = pokemon[pokemon[\"Legendary\"] == False]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "65" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_legendary[\"Name\"].value_counts().count()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "735" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_regular[\"Name\"].value_counts().count()" ] }, { @@ -87,12 +323,50 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 637.384615\n", + "std 60.937389\n", + "Name: Total, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "t_legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]\n", + "t_regular = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]\n", + "t_legendary.agg([\"mean\", \"std\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 417.213605\n", + "std 106.760417\n", + "Name: Total, dtype: float64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], 
"source": [ "# Your code here:\n", - "\n" + "t_regular.agg([\"mean\", \"std\"])" ] }, { @@ -106,12 +380,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 25.83\n", + "P-Value: 0.00000000000000000000000000000000000000000000009358\n", + "\n", + "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points are different for legendary and regular pokemons.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, p_value = sts.ttest_ind(t_legendary, t_regular, equal_var=False)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.50f}\")\n", + "print()\n", + "\n", + "alpha = 0.05\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean total points for legendary and regular pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points are different for legendary and regular pokemons.\")" ] }, { @@ -122,13 +417,11 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "### Your conclusions here:\n", + "This test allows us to **reject** the null hypothesis that legendary and regular pokemons have the same mean Total points.\n" ] }, { @@ -140,12 +433,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Electric 44\n", + "Rock 44\n", + "Dragon 32\n", + "Ground 32\n", + "Ghost 32\n", + "Dark 31\n", + 
"Poison 28\n", + "Steel 27\n", + "Fighting 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon[\"Type 1\"].value_counts()" ] }, { @@ -157,12 +480,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Your code here:\n", - "\n" + "# Again, I going to use \"Total\" to get distributions for both groups:\n", + "\n", + "water = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"]\n", + "other = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 430.455357\n", + "std 113.188266\n", + "Name: Total, dtype: float64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "water.agg([\"mean\", \"std\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mean 435.859012\n", + "std 121.091682\n", + "Name: Total, dtype: float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "other.agg([\"mean\", \"std\"])" ] }, { @@ -174,12 +544,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.44\n", + "P-Value: 0.6587\n", + "\n", + "Fail to Reject the Null Hypothesis: The mean total points for water pokemons and all other pokemons are not significantly different.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "t_stat, p_value = sts.ttest_ind(water, other, equal_var=True)\n", + "print(f\"Test Statistic (t): 
{t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.4f}\")\n", + "print()\n", + "\n", + "alpha = 0.05\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean total points for water pokemons and all other pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points are different between water pokemons and non-water pokemons.\")" ] }, { @@ -190,13 +581,12 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "### Your conclusions here:\n", + "\n", + "In this case, even though \"Water\" is the largest group of Pokemon, the mean total points of Water pokemon is not significantly different from the mean total points of all other pokemon types." ] }, { @@ -210,12 +600,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "# Your code here:\n", - "\n" + "import scipy.stats as sts\n", + "defense = pokemon[\"Defense\"]\n", + "attack = pokemon[\"Attack\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.000017\n", + "\n", + "Reject the Null Hypothesis: There is a significant difference between the defense and attack scores.\n" + ] + } + ], + "source": [ + "t_stat, p_value = sts.ttest_rel(attack, defense)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")\n", + "print()\n", + "\n", + "t_stat_rel = t_stat\n", + "\n", + "alpha = 0.05\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between the defense and attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There 
is a significant difference between the defense and attack scores.\")" ] }, { @@ -226,13 +650,12 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "### Your conclusions here:\n", + "\n", + "The p-value is lower than our alpha (significance level of 0.05), and therefore we reject the null hypothesis that the means of defense and attack scores are equal." ] }, { @@ -244,12 +667,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "# Your code here:\n", - "\n" + "sp_defense = pokemon[\"Sp. Def\"]\n", + "sp_attack = pokemon[\"Sp. Atk\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.85\n", + "P-Value: 0.393369\n", + "\n", + "Fail to Reject the Null Hypothesis: No significant difference between the special defense and special attack scores.\n" + ] + } + ], + "source": [ + "t_stat, p_value = sts.ttest_rel(sp_defense, sp_attack)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")\n", + "print()\n", + "\n", + "alpha = 0.05\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between the special defense and special attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between the special defense and special attack scores.\")" ] }, { @@ -260,13 +714,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "### Your conclusions here:\n", + "\n", + "Even though there is a statistically significant difference between the means of defense and attack scores, we can't 
say the same is true for the special defense and attack scores. \n", + "This indicates that \"special\" abilities are more balanced in terms of defense and attack than \"regular\" pokemon abilities, for which the defense and attack scores' distributions are not the same." ] }, { @@ -280,13 +734,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, "outputs": [], + "source": [ + "atk_def = attack - defense" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.000017\n", + "\n", + "Reject the Null Hypothesis: There is a significant difference between the defense and attack scores.\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", - " " + "t_stat, p_value = sts.ttest_1samp(atk_def, 0)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.6f}\")\n", + "print()\n", + "\n", + "t_stat_1samp = t_stat\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant difference between the defense and attack scores.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant difference between the defense and attack scores.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_stat_rel == t_stat_1samp" ] }, { @@ -302,12 +805,71 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Type 1OtherWater
Legendary
False627108
True614
\n", + "
" + ], + "text/plain": [ + "Type 1 Other Water\n", + "Legendary \n", + "False 627 108\n", + "True 61 4" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon[\"Type 1\"] = pokemon['Type 1'].apply(lambda x: x if x == \"Water\" else \"Other\")\n", + "legendary_type1 = pd.crosstab(pokemon['Legendary'], pokemon['Type 1'])\n", + "legendary_type1" ] }, { @@ -319,12 +881,32 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-squared p-value: 0.0863\n", + "\n", + "Fail to Reject the Null Hypothesis: No significant relationship between the ´Legendary´ and the ´Type 1´ categories of pokemon.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "from scipy.stats import chi2_contingency\n", + "_, chi2_pvalue, _, _ = chi2_contingency(legendary_type1)\n", + "print(f\"Chi-squared p-value: {chi2_pvalue:.4f}\")\n", + "print() \n", + "\n", + "alpha = 0.05\n", + "\n", + "if chi2_pvalue > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: No significant relationship between the ´Legendary´ and the ´Type 1´ categories of pokemon.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is a significant relationship between the ´Legendary´ and the ´Type 1´ categories of pokemon.\")" ] }, { @@ -335,13 +917,12 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Your answer here:\n", - "\n" + "### Your answer here:\n", + "\n", + "No. At the 5% significance level (p = 0.0863 > 0.05) we fail to reject the null hypothesis: there is not enough evidence to conclude that \"Legendary\" and \"Type 1\" (Water vs. Other) are dependent." 
] }, { @@ -354,7 +935,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -368,9 +949,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.4" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }