From 2e43dbcceb7ad491cd9a8a95edaa4d54e299acf3 Mon Sep 17 00:00:00 2001 From: Tiago Date: Thu, 12 Dec 2024 15:45:32 +0000 Subject: [PATCH] update --- your-code/main.ipynb | 758 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 704 insertions(+), 54 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..ba11aee 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import pandas as pd\n", + "import numpy as np" ] }, { @@ -35,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +54,155 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon.head()" ] }, { @@ -70,12 +214,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "# int64\n", + "Name object\n", + "Type 1 object\n", + "Type 2 object\n", + "Total int64\n", + "HP int64\n", + "Attack int64\n", + "Defense int64\n", + "Sp. Atk int64\n", + "Sp. 
Def int64\n", + "Speed int64\n", + "Generation int64\n", + "Legendary bool\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Legendary\n", + "False 735\n", + "True 65\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "legendary_count = pokemon['Legendary'].value_counts()\n", + "legendary_count" ] }, { @@ -87,12 +279,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "legendary_mean is :645.25\n", + "legendary_std is :59.7\n", + "\n", + "non_legendary_mean is :436.45\n", + "non_legendary_std is :107.59\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "legendary = pokemon[pokemon[\"Legendary\"] == True].dropna()\n", + "non_legendary = pokemon[pokemon[\"Legendary\"] == False].dropna()\n", + "\n", + "legendary_mean = legendary[\"Total\"].mean()\n", + "legendary_std = legendary[\"Total\"].std()\n", + "\n", + "non_legendary_mean = non_legendary[\"Total\"].mean()\n", + "non_legendary_std = non_legendary[\"Total\"].std()\n", + "\n", + "print(f\"legendary_mean is :{round(legendary_mean,2)}\")\n", + "print(f\"legendary_std is :{round(legendary_std,2)}\")\n", + "print()\n", + "print(f\"non_legendary_mean is :{round(non_legendary_mean,2)}\")\n", + "print(f\"non_legendary_std is :{round(non_legendary_std,2)}\")" ] }, { @@ -106,12 +323,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tstatistics: 25.83357\n", + "P-value: 0.00000\n", + 
"The points difference between legendary and non-legendary Pokemon is statistically significant.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "from scipy.stats import ttest_ind\n", + "\n", + "legendary_points = pokemon[pokemon[\"Legendary\"] == True][\"Total\"].dropna()\n", + "non_legendary_points = pokemon[pokemon[\"Legendary\"] == False][\"Total\"].dropna()\n", + "\n", + "t_stat, p_value = ttest_ind(legendary_points, non_legendary_points, equal_var=False)\n", + "\n", + "print(f\"Tstatistics: {t_stat:.5f}\")\n", + "print(f\"P-value: {p_value:.5f}\")\n", + "\n", + "if p_value < 0.05:\n", + " print(\"The points difference between legendary and non-legendary Pokemon is statistically significant.\")\n", + "else:\n", + " print(\"The points difference between legendary and non-legendary Pokemon is not statistically significant.\")" ] }, { @@ -123,12 +363,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "# From our sample we can conclude that the difference in Total points between legendary and non-legendary Pokemon \n", + "# is statistically significant, since the p-value (approximately 0) is below the 5% significance level.\n", + "# Also, the standard deviation of the Total points is larger for non-legendary Pokemon." 
] }, { @@ -140,12 +383,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Type 1\n", + "Water 112\n", + "Normal 98\n", + "Grass 70\n", + "Bug 69\n", + "Psychic 57\n", + "Fire 52\n", + "Electric 44\n", + "Rock 44\n", + "Dragon 32\n", + "Ground 32\n", + "Ghost 32\n", + "Dark 31\n", + "Poison 28\n", + "Steel 27\n", + "Fighting 27\n", + "Ice 24\n", + "Fairy 17\n", + "Flying 4\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "type_1 = pokemon['Type 1'].value_counts()\n", + "type_1" ] }, { @@ -157,12 +431,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "water_mean is :449.06\n", + "water_std is :109.27\n", + "\n", + "non_water_mean is :457.74\n", + "non_water_std is :122.56\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"].dropna()\n", + "non_water_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"].dropna()\n", + "\n", + "water_mean = water_pokemon[\"Total\"].mean()\n", + "water_std = water_pokemon[\"Total\"].std()\n", + "\n", + "non_water_mean = non_water_pokemon[\"Total\"].mean()\n", + "non_water_std = non_water_pokemon[\"Total\"].std()\n", + "\n", + "print(f\"water_mean is :{round(water_mean,2)}\")\n", + "print(f\"water_std is :{round(water_std,2)}\")\n", + "print()\n", + "print(f\"non_water_mean is :{round(non_water_mean,2)}\")\n", + "print(f\"non_water_std is :{round(non_water_std,2)}\")" ] }, { @@ -174,12 +473,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"Tstatistics: 13.25402\n", + "P-value: 0.00000\n", + "The mean of the points difference between for water Pokemon to all non-water Pokemon is statistically significant.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "water_pokemon = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"].dropna()\n", + "non_water_pokemon = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"].dropna()\n", + "\n", + "t_stat, p_value = ttest_ind(water_pokemon, non_water_pokemon, equal_var=True)\n", + "\n", + "print(f\"Tstatistics: {t_stat:.5f}\")\n", + "print(f\"P-value: {p_value:.5f}\")\n", + "\n", + "if p_value < 0.05:\n", + " print(\"The mean of the points difference between for water Pokemon to all non-water Pokemon is statistically significant.\")\n", + "else:\n", + " print(\"The mean of the points difference between for water Pokemon to all non-water Pokemon is not statistically significant.\")" ] }, { @@ -191,12 +511,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# NOTE(review): the output recorded for the test above was produced with legendary_points passed in place of water_pokemon, which is why its p-value matched the legendary vs non-legendary test (about 0); re-run the corrected cell before interpreting the p-value.\n", + "# However, the mean Total of water and non-water Pokemon is very similar, so a near-zero p-value is not expected here.\n" ] }, { @@ -210,12 +531,194 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
#800.0362.81375208.3437981.0184.75364.5539.25721.0
Total800.0435.10250119.963040180.0330.00450.0515.00780.0
HP800.069.2587525.5346691.050.0065.080.00255.0
Attack800.079.0012532.4573665.055.0075.0100.00190.0
Defense800.073.8425031.1835015.050.0070.090.00230.0
Sp. Atk800.072.8200032.72229410.049.7565.095.00194.0
Sp. Def800.071.9025027.82891620.050.0070.090.00230.0
Speed800.068.2775029.0604745.045.0065.090.00180.0
Generation800.03.323751.6612901.02.003.05.006.0
\n", + "
" ], + "text/plain": [ + " count mean std min 25% 50% 75% max\n", + "# 800.0 362.81375 208.343798 1.0 184.75 364.5 539.25 721.0\n", + "Total 800.0 435.10250 119.963040 180.0 330.00 450.0 515.00 780.0\n", + "HP 800.0 69.25875 25.534669 1.0 50.00 65.0 80.00 255.0\n", + "Attack 800.0 79.00125 32.457366 5.0 55.00 75.0 100.00 190.0\n", + "Defense 800.0 73.84250 31.183501 5.0 50.00 70.0 90.00 230.0\n", + "Sp. Atk 800.0 72.82000 32.722294 10.0 49.75 65.0 95.00 194.0\n", + "Sp. Def 800.0 71.90250 27.828916 20.0 50.00 70.0 90.00 230.0\n", + "Speed 800.0 68.27750 29.060474 5.0 45.00 65.0 90.00 180.0\n", + "Generation 800.0 3.32375 1.661290 1.0 2.00 3.0 5.00 6.0" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.describe().T" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tstatistics: 4.32557\n", + "P-value: 0.00002\n", + " Reject null Hypotheses: The defense and attack scores are significant different.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "from scipy.stats import ttest_rel\n", + "\n", + "t_stat, p_value = ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n", + "\n", + "print(f\"Tstatistics: {t_stat:.5f}\")\n", + "print(f\"P-value: {p_value:.5f}\")\n", + "\n", + "if p_value < 0.05:\n", + " print(\" Reject null Hypotheses: The defense and attack scores are significant different.\")\n", + "else:\n", + " print(\" Fail to Reject null Hypotheses: The defense and attack scores aren´t significant different.\")" ] }, { @@ -227,12 +730,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# Since the p-value is less than alpha = 0.05, we reject the null hypothesis: the mean Attack and Defense scores are significantly different.\n" ] }, { @@ -244,12 +747,31 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tstatistics: 0.85399\n", + "P-value: 0.39337\n", + "Fail to Reject null Hypotheses: The defense and attack scores aren´t significant different.\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "t_stat, p_value = ttest_rel(pokemon[\"Sp. Atk\"], pokemon[\"Sp. Def\"])\n", + "\n", + "print(f\"Tstatistics: {t_stat:.5f}\")\n", + "print(f\"P-value: {p_value:.5f}\")\n", + "\n", + "if p_value < 0.05:\n", + " print(\"Reject the Null Hypoteses: The defense and attack scores are significant different.\")\n", + "else:\n", + " print(\"Fail to Reject null Hypotheses: The defense and attack scores aren´t significant different.\")" ] }, { @@ -261,12 +783,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "# This indicates that there is no statistically significant difference between special attack and special defense scores in this sample" ] }, { @@ -280,13 +802,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tstatistics: -4.32557\n", + "P-value: 0.00002\n", + "Reject the Null Hypoteses: The mean difference is sgnificantly different from 0.\n" + ] + } + ], "source": [ "# Your code here:\n", - " \n", - " " + "from scipy.stats import ttest_1samp\n", + "poke_differences= pokemon['Defense'] - pokemon['Attack']\n", + "\n", + "t_stat, p_value = ttest_1samp(poke_differences, 0)\n", + "\n", + "print(f\"Tstatistics: {t_stat:.5f}\")\n", + "print(f\"P-value: {p_value:.5f}\")\n", + "\n", + "\n", + "if p_value < 0.05:\n", + " print(\"Reject the Null Hypoteses: The mean difference is sgnificantly different from 0.\")\n", + "else:\n", + " print(\"Fail to 
Reject null Hypotheses: The mean difference is not sgnificantly different from 0..\")\n" ] }, { @@ -302,12 +845,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',\n", + " 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',\n", + " 'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon['Type 1'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
is_waterFalseTrue
Legendary
False627108
True614
\n", + "
" + ], + "text/plain": [ + "is_water False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon['is_water'] = pokemon['Type 1'] == \"Water\"\n", + "\n", + "contigency_table = pd.crosstab(pokemon['Legendary'], pokemon['is_water'])\n", + "contigency_table" ] }, { @@ -319,12 +944,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-Squared Statistic: 2.9429200762850503\n", + "P-value: 0.08625467249550949\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "from scipy.stats import chi2_contingency\n", + "\n", + "chi2, chi_p, __, __ = chi2_contingency(contigency_table)\n", + "\n", + "print(\"Chi-Squared Statistic:\", chi2)\n", + "print(\"P-value:\", chi_p)\n" ] }, { @@ -336,12 +975,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fail to reject the null hypothesis: There is no significant association between being legendary and being a Water type.\n" + ] + } + ], "source": [ "# Your answer here:\n", - "\n" + "if chi_p < 0.05:\n", + " print(\"Reject the null hypothesis: There is a significant association between being legendary and being a Water type.\")\n", + "else:\n", + " print(\"Fail to reject the null hypothesis: There is no significant association between being legendary and being a Water type.\")\n" ] }, { @@ -354,7 +1004,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -368,9 +1018,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": 
"3.12.4" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }