diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..12b7d53 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -1,5 +1,36 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Ironhack\n", + "\n", + "# Lab | Two Sample Hypothesis Testing\n", + "\n", + "## Introduction\n", + "\n", + "In this lab we'll learn more about hypothesis testing and expand to 2 sample hypothesis tests. \n", + "\n", + "## Deliverables\n", + "\n", + "- `main.ipynb`.\n", + "\n", + "## Submission\n", + "\n", + "Upon completion, add your deliverables to git. Then commit git and push your branch to the remote.\n", + "\n", + "## Resources\n", + "\n", + "[T Test](https://researchbasics.education.uconn.edu/t-test/)\n", + "\n", + "[Hypothesis Tests in SciPy](https://scipy-lectures.org/packages/statistics/index.html#hypothesis-testing-comparing-two-groups)\n", + "\n", + "[The Chi-Squared Test](https://en.wikipedia.org/wiki/Chi-squared_test)\n", + "\n", + "[The `chi2_contingency` function in SciPy](https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.chi2_contingency.html)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -14,12 +45,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from scipy.stats import ttest_ind" ] }, { @@ -35,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +87,154 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pokemon.head()\n" ] }, { @@ -70,12 +246,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hay 735 pokemons no legendarios y 65 pokemon legendarios (8.84%)\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon_nolegendario = pokemon.groupby(\"Legendary\").get_group(False)\n", + "pokemon_legendario = pokemon.groupby(\"Legendary\").get_group(True)\n", + "\n", + "pokemon_nolegendario.head()\n", + "pokemon_legendario.head()\n", + "\n", + "print(f\"Hay {pokemon_nolegendario.shape[0]} pokemons no legendarios y {pokemon_legendario.shape[0]} pokemon legendarios ({round(pokemon_legendario.shape[0]*100/pokemon_nolegendario.shape[0],2)}%)\")" ] }, { @@ -87,12 +278,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pokemon no Legendary: Mean: 417.21360544217686 Standar deviation: 106.76041745713022\n", + "Pokemon Legendary: Mean: 637.3846153846154 Standar deviation: 60.93738905315346\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "print(f\"Pokemon no Legendary: Mean: 
{pokemon_nolegendario[\"Total\"].mean()} Standar deviation: {pokemon_nolegendario[\"Total\"].std()}\")\n", + "print(f\"Pokemon Legendary: Mean: {pokemon_legendario[\"Total\"].mean()} Standar deviation: {pokemon_legendario[\"Total\"].std()}\")" ] }, { @@ -106,12 +308,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -25.8335743895517\n", + "P-value: 9.357954335957446e-47\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "a = pokemon_nolegendario[\"Total\"]\n", + "b = pokemon_legendario[\"Total\"]\n", + "\n", + "t_stat, p_value = ttest_ind(a, b, equal_var=False)\n", + "\n", + "print(\"T-statistic:\", t_stat)\n", + "print(\"P-value:\", p_value)" ] }, { @@ -128,7 +345,8 @@ "outputs": [], "source": [ "# Your conclusions here:\n", - "\n" + "\n", + "print(f\"The {p_value} value is much smaller than alpha 5% or 1% so the null hypothesis is ruled out. There is a significant difference between legendary and non-legendary\")" ] }, { @@ -140,12 +358,387 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 97, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
Type 1
Bug696952696969696969696969
Dark313121313131313131313131
Dragon323221323232323232323232
Electric444417444444444444444444
Fairy17172171717171717171717
Fighting27277272727272727272727
Fire525224525252525252525252
Flying442444444444
Ghost323222323232323232323232
Grass707037707070707070707070
Ground323219323232323232323232
Ice242411242424242424242424
Normal989837989898989898989898
Poison282813282828282828282828
Psychic575719575757575757575757
Rock444435444444444444444444
Steel272722272727272727272727
Water11211253112112112112112112112112112
\n", + "
" + ], + "text/plain": [ + " # Name Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n", + "Type 1 \n", + "Bug 69 69 52 69 69 69 69 69 69 \n", + "Dark 31 31 21 31 31 31 31 31 31 \n", + "Dragon 32 32 21 32 32 32 32 32 32 \n", + "Electric 44 44 17 44 44 44 44 44 44 \n", + "Fairy 17 17 2 17 17 17 17 17 17 \n", + "Fighting 27 27 7 27 27 27 27 27 27 \n", + "Fire 52 52 24 52 52 52 52 52 52 \n", + "Flying 4 4 2 4 4 4 4 4 4 \n", + "Ghost 32 32 22 32 32 32 32 32 32 \n", + "Grass 70 70 37 70 70 70 70 70 70 \n", + "Ground 32 32 19 32 32 32 32 32 32 \n", + "Ice 24 24 11 24 24 24 24 24 24 \n", + "Normal 98 98 37 98 98 98 98 98 98 \n", + "Poison 28 28 13 28 28 28 28 28 28 \n", + "Psychic 57 57 19 57 57 57 57 57 57 \n", + "Rock 44 44 35 44 44 44 44 44 44 \n", + "Steel 27 27 22 27 27 27 27 27 27 \n", + "Water 112 112 53 112 112 112 112 112 112 \n", + "\n", + " Speed Generation Legendary \n", + "Type 1 \n", + "Bug 69 69 69 \n", + "Dark 31 31 31 \n", + "Dragon 32 32 32 \n", + "Electric 44 44 44 \n", + "Fairy 17 17 17 \n", + "Fighting 27 27 27 \n", + "Fire 52 52 52 \n", + "Flying 4 4 4 \n", + "Ghost 32 32 32 \n", + "Grass 70 70 70 \n", + "Ground 32 32 32 \n", + "Ice 24 24 24 \n", + "Normal 98 98 98 \n", + "Poison 28 28 28 \n", + "Psychic 57 57 57 \n", + "Rock 44 44 44 \n", + "Steel 27 27 27 \n", + "Water 112 112 112 " + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "pokemon.groupby(\"Type 1\").agg(\"count\")" ] }, { @@ -157,12 +750,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 151, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Water pokemon: mean: 430.45535714285717 std: 113.18826606431458\n", + "Absolute difference: (Type 1\n", + "Bug -52.0\n", + "Dark 15.0\n", + "Dragon 120.0\n", + "Electric 13.0\n", + "Fairy -17.0\n", + "Fighting -14.0\n", + "Fire 28.0\n", + "Flying 55.0\n", + 
"Ghost 9.0\n", + "Grass -9.0\n", + "Ground 7.0\n", + "Ice 3.0\n", + "Normal -29.0\n", + "Poison -31.0\n", + "Psychic 45.0\n", + "Rock 23.0\n", + "Steel 57.0\n", + "Water 0.0\n", + "Name: Total, dtype: float64, 2)\n", + "\n", + "Relative (porcentual) difference: Type 1\n", + "Bug -11.97\n", + "Dark 3.55\n", + "Dragon 27.90\n", + "Electric 3.01\n", + "Fairy -4.01\n", + "Fighting -3.25\n", + "Fire 6.42\n", + "Flying 12.67\n", + "Ghost 2.12\n", + "Grass -2.16\n", + "Ground 1.64\n", + "Ice 0.70\n", + "Normal -6.68\n", + "Poison -7.27\n", + "Psychic 10.57\n", + "Rock 5.41\n", + "Steel 13.30\n", + "Water 0.00\n", + "Name: Total, dtype: float64\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "water_mean = pokemon.groupby(\"Type 1\").get_group(\"Water\")[\"Total\"].mean()\n", + "water_std = pokemon.groupby(\"Type 1\").get_group(\"Water\")[\"Total\"].std()\n", + "\n", + "print(f\"Water pokemon: mean: {water_mean} std: {water_std}\")\n", + "\n", + "print(f\"Absolute difference: {round(pokemon.groupby(\"Type 1\")[\"Total\"].mean() - water_mean),2}\\n\")\n", + "print(f\"Relative (porcentual) difference: {round((pokemon.groupby(\"Type 1\")[\"Total\"].mean() - water_mean)*100/water_mean,2)}\")" ] }, { @@ -174,12 +823,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 165, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "T-statistic: -0.4418547448849676\n", + "P-value: 0.6587140317488793\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "water_pokemon = pokemon.groupby(\"Type 1\").get_group(\"Water\")[\"Total\"]\n", + "nonwater_pokemon = pokemon[pokemon[\"Type 1\"]!=\"Water\"][\"Total\"]\n", + "\n", + "t_stat, p_value = ttest_ind(water_pokemon, nonwater_pokemon, equal_var=True)\n", + "\n", + "# Resultados\n", + "print(\"T-statistic:\", t_stat)\n", + "print(\"P-value:\", p_value)\n", + "\n", + "\"\"\"\n", + "T-statistic: Representa la diferencia 
relativa entre las medias de los dos grupos.\n", + "P-value: Indica si la diferencia entre las medias es estadísticamente significativa.\n", + "- Si el p-valor es menor que 0.05, rechazas la hipótesis nula y concluyes que hay una diferencia significativa entre las medias.\n", + "- Si el p-valor es mayor o igual a 0.05, no tienes suficiente evidencia para rechazar la hipótesis nula.\n", + "\"\"\"" ] }, { @@ -191,12 +864,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 167, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Since the p-value is high (65.87140317488793%) the null hypothesis cannot be rejected.The alternative hypothesis is that the scores of Water-type Pokémon are very different from those of other types.\n" + ] + } + ], "source": [ "# Your conclusions here:\n", - "\n" + "print(f\"Since the p-value is high ({p_value*100}%) the null hypothesis cannot be rejected. The alternative hypothesis is that the scores of Water-type Pokémon are very different from those of other types.\")" ] }, { @@ -354,7 +1035,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -368,9 +1049,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }