From 26ca93a81b395c36974b40a2d41086e87dc01a41 Mon Sep 17 00:00:00 2001 From: Marc Jahnert Date: Sun, 6 Apr 2025 16:08:54 +0200 Subject: [PATCH] lab_done --- your-code/main.ipynb | 544 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 445 insertions(+), 99 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index cdc1acb..f85d154 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -14,12 +14,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", - "\n" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st\n" ] }, { @@ -35,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -53,12 +54,153 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#NameType 1Type 2TotalHPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
01BulbasaurGrassPoison3184549496565451False
12IvysaurGrassPoison4056062638080601False
23VenusaurGrassPoison525808283100100801False
33VenusaurMega VenusaurGrassPoison62580100123122120801False
44CharmanderFireNaN3093952436050651False
\n", + "
" + ], + "text/plain": [ + " # Name Type 1 Type 2 Total HP Attack Defense \\\n", + "0 1 Bulbasaur Grass Poison 318 45 49 49 \n", + "1 2 Ivysaur Grass Poison 405 60 62 63 \n", + "2 3 Venusaur Grass Poison 525 80 82 83 \n", + "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n", + "4 4 Charmander Fire NaN 309 39 52 43 \n", + "\n", + " Sp. Atk Sp. Def Speed Generation Legendary \n", + "0 65 65 45 1 False \n", + "1 80 80 60 1 False \n", + "2 100 100 80 1 False \n", + "3 122 120 80 1 False \n", + "4 60 50 65 1 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pokemon.head()" ] }, { @@ -70,12 +212,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Legendary: Legendary\n", + "True 65\n", + "Name: count, dtype: int64\n", + " \n", + " Non-legendary: Legendary\n", + "False 735\n", + "Name: count, dtype: int64\n", + " \n" + ] + } + ], + "source": [ + "print(f\"\"\"\n", + " Legendary: {pokemon[pokemon['Legendary'] == True]['Legendary'].value_counts()}\\n \n", + " Non-legendary: {pokemon[pokemon['Legendary'] == False]['Legendary'].value_counts()}\n", + " \"\"\")\n" ] }, { @@ -87,12 +247,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Legendary Pokémon - Mean: 637.38, Std Dev: 60.94\n", + "Non-Legendary Pokémon - Mean: 417.21, Std Dev: 106.76\n" + ] + } + ], + "source": [ + "legendary_pokemons = pokemon[pokemon['Legendary'] == True]\n", + "non_legendary_pokemons = pokemon[pokemon['Legendary'] == False]\n", + "\n", + "legendary_mean = legendary_pokemons['Total'].mean()\n", + 
"legendary_std = legendary_pokemons['Total'].std()\n", + "\n", + "non_legendary_mean = non_legendary_pokemons['Total'].mean()\n", + "non_legendary_std = non_legendary_pokemons['Total'].std()\n", + "\n", + "print(f\"Legendary Pokémon - Mean: {legendary_mean:.2f}, Std Dev: {legendary_std:.2f}\")\n", + "print(f\"Non-Legendary Pokémon - Mean: {non_legendary_mean:.2f}, Std Dev: {non_legendary_std:.2f}\")\n" ] }, { @@ -106,12 +284,43 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 19.06\n", + "P-Value: 0.0000\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Marc Jay\\AppData\\Local\\Temp\\ipykernel_2884\\83059581.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " legendary_pokemons.dropna(inplace=True)\n", + "C:\\Users\\Marc Jay\\AppData\\Local\\Temp\\ipykernel_2884\\83059581.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " non_legendary_pokemons.dropna(inplace=True)\n" + ] + } + ], + "source": [ + "legendary_pokemons.dropna(inplace=True)\n", + "non_legendary_pokemons.dropna(inplace=True)\n", + "\n", + "# Perform the t-test assuming unequal variances (Welch's t-test)\n", + "t_stat, p_value = st.ttest_ind(legendary_pokemons['Total'], non_legendary_pokemons['Total'], equal_var=False)\n", + "\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: 
{p_value:.4f}\")\n" ] }, { @@ -123,12 +332,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your conclusions here:\n", - "\n" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points are different for legendary and non-legendary Pokemons.\n" + ] + } + ], + "source": [ + "alpha = 0.05 # Significance level\n", + "\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The mean total points for legendary and non-legendary Pokemons are not significantly different.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the mean total points are different for legendary and non-legendary Pokemons.\")\n" ] }, { @@ -140,12 +361,39 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " type_1 count\n", + "0 Water 112\n", + "1 Normal 98\n", + "2 Grass 70\n", + "3 Bug 69\n", + "4 Psychic 57\n", + "5 Fire 52\n", + "6 Electric 44\n", + "7 Rock 44\n", + "8 Dragon 32\n", + "9 Ground 32\n", + "10 Ghost 32\n", + "11 Dark 31\n", + "12 Poison 28\n", + "13 Steel 27\n", + "14 Fighting 27\n", + "15 Ice 24\n", + "16 Fairy 17\n", + "17 Flying 4\n" + ] + } + ], + "source": [ + "type1 = pokemon['Type 1'].value_counts().reset_index()\n", + "type1.columns = [\"type_1\", \"count\"]\n", + "print(type1)\n" ] }, { @@ -157,12 +405,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Water-type 
Pokémon - Mean: 430.46, Std Dev: 113.19\n", + "Other Pokémon - Mean: 435.86, Std Dev: 121.09\n" + ] + } + ], + "source": [ + "water_pokemons = pokemon[pokemon['Type 1'] == 'Water']\n", + "non_water_pokemons = pokemon[pokemon['Type 1'] != 'Water']\n", + "\n", + "water_mean = water_pokemons['Total'].mean()\n", + "water_std = water_pokemons['Total'].std()\n", + "\n", + "non_water_mean = non_water_pokemons['Total'].mean()\n", + "non_water_std = non_water_pokemons['Total'].std()\n", + "\n", + "print(f\"Water-type Pokémon - Mean: {water_mean:.2f}, Std Dev: {water_std:.2f}\")\n", + "print(f\"Other Pokémon - Mean: {non_water_mean:.2f}, Std Dev: {non_water_std:.2f}\")\n" ] }, { @@ -174,12 +440,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.44\n", + "P-Value: 0.6587\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "t_stat, p_value = st.ttest_ind(water_pokemons['Total'], non_water_pokemons['Total'], equal_var=True)\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.4f}\")\n" ] }, { @@ -191,12 +467,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "# Conclusion:\n", + "# Since the p-value is 0.6587 (greater than 0.05), we fail to reject the null hypothesis. 
This means there is no significant difference between the mean total points of Water-type Pokémon and all other Pokémon.\n" ] }, { @@ -210,12 +486,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -4.33\n", + "P-Value: 0.00001714030347935855799128106180528874347146484070\n" + ] + } + ], + "source": [ + "defense = pokemon['Defense']\n", + "attack = pokemon['Attack']\n", + "\n", + "t_stat, p_value = st.ttest_rel(defense, attack)\n", + "\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.50f}\")\n" ] }, { @@ -227,11 +517,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Your conclusions here:\n", + "# Conclusion:\n", + "# The p-value is extremely small (0.0000), so we reject the null hypothesis. This indicates a significant difference between the defense and attack scores for Pokémon.\n", "\n" ] }, @@ -244,12 +535,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): -0.85\n", + "P-Value: 0.39336859975481219819926081981975585222244262695312\n" + ] + } + ], + "source": [ + "sp_defense = pokemon['Sp. Def']\n", + "sp_attack = pokemon['Sp. 
Atk']\n", + "\n", + "t_stat, p_value = st.ttest_rel(sp_defense, sp_attack)\n", + "\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.50f}\")\n" ] }, { @@ -261,12 +566,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "# Your conclusions here:\n", - "\n" + "# Conclusion:\n", + "# The p-value is large (0.393), so we fail to reject the null hypothesis. This suggests there is no significant difference between the special defense and special attack scores.\n" ] }, { @@ -280,13 +585,40 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - " \n", - " " + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Statistic (t): 4.33\n", + "P-Value: 0.0000\n", + "Reject the Null Hypothesis: The difference between the means of defense and attack scores is NOT zero.\n" + ] + } + ], + "source": [ + "import scipy.stats as st\n", + "\n", + "# Calculate the difference between Attack and Defense\n", + "difference = pokemon['Attack'] - pokemon['Defense']\n", + "\n", + "# Perform the one-sample t-test\n", + "t_stat, p_value = st.ttest_1samp(difference, 0)\n", + "\n", + "# Print the results\n", + "print(f\"Test Statistic (t): {t_stat:.2f}\")\n", + "print(f\"P-Value: {p_value:.4f}\")\n", + "\n", + "# Significance level\n", + "alpha = 0.05\n", + "\n", + "# Decision making\n", + "if p_value > alpha:\n", + " print(\"Fail to Reject the Null Hypothesis: The difference between the means of defense and attack scores is not significantly different from zero.\")\n", + "else:\n", + " print(\"Reject the Null Hypothesis: The difference between the means of defense and attack scores is NOT zero.\")\n" ] }, { @@ -302,12 +634,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "Type 1 False True \n", + "Legendary \n", + "False 627 108\n", + "True 61 4\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "contingency_table = pd.crosstab(pokemon['Legendary'], pokemon['Type 1'] == 'Water')\n", + "print(contingency_table)\n" ] }, { @@ -319,12 +662,23 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n", - "\n" + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-Square Statistic: 2.94\n", + "P-Value: 0.0863\n" + ] + } + ], + "source": [ + "chi2_stat, p_val, dof, expected = st.chi2_contingency(contingency_table)\n", + "\n", + "print(f\"Chi-Square Statistic: {chi2_stat:.2f}\")\n", + "print(f\"P-Value: {p_val:.4f}\")\n" ] }, { @@ -336,25 +690,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "# Your answer here:\n", - "\n" + "# Your answer here: There is insufficient evidence to suggest a significant relationship between a Pokémon's legendary status and being a Water-type." ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -368,7 +714,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.12.4" } }, "nbformat": 4,