From 7e9c520fa54784426a6982e53dc675df7edbcd21 Mon Sep 17 00:00:00 2001
From: martyna-radziszewicz
<127417127+martyna-radziszewicz@users.noreply.github.com>
Date: Wed, 12 Mar 2025 18:26:02 +0100
Subject: [PATCH] lab
---
your-code/main.ipynb | 744 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 692 insertions(+), 52 deletions(-)
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..f7ee495 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import pandas as pd\n",
+ "import numpy as np\n"
]
},
{
@@ -35,7 +36,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -53,12 +54,154 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.head()\n"
]
},
{
@@ -70,12 +213,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon[\"Legendary\"].value_counts()\n"
]
},
{
@@ -87,12 +244,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 417.213605 | \n",
+ " 106.760417 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 637.384615 | \n",
+ " 60.937389 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Legendary \n",
+ "False 417.213605 106.760417\n",
+ "True 637.384615 60.937389"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "pokemon.groupby(\"Legendary\")[\"Total\"].agg([\"mean\",\"std\"])"
]
},
{
@@ -104,14 +319,67 @@
"In the cell below, use the `ttest_ind` function in `scipy.stats` to compare the the total points for legendary and non-legendary Pokemon. Since we do not have any information about the population, assume the variances are not equal."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from scipy import stats\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "legendary = pokemon[pokemon[\"Legendary\"] == True][\"Total\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "normal = pokemon[pokemon[\"Legendary\"] == False][\"Total\"]"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "t_test, p_value = stats.ttest_ind(legendary,normal, equal_var=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.True_"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p_value < 0.05"
]
},
{
@@ -128,7 +396,8 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "# We reject the null hypothesis: the mean Total of legendary and non-legendary Pokemon differs significantly (p < 0.05)."
]
},
{
@@ -140,12 +409,165 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Type 1 | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Bug | \n",
+ " 378.927536 | \n",
+ " 117.875223 | \n",
+ "
\n",
+ " \n",
+ " | Dark | \n",
+ " 445.741935 | \n",
+ " 109.126217 | \n",
+ "
\n",
+ " \n",
+ " | Dragon | \n",
+ " 550.531250 | \n",
+ " 146.267538 | \n",
+ "
\n",
+ " \n",
+ " | Electric | \n",
+ " 443.409091 | \n",
+ " 105.721952 | \n",
+ "
\n",
+ " \n",
+ " | Fairy | \n",
+ " 413.176471 | \n",
+ " 123.781680 | \n",
+ "
\n",
+ " \n",
+ " | Fighting | \n",
+ " 416.444444 | \n",
+ " 102.464378 | \n",
+ "
\n",
+ " \n",
+ " | Fire | \n",
+ " 458.076923 | \n",
+ " 109.760496 | \n",
+ "
\n",
+ " \n",
+ " | Flying | \n",
+ " 485.000000 | \n",
+ " 161.400124 | \n",
+ "
\n",
+ " \n",
+ " | Ghost | \n",
+ " 439.562500 | \n",
+ " 110.072685 | \n",
+ "
\n",
+ " \n",
+ " | Grass | \n",
+ " 421.142857 | \n",
+ " 106.650626 | \n",
+ "
\n",
+ " \n",
+ " | Ground | \n",
+ " 437.500000 | \n",
+ " 123.913081 | \n",
+ "
\n",
+ " \n",
+ " | Ice | \n",
+ " 433.458333 | \n",
+ " 108.281027 | \n",
+ "
\n",
+ " \n",
+ " | Normal | \n",
+ " 401.683673 | \n",
+ " 115.733038 | \n",
+ "
\n",
+ " \n",
+ " | Poison | \n",
+ " 399.142857 | \n",
+ " 92.358687 | \n",
+ "
\n",
+ " \n",
+ " | Psychic | \n",
+ " 475.947368 | \n",
+ " 139.026645 | \n",
+ "
\n",
+ " \n",
+ " | Rock | \n",
+ " 453.750000 | \n",
+ " 108.059227 | \n",
+ "
\n",
+ " \n",
+ " | Steel | \n",
+ " 487.703704 | \n",
+ " 115.420907 | \n",
+ "
\n",
+ " \n",
+ " | Water | \n",
+ " 430.455357 | \n",
+ " 113.188266 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Type 1 \n",
+ "Bug 378.927536 117.875223\n",
+ "Dark 445.741935 109.126217\n",
+ "Dragon 550.531250 146.267538\n",
+ "Electric 443.409091 105.721952\n",
+ "Fairy 413.176471 123.781680\n",
+ "Fighting 416.444444 102.464378\n",
+ "Fire 458.076923 109.760496\n",
+ "Flying 485.000000 161.400124\n",
+ "Ghost 439.562500 110.072685\n",
+ "Grass 421.142857 106.650626\n",
+ "Ground 437.500000 123.913081\n",
+ "Ice 433.458333 108.281027\n",
+ "Normal 401.683673 115.733038\n",
+ "Poison 399.142857 92.358687\n",
+ "Psychic 475.947368 139.026645\n",
+ "Rock 453.750000 108.059227\n",
+ "Steel 487.703704 115.420907\n",
+ "Water 430.455357 113.188266"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.groupby(\"Type 1\")[\"Total\"].agg([\"mean\",\"std\"])\n"
]
},
{
@@ -157,14 +579,88 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mean | \n",
+ " std | \n",
+ "
\n",
+ " \n",
+ " | Type 1 | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Water | \n",
+ " 430.455357 | \n",
+ " 113.188266 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mean std\n",
+ "Type 1 \n",
+ "Water 430.455357 113.188266"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
+ "pokemon[pokemon[\"Type 1\"] == \"Water\"].groupby(\"Type 1\")[\"Total\"].agg([\"mean\",\"std\"])\n",
"\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mean 435.859012\n",
+ "std 121.091682\n",
+ "Name: Total, dtype: float64"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"].agg([\"mean\",\"std\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -174,12 +670,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.False_"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "water = pokemon[pokemon[\"Type 1\"] == \"Water\"][\"Total\"]\n",
+ "non_water = pokemon[pokemon[\"Type 1\"] != \"Water\"][\"Total\"]\n",
+ "t_test, p_value = stats.ttest_ind(water,non_water)\n",
+ "p_value < 0.05"
]
},
{
@@ -196,7 +706,7 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# We fail to reject the null hypothesis: the difference in mean Total between Water and non-Water Pokemon is not statistically significant (p >= 0.05).\n"
]
},
{
@@ -210,12 +720,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.True_"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "defense = pokemon[\"Defense\"]\n",
+ "attack = pokemon[\"Attack\"]\n",
+ "\n",
+ "t_test, p_value = stats.ttest_rel(defense, attack, )\n",
+ "\n",
+ "p_value < 0.05"
]
},
{
@@ -232,7 +758,8 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "# We reject the null hypothesis: there is a statistically significant difference between Defense and Attack (paired t-test, p < 0.05).\n"
]
},
{
@@ -244,11 +771,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.False_"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
+ "sp_defense = pokemon[\"Sp. Def\"]\n",
+ "sp_attack = pokemon[\"Sp. Atk\"]\n",
+ "\n",
+ "t_test, p_value = stats.ttest_rel(sp_defense, sp_attack, )\n",
+ "\n",
+ "p_value < 0.05\n",
"\n"
]
},
@@ -266,7 +810,8 @@
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "\n",
+ "# We fail to reject the null hypothesis: there is no statistically significant difference between Sp. Def and Sp. Atk (paired t-test, p >= 0.05)."
]
},
{
@@ -280,12 +825,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.False_"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- " \n",
+ "\n",
+ "diff = sp_defense - sp_attack # Compute differences\n",
+ "t_test, p_value = stats.ttest_1samp(diff, 0) # Test if mean(diff) = 0\n",
+ "\n",
+ "p_value < 0.05\n",
" "
]
},
@@ -302,12 +862,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
+ "source": [
+ "pokemon['Type water'] = pokemon['Type 1'] == 'Water'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Type water | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 627 | \n",
+ " 108 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 61 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Type water False True \n",
+ "Legendary \n",
+ "False 627 108\n",
+ "True 61 4"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "chi_df = pd.crosstab(pokemon['Legendary'], pokemon['Type water'])\n",
+ "chi_df"
]
},
{
@@ -319,12 +946,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.False_"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "_, p_value, _, _ = stats.chi2_contingency(chi_df)\n",
+ "p_value < 0.05"
]
},
{
@@ -341,7 +980,8 @@
"outputs": [],
"source": [
"# Your answer here:\n",
- "\n"
+ "\n",
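+ "# No, we should not reject the null hypothesis: the chi-square test gives p >= 0.05, so there is no significant association between being Legendary and being a Water type."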
]
},
{
@@ -354,7 +994,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
@@ -368,7 +1008,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.12.4"
}
},
"nbformat": 4,