diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index cdc1acb..5a23aea 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -14,12 +14,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import pandas as pd\n",
+ "import numpy as np\n"
]
},
{
@@ -35,7 +36,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
@@ -53,12 +54,154 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " VenusaurMega Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 625 | \n",
+ " 80 | \n",
+ " 100 | \n",
+ " 123 | \n",
+ " 122 | \n",
+ " 120 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " Charmander | \n",
+ " Fire | \n",
+ " NaN | \n",
+ " 309 | \n",
+ " 39 | \n",
+ " 52 | \n",
+ " 43 | \n",
+ " 60 | \n",
+ " 50 | \n",
+ " 65 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 \n",
+ "3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
+ "4 4 Charmander Fire NaN 309 39 52 43 \n",
+ "\n",
+ " Sp. Atk Sp. Def Speed Generation Legendary \n",
+ "0 65 65 45 1 False \n",
+ "1 80 80 60 1 False \n",
+ "2 100 100 80 1 False \n",
+ "3 122 120 80 1 False \n",
+ "4 60 50 65 1 False "
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "pokemon.head()\n"
]
},
{
@@ -70,12 +213,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 45,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Legendary\n",
+ "False 735\n",
+ "True 65\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Your code here\n",
+ "pokemon['Legendary'].value_counts()\n"
]
},
{
@@ -87,12 +244,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "legendary mean: 645.25, legendary std: 59.70, non legendary mean: 436.45, non legendary std: 107.59\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Split by the Legendary flag, dropping only rows that lack a 'Total' score. A bare dropna()\n",
+ "# also discards every Pokemon whose 'Type 2' is NaN, which skews both groups' statistics.\n",
+ "pokemon_leg = pokemon.groupby(\"Legendary\").get_group(True).dropna(subset=[\"Total\"])\n",
+ "pokemon_non_leg = pokemon.groupby(\"Legendary\").get_group(False).dropna(subset=[\"Total\"])\n",
+ "leg_mean, leg_std = pokemon_leg['Total'].mean(), pokemon_leg['Total'].std()\n",
+ "non_leg_mean, non_leg_std = pokemon_non_leg['Total'].mean(), pokemon_non_leg['Total'].std()\n",
+ "print(f\"legendary mean: {leg_mean:.2f}, legendary std: {leg_std:.2f}, non legendary mean: {non_leg_mean:.2f}, non legendary std: {non_leg_std:.2f}\")"
]
},
{
@@ -106,11 +276,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 19.06\n",
+ "P-Value: 0.0000\n",
+ "\n",
+ "Reject the Null Hypothesis: There is sufficient evidence to conclude that the total points are different for legendary and non legendary.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import scipy.stats as sts # stats\n",
+ "\n",
+ "# Perform two-sample t-test for independent samples\n",
+ "t_stat, p_value = sts.ttest_ind(pokemon_leg[\"Total\"], pokemon_non_leg[\"Total\"], equal_var=False) \n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.4f}\")\n",
+ "print()\n",
+ "\n",
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The total points for legendary and non legendary are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the total points are different for legendary and non legendary.\")\n",
"\n"
]
},
@@ -123,12 +319,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "# As the p value is below 0.05 we have enough evidence to reject the null hypothesis and to conclude that the total points are different for legendary and non legendary\n"
]
},
{
@@ -140,12 +336,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Type 1\n",
+ "Water 112\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Rock 44\n",
+ "Electric 44\n",
+ "Ground 32\n",
+ "Ghost 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pokemon[\"Type 1\"].value_counts()\n"
]
},
{
@@ -157,12 +382,50 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "pk_water = pokemon[pokemon[\"Type 1\"] == \"Water\"]\n",
+ "pk_other = pokemon[pokemon[\"Type 1\"] != \"Water\"]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Type 1\n",
+ "Normal 98\n",
+ "Grass 70\n",
+ "Bug 69\n",
+ "Psychic 57\n",
+ "Fire 52\n",
+ "Rock 44\n",
+ "Electric 44\n",
+ "Ground 32\n",
+ "Ghost 32\n",
+ "Dragon 32\n",
+ "Dark 31\n",
+ "Poison 28\n",
+ "Fighting 27\n",
+ "Steel 27\n",
+ "Ice 24\n",
+ "Fairy 17\n",
+ "Flying 4\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pk_other[\"Type 1\"].value_counts()"
]
},
{
@@ -174,12 +437,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): -0.44\n",
+ "P-Value: 0.6587\n",
+ "\n",
+ "Fail to Reject the Null Hypothesis: The total points for Pokemon Water and not Water Pokemen are not significantly different.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Perform two-sample t-test for independent samples\n",
+ "t_stat, p_value = sts.ttest_ind(pk_water[\"Total\"], pk_other[\"Total\"], equal_var=True) # equal_var=True is the pooled-variance Student's t-test; Welch's test (more robust to unequal variances) would be equal_var=False\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.4f}\")\n",
+ "print()\n",
+ "\n",
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: The total points for Pokemon Water and not Water Pokemen are not significantly different.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is sufficient evidence to conclude that the total points are different for for Pokemon Water and not Water Pokemen .\")\n"
]
},
{
@@ -191,12 +477,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "As the P value is above our alpha threshold of 0.05, we don't have enough evidence to reject the null hypothesis that Water Pokemon and the rest of the Pokemon have the same mean total points.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "print(f\"As the P value is above our alpha threshold of 0.05, we don't have enough evidence to reject the null hypothesis that Water Pokemon and the rest of the Pokemon have the same mean total points.\")\n"
]
},
{
@@ -210,11 +504,35 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test Statistic (t): 4.33\n",
+ "P-Value: 0.0000171403\n",
+ "\n",
+ "Reject the Null Hypothesis: There is a significant difference in defense and attack scores.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Perform paired t-test\n",
+ "t_stat, p_value = sts.ttest_rel(pokemon[\"Attack\"], pokemon[\"Defense\"])\n",
+ "print(f\"Test Statistic (t): {t_stat:.2f}\")\n",
+ "print(f\"P-Value: {p_value:.10f}\")\n",
+ "print()\n",
+ "\n",
+ "# Significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Decision-Making\n",
+ "if p_value > alpha:\n",
+ " print(\"Fail to Reject the Null Hypothesis: No significant difference in defense and attack scores.\")\n",
+ "else:\n",
+ " print(\"Reject the Null Hypothesis: There is a significant difference in defense and attack scores.\")\n",
"\n"
]
},
@@ -227,12 +545,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The p value is below our alpha threshold of 0.05. For this reason, we have enough evidence to reject the null hypothesis which says there is no significant difference in attack and defense scores.\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "print(f\"The p value is below our alpha threshold of 0.05. For this reason, we have enough evidence to reject the null hypothesis which says there is no significant difference in attack and defense scores.\")"
]
},
{
@@ -244,12 +570,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "def cohen_d(group1, group2):\n",
+ " mean1, mean2 = np.mean(group1), np.mean(group2)\n",
+ " std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)\n",
+ " n1, n2 = len(group1), len(group2)\n",
+ " \n",
+ " # Calculate pooled standard deviation\n",
+ " pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))\n",
+ " \n",
+ " # Calculate Cohen's d\n",
+ " d = (mean1 - mean2) / pooled_std\n",
+ " return d"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cohen's d: 0.16\n"
+ ]
+ }
+ ],
+ "source": [
+ "# the groups\n",
+ "group1 = pokemon[\"Attack\"]\n",
+ "group2 = pokemon[\"Defense\"]\n",
+ "# Calculate Cohen's d\n",
+ "d_value = cohen_d(group1, group2)\n",
+ "print(f\"Cohen's d: {d_value:.2f}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Effect Size Interpretation: Very small effect\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Classify by the magnitude of Cohen's d: the sign only tells which group has the larger mean.\n",
+ "d_magnitude = abs(d_value)\n",
+ "if d_magnitude < 0.2:\n",
+ "    interpretation = \"Very small effect\"\n",
+ "elif d_magnitude < 0.5:\n",
+ "    interpretation = \"Small effect\"\n",
+ "elif d_magnitude < 0.8:\n",
+ "    interpretation = \"Medium effect\"\n",
+ "else:\n",
+ "    interpretation = \"Large effect\"\n",
+ "print(f\"Effect Size Interpretation: {interpretation}\")"
]
},
{
@@ -261,12 +645,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "While the p value lead us to the point that there is a significant difference between Attack and Defense, unfortunately the magnitude of the effect is very small\n"
+ ]
+ }
+ ],
"source": [
"# Your conclusions here:\n",
- "\n"
+ "print(f\"While the p value lead us to the point that there is a significant difference between Attack and Defense, unfortunately the magnitude of the effect is very small\")\n"
]
},
{
@@ -280,11 +672,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "P-Value: 0.00001714\n",
+ "\n",
+ "Reject the Null Hypothesis: There is evidence to say the difference between the means is not 0.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# One-sample t-test on the per-Pokemon difference; H0: the mean of (Defense - Attack) is 0.\n",
+ "pokemon[\"Difference_D_A\"] = pokemon[\"Defense\"] - pokemon[\"Attack\"]\n",
+ "t_stat, p_value = sts.ttest_1samp(pokemon[\"Difference_D_A\"], 0)\n",
+ "print(f\"P-Value: {p_value:.8f}\")\n",
+ "print()\n",
+ "if p_value > alpha:\n",
+ "    print(\"Fail to Reject the Null Hypothesis: Not enough evidence to say the difference between the means is not 0.\")\n",
+ "else:\n",
+ "    print(\"Reject the Null Hypothesis: There is evidence to say the difference between the means is not 0.\")\n",
" \n",
" "
]
@@ -302,12 +712,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Type 1 | \n",
+ " Water | \n",
+ " other | \n",
+ "
\n",
+ " \n",
+ " | Legendary | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | False | \n",
+ " 108 | \n",
+ " 627 | \n",
+ "
\n",
+ " \n",
+ " | True | \n",
+ " 4 | \n",
+ " 61 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Type 1 Water other\n",
+ "Legendary \n",
+ "False 108 627\n",
+ "True 4 61"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "water_or_other = pokemon['Type 1'].where(pokemon['Type 1'] == 'Water', 'other')  # derived labels; pokemon['Type 1'] itself is left untouched so earlier cells stay reproducible\n",
+ "result = pd.crosstab(pokemon[\"Legendary\"], water_or_other)\n",
+ "result\n"
]
},
{
@@ -319,42 +787,76 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 76,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.08625467249550997"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "from scipy.stats import chi2_contingency\n",
+ "# Chi-square test for 'Legendary' and 'Type 1 Water or Not'\n",
+ "_, chi2_pvalue, _, _ = chi2_contingency(result)\n",
+ "float(chi2_pvalue)"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 77,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "Based on a 95% confidence, should we reject the null hypothesis?"
+ "# Check if the chi2_pvalue is smaller than 0.05\n",
+ "float(chi2_pvalue) < 0.05"
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "# Your answer here:\n",
- "\n"
+ "Based on a 95% confidence, should we reject the null hypothesis?"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 78,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "No, the p-value (about 0.086) is above 0.05, so we do not have enough evidence to reject the null hypothesis; the data do not show a significant relationship between the variables\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"No, the p-value (about 0.086) is above 0.05, so we do not have enough evidence to reject the null hypothesis; the data do not show a significant relationship between the variables\")"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -368,7 +870,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.9"
+ "version": "3.12.7"
}
},
"nbformat": 4,