Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 251 additions & 24 deletions your-code/challenge-1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import libraries\n",
"import pandas as pd"
"import pandas as pd\n",
"import scipy.stats as st"
]
},
{
Expand All @@ -38,11 +39,155 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>#</th>\n",
" <th>Name</th>\n",
" <th>Type 1</th>\n",
" <th>Type 2</th>\n",
" <th>Total</th>\n",
" <th>HP</th>\n",
" <th>Attack</th>\n",
" <th>Defense</th>\n",
" <th>Sp. Atk</th>\n",
" <th>Sp. Def</th>\n",
" <th>Speed</th>\n",
" <th>Generation</th>\n",
" <th>Legendary</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Bulbasaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>318</td>\n",
" <td>45</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>65</td>\n",
" <td>65</td>\n",
" <td>45</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Ivysaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>405</td>\n",
" <td>60</td>\n",
" <td>62</td>\n",
" <td>63</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Venusaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>525</td>\n",
" <td>80</td>\n",
" <td>82</td>\n",
" <td>83</td>\n",
" <td>100</td>\n",
" <td>100</td>\n",
" <td>80</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>VenusaurMega Venusaur</td>\n",
" <td>Grass</td>\n",
" <td>Poison</td>\n",
" <td>625</td>\n",
" <td>80</td>\n",
" <td>100</td>\n",
" <td>123</td>\n",
" <td>122</td>\n",
" <td>120</td>\n",
" <td>80</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>Charmander</td>\n",
" <td>Fire</td>\n",
" <td>NaN</td>\n",
" <td>309</td>\n",
" <td>39</td>\n",
" <td>52</td>\n",
" <td>43</td>\n",
" <td>60</td>\n",
" <td>50</td>\n",
" <td>65</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" # Name Type 1 Type 2 Total HP Attack Defense \\\n",
"0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
"1 2 Ivysaur Grass Poison 405 60 62 63 \n",
"2 3 Venusaur Grass Poison 525 80 82 83 \n",
"3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
"4 4 Charmander Fire NaN 309 39 52 43 \n",
"\n",
" Sp. Atk Sp. Def Speed Generation Legendary \n",
"0 65 65 45 1 False \n",
"1 80 80 60 1 False \n",
"2 100 100 80 1 False \n",
"3 122 120 80 1 False \n",
"4 60 50 65 1 False "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Your code here:\n"
"# Your code here:\n",
"df = pd.read_csv('Pokemon.csv')\n",
"df.head()"
]
},
{
Expand All @@ -58,11 +203,12 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n",
"\n",
" \"\"\"Test means of a feature set of two samples\n",
" \n",
" Args:\n",
Expand All @@ -76,7 +222,9 @@
" results = {}\n",
"\n",
" # Your code here\n",
" \n",
" for feature in features:\n",
" t_stat, p_value = st.ttest_ind(s1[feature], s2[feature], equal_var=False)\n",
" results[feature] = {'t_statistic': t_stat, 'p_value': p_value}\n",
" return results"
]
},
Expand All @@ -101,11 +249,24 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': 8.981370483625046, 'p_value': 1.0026911708035284e-13}, 'Attack': {'t_statistic': 10.438133539322203, 'p_value': 2.520372449236646e-16}, 'Defense': {'t_statistic': 7.637078164784618, 'p_value': 4.826998494919331e-11}, 'Sp. Atk': {'t_statistic': 13.417449984138461, 'p_value': 1.5514614112239816e-21}, 'Sp. Def': {'t_statistic': 10.015696613114878, 'p_value': 2.2949327864052826e-15}, 'Speed': {'t_statistic': 11.47504444631443, 'p_value': 1.0490163118824507e-18}, 'Total': {'t_statistic': 25.8335743895517, 'p_value': 9.357954335957444e-47}}\n"
]
}
],
"source": [
"# Your code here\n"
"# Your code here\n",
"legendary = df[df['Legendary'] == True]\n",
"non_legendary = df[df['Legendary'] == False]\n",
"\n",
"t_test_results = t_test_features(legendary, non_legendary, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -121,7 +282,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# the p-values are less than 0.05; at a confidence level of 95% we therefore reject the null hypothesis for every feature"
]
},
{
Expand All @@ -133,11 +294,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': -1.4609700002846653, 'p_value': 0.14551697834219626}, 'Attack': {'t_statistic': 1.1603052805533747, 'p_value': 0.24721958967217725}, 'Defense': {'t_statistic': -0.5724173235153119, 'p_value': 0.5677711011725426}, 'Sp. Atk': {'t_statistic': 1.54608675231508, 'p_value': 0.12332165977104388}, 'Sp. Def': {'t_statistic': -1.3203746053318755, 'p_value': 0.18829872292645752}, 'Speed': {'t_statistic': 3.069594374071931, 'p_value': 0.00239265937312135}, 'Total': {'t_statistic': 0.579073329450271, 'p_value': 0.5631377907941676}}\n"
]
}
],
"source": [
"# Your code here\n"
"generation_1 = df[df['Generation'] == 1]\n",
"generation_2 = df[df['Generation'] == 2]\n",
"\n",
"t_test_results = t_test_features(generation_1, generation_2, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -153,7 +326,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# the p-values are higher than 0.05 for all features except Speed (p ~ 0.002); therefore, at a confidence level of 95% we fail to reject the null hypothesis for those features, but reject it for Speed"
]
},
{
Expand All @@ -165,11 +338,26 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'HP': {'t_statistic': -1.586088850338319, 'p_value': 0.11314389855379413}, 'Attack': {'t_statistic': -3.810556219950897, 'p_value': 0.00014932578145948305}, 'Defense': {'t_statistic': -5.60979416640793, 'p_value': 2.7978540411514693e-08}, 'Sp. Atk': {'t_statistic': -3.828976815384819, 'p_value': 0.00013876216585667907}, 'Sp. Def': {'t_statistic': -3.892991138685155, 'p_value': 0.00010730610934512779}, 'Speed': {'t_statistic': -2.258014040079978, 'p_value': 0.02421703281819093}, 'Total': {'t_statistic': -5.355678438759113, 'p_value': 1.1157056505229964e-07}}\n"
]
}
],
"source": [
"# Your code here\n"
"# Your code here\n",
"df['Type Count'] = (df['Type 2'].notnull()).astype(int)\n",
"\n",
"single_type = df[df['Type Count'] == 0] \n",
"two_types = df[df['Type Count'] == 1] \n",
"\n",
"t_test_results = t_test_features(single_type, two_types, features=[\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\", \"Total\"])\n",
"print(t_test_results)\n"
]
},
{
Expand All @@ -185,7 +373,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# Fail to reject the null hypothesis for 'HP'.\n",
"# Reject the null hypothesis for 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', and 'Total'."
]
},
{
Expand All @@ -199,11 +388,48 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy.stats import ttest_rel\n",
"#H0: There is no significant difference between the Attack and Defense.\n",
"#H1: There is a significant difference between the Attack and Defense.\n",
"\n",
"ttest_rel(df[\"Attack\"], df[\"Defense\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Your code here\n"
"#H0: There is no significant difference between the Sp. Atk and Sp. Def.\n",
"#H1: There is a significant difference between the Sp. Atk and Sp. Def.\n",
"ttest_rel(df[\"Sp. Atk\"], df[\"Sp. Def\"])"
]
},
{
Expand All @@ -219,7 +445,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Your comment here"
"# Reject the null hypothesis for the `Attack` vs `Defense` test, indicating a significant difference.\n",
"# Fail to reject the null hypothesis for `Sp. Atk` vs `Sp. Def` test, suggesting no significant difference."
]
}
],
Expand All @@ -239,7 +466,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.11.5"
}
},
"nbformat": 4,
Expand Down
Loading