diff --git a/your-code/challenge-1.ipynb b/your-code/challenge-1.ipynb
index c1bb43d..f2fa2c1 100755
--- a/your-code/challenge-1.ipynb
+++ b/your-code/challenge-1.ipynb
@@ -19,7 +19,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -38,11 +38,119 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n",
+ "\n",
+ " Speed Generation Legendary \n",
+ "0 45 1 False \n",
+ "1 60 1 False \n",
+ "2 80 1 False "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "pokemon = pd.read_csv('Pokemon.csv')\n",
+ "pokemon.head(3)\n"
]
},
{
@@ -58,10 +166,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
+ "from scipy import stats\n",
+ "\n",
"def t_test_features(s1, s2, features=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']):\n",
" \"\"\"Test means of a feature set of two samples\n",
" \n",
@@ -73,11 +194,11 @@
" Returns:\n",
" dict: a dictionary of t-test scores for each feature where the feature name is the key and the p-value is the value\n",
" \"\"\"\n",
- " results = {}\n",
- "\n",
- " # Your code here\n",
+ " results = {k:stats.ttest_ind(s1[k], s2[k])[1] for k in features}\n",
" \n",
- " return results"
+ " return results\n",
+ "\n",
+ "t_test_features"
]
},
{
@@ -101,11 +222,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'HP': 3.330647684846191e-15,\n",
+ " 'Attack': 7.827253003205333e-24,\n",
+ " 'Defense': 1.5842226094427255e-12,\n",
+ " 'Sp. Atk': 6.314915770427266e-41,\n",
+ " 'Sp. Def': 1.8439809580409594e-26,\n",
+ " 'Speed': 2.3540754436898437e-21,\n",
+ " 'Total': 3.0952457469652825e-52}"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "legendary = t_test_features(pokemon[pokemon['Legendary'] == True], pokemon[pokemon['Legendary'] == False])\n",
+ "legendary\n"
]
},
{
@@ -121,7 +260,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
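+ "# Yes: every feature differs significantly between Legendary and non-Legendary pokemon (all p-values far below 0.05), most strongly Total and Sp. Atk"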
]
},
{
@@ -133,11 +272,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'HP': 0.13791881412813622,\n",
+ " 'Attack': 0.24050968418101457,\n",
+ " 'Defense': 0.5407630349194362,\n",
+ " 'Sp. Atk': 0.14119788176331508,\n",
+ " 'Sp. Def': 0.16781226231606386,\n",
+ " 'Speed': 0.0028356954812578704,\n",
+ " 'Total': 0.5599140649014442}"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "generation = t_test_features(pokemon[pokemon['Generation'] == 1], pokemon[pokemon['Generation'] == 2])\n",
+ "generation\n"
]
},
{
@@ -153,7 +310,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
+ "# Most stats show no significant difference between Generation 1 and Generation 2 (p-values above 0.05),\n",
+ "# Speed is the only feature that differs significantly (p is about 0.003)"
]
},
{
@@ -165,11 +323,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'HP': 0.11060643144431842,\n",
+ " 'Attack': 0.00015741395666164396,\n",
+ " 'Defense': 3.250594205757004e-08,\n",
+ " 'Sp. Atk': 0.0001454917404035147,\n",
+ " 'Sp. Def': 0.00010893304795534396,\n",
+ " 'Speed': 0.024051410794037463,\n",
+ " 'Total': 1.1749035008828752e-07}"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "types = t_test_features(pokemon[pokemon['Type 2'].isnull() == False], pokemon[pokemon['Type 2'].isnull() == True])\n",
+ "types\n"
]
},
{
@@ -185,7 +361,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
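+ "# Every stat except HP (p is about 0.11) differs significantly between pokemon with two types and pokemon with one type; Defense shows the strongest difference (p is about 3e-08). The p-values alone do not give the direction, so compare the group means to confirm that dual-type pokemon have the higher defense"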
]
},
{
@@ -199,11 +375,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=4.325566393330478, pvalue=1.7140303479358558e-05, df=799)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=0.853986188453353, pvalue=0.3933685997548122, df=799)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Both measurements come from the same pokemon (same population: all types, legendaries, etc.), so the samples are paired and a related-samples t-test is used\n",
+ "att_def = stats.ttest_rel(pokemon['Attack'], pokemon['Defense'])\n",
+ "sp_att_def = stats.ttest_rel(pokemon['Sp. Atk'], pokemon['Sp. Def'])\n",
+ "display(att_def)\n",
+ "display(sp_att_def)"
]
},
{
@@ -219,7 +418,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
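+ "\"\"\" Attack vs Defense (att_def): the p-value (about 1.7e-05) is far below 0.05, so we reject the null hypothesis that the two means are equal.\n",
+ "Sp. Atk vs Sp. Def (sp_att_def): the p-value (about 0.39) is above 0.05, so we cannot reject the null hypothesis that the two means are equal.\"\"\""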
]
}
],
@@ -239,7 +439,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,
diff --git a/your-code/challenge-2.ipynb b/your-code/challenge-2.ipynb
index 1f0e335..28e52e0 100755
--- a/your-code/challenge-2.ipynb
+++ b/your-code/challenge-2.ipynb
@@ -17,21 +17,129 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Import libraries\n",
- "import pandas as pd"
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from scipy import stats"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " # | \n",
+ " Name | \n",
+ " Type 1 | \n",
+ " Type 2 | \n",
+ " Total | \n",
+ " HP | \n",
+ " Attack | \n",
+ " Defense | \n",
+ " Sp. Atk | \n",
+ " Sp. Def | \n",
+ " Speed | \n",
+ " Generation | \n",
+ " Legendary | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " Bulbasaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 318 | \n",
+ " 45 | \n",
+ " 49 | \n",
+ " 49 | \n",
+ " 65 | \n",
+ " 65 | \n",
+ " 45 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " Ivysaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 405 | \n",
+ " 60 | \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 80 | \n",
+ " 80 | \n",
+ " 60 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " Venusaur | \n",
+ " Grass | \n",
+ " Poison | \n",
+ " 525 | \n",
+ " 80 | \n",
+ " 82 | \n",
+ " 83 | \n",
+ " 100 | \n",
+ " 100 | \n",
+ " 80 | \n",
+ " 1 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def \\\n",
+ "0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 \n",
+ "1 2 Ivysaur Grass Poison 405 60 62 63 80 80 \n",
+ "2 3 Venusaur Grass Poison 525 80 82 83 100 100 \n",
+ "\n",
+ " Speed Generation Legendary \n",
+ "0 45 1 False \n",
+ "1 60 1 False \n",
+ "2 80 1 False "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Load the data:\n"
+ "pokemon = pd.read_csv('Pokemon.csv')\n",
+ "pokemon.head(3)\n"
]
},
{
@@ -58,13 +166,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',\n",
+ " 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',\n",
+ " 'Dragon', 'Dark', 'Steel', 'Flying', nan], dtype=object)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "unique_types = pd.concat([pokemon['Type 1'], pokemon['Type 2']]).unique()\n",
+ "unique_types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "19"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n",
- "\n",
- "\n",
"len(unique_types) # you should see 19"
]
},
@@ -85,13 +224,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "18"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "pokemon_totals = []\n",
- "\n",
- "# Your code here\n",
+ "pokemon_totals = [pokemon.loc[pokemon['Type 1'] == x, 'Total'] for x in unique_types if pd.isna(x) == False]\n",
"\n",
"len(pokemon_totals) # you should see 18"
]
@@ -111,11 +259,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "F_onewayResult(statistic=4.63876748166055, pvalue=2.077215448842098e-09)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "stats.f_oneway(*pokemon_totals)\n"
]
},
{
@@ -131,7 +290,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
+ "# Yes: the F statistic is about 4.64 and the p-value (about 2.1e-09) is far below 0.05, so we reject the null hypothesis that every type has the same mean Total"
]
}
],
@@ -151,7 +310,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,